如题所说,用 WinForm 分析网站日志,并判断其中的访问是否来自真正的 spider
看过网上的方法,是直接在文本中查找类似 BaiduSpider 的字样,但这样并不准确(User-Agent 可以被伪造)。据说得在 cmd 命令行下用 nslookup 对访问 IP 做反向解析(反查主机名)来判断,我测试过,结果也是正确的。
但通过 nslookup 匹配 spider 的方式非常慢,有没有更高效的办法呢?求解。
还有,每条日志的具体内容如何分割比较好呢?要分割成为:访问时间 IP地址 HTTP状态 spider 地址
以下是测试内容,此日志内容是 CentOS 系统上宝塔面板切割出来的一部分,IIS 的日志暂时还没有拿到
[HTML] 纯文本查看 复制代码
211.149.164.223 - - [22/Jul/2017:09:00:58 +0800] "GET / HTTP/1.1" 301 5 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
211.149.164.223 - - [22/Jul/2017:09:00:58 +0800] "GET /index.html HTTP/1.1" 200 2915 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
212.83.158.95 - - [22/Jul/2017:09:46:49 +0800] "GET /admin.php HTTP/1.0" 404 2624 "-" "Opera/9.80 (Windows NT 6.1; U; ru) Presto/2.8.131 Version/11.10"
212.83.158.95 - - [22/Jul/2017:09:46:50 +0800] "GET /administrator/index.php HTTP/1.0" 404 2624 "-" "Opera/9.80 (Windows NT 6.1; U; ru) Presto/2.8.131 Version/11.10"
212.83.158.95 - - [22/Jul/2017:09:46:50 +0800] "GET /wp-login.php HTTP/1.0" 404 2624 "-" "Opera/9.80 (Windows NT 6.1; U; ru) Presto/2.8.131 Version/11.10"
119.123.78.82 - - [22/Jul/2017:10:58:48 +0800] "GET /m HTTP/1.1" 301 178 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
119.123.78.82 - - [22/Jul/2017:10:58:48 +0800] "GET /m/ HTTP/1.1" 200 79 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
119.123.78.82 - - [22/Jul/2017:10:58:48 +0800] "GET /favicon.ico HTTP/1.1" 404 1358 "http://zhengbanxt.win/m/" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
180.163.2.118 - - [22/Jul/2017:10:58:58 +0800] "GET /m HTTP/1.1" 301 178 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; InfoPath.3; rv:11.0) like Gecko"
119.123.78.82 - - [22/Jul/2017:10:58:58 +0800] "GET /m/index.php HTTP/1.1" 200 79 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"
180.163.2.118 - - [22/Jul/2017:10:58:58 +0800] "GET /m/ HTTP/1.1" 200 79 "http://zhengbanxt.win/m" "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; InfoPath.3; rv:11.0) like Gecko"
117.185.27.114 - - [22/Jul/2017:10:59:00 +0800] "GET /m HTTP/1.1" 301 178 "" "Mozilla/5.0 (iPhone; CPU iPhone OS 8_4 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H143 Safari/600.1.4"
117.185.27.114 - - [22/Jul/2017:10:59:00 +0800] "GET /m/ HTTP/1.1" 200 79 "" "Mozilla/5.0 (iPhone; CPU iPhone OS 8_4 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H143 Safari/600.1.4"
101.226.99.197 - - [22/Jul/2017:11:07:14 +0800] "GET /m/index.php HTTP/1.1" 200 79 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.124 Safari/537.36"
101.226.66.187 - - [22/Jul/2017:11:07:26 +0800] "GET /m/index.php HTTP/1.1" 200 79 "" "Mozilla/5.0 (iPhone; CPU iPhone OS 8_4 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12H143 Safari/600.1.4"
219.129.216.239 - - [22/Jul/2017:11:44:38 +0800] "GET / HTTP/1.1" 301 5 "-" "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)"
222.186.3.229 - - [22/Jul/2017:11:44:38 +0800] "GET / HTTP/1.1" 301 5 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
222.186.3.229 - - [22/Jul/2017:11:44:38 +0800] "GET /index.html HTTP/1.1" 200 2915 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
49.4.142.15 - - [22/Jul/2017:11:44:52 +0800] "GET / HTTP/1.1" 301 5 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
49.4.142.15 - - [22/Jul/2017:11:44:55 +0800] "GET /index.html HTTP/1.1" 200 2915 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
119.123.78.82 - - [22/Jul/2017:15:46:47 +0800] "GET / HTTP/1.1" 301 5 "-" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E)"
119.123.78.82 - - [22/Jul/2017:15:46:47 +0800] "GET /index.html HTTP/1.1" 200 8168 "-" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E)"
119.123.78.82 - - [22/Jul/2017:15:46:57 +0800] "GET /favicon.ico HTTP/1.1" 404 2624 "-" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E)"
119.123.78.82 - - [22/Jul/2017:15:47:06 +0800] "GET /Win7xitong/ HTTP/1.1" 200 21839 "http://www.zhengbanxt.win/index.html" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E)"
119.123.78.82 - - [22/Jul/2017:15:47:09 +0800] "GET /favicon.ico HTTP/1.1" 404 2624 "-" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E)"
119.123.78.82 - - [22/Jul/2017:15:47:21 +0800] "GET /Win8xitong/ HTTP/1.1" 200 24771 "http://www.zhengbanxt.win/Win7xitong/" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E)"
119.123.78.82 - - [22/Jul/2017:15:47:42 +0800] "GET /Win10xitong/ HTTP/1.1" 200 14697 "http://www.zhengbanxt.win/Win8xitong/" "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Win64; x64; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C; .NET4.0E)"
219.129.216.239 - - [22/Jul/2017:16:55:28 +0800] "GET / HTTP/1.1" 301 5 "-" "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)"
183.2.247.134 - - [22/Jul/2017:16:55:29 +0800] "GET / HTTP/1.1" 301 5 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
183.2.247.134 - - [22/Jul/2017:16:55:29 +0800] "GET /index.html HTTP/1.1" 200 2915 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
219.129.216.239 - - [22/Jul/2017:16:55:55 +0800] "GET / HTTP/1.1" 301 5 "-" "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)"
104.131.254.50 - - [22/Jul/2017:18:41:42 +0800] "POST /xmlrpc.php HTTP/1.1" 404 2624 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36"
219.129.216.239 - - [23/Jul/2017:03:26:11 +0800] "GET / HTTP/1.1" 301 5 "-" "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)"
49.4.142.15 - - [23/Jul/2017:03:26:11 +0800] "GET / HTTP/1.1" 301 5 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
49.4.142.15 - - [23/Jul/2017:03:26:12 +0800] "GET /index.html HTTP/1.1" 200 2915 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
52.41.95.148 - - [23/Jul/2017:05:07:31 +0800] "GET / HTTP/1.1" 301 5 "-" "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"
52.41.95.148 - - [23/Jul/2017:05:07:32 +0800] "GET /index.html HTTP/1.1" 200 2915 "-" "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"
|