http://www.sufeinet.com/plugin.php?id=keke_group

苏飞论坛

 找回密码
 马上注册

QQ登录

只需一步,快速开始

分布式系统框架(V2.0) 轻松承载百亿数据,千万流量!讨论专区 - 源码下载 - 官方教程

HttpHelper爬虫框架(V2.7-含.netcore) HttpHelper官方出品,爬虫框架讨论区 - 源码下载 - 在线测试和代码生成

HttpHelper爬虫类(V2.0) 开源的爬虫类,支持多种模式和属性 源码 - 代码生成器 - 讨论区 - 教程 - 例子

查看: 6715|回复: 8

[求助] 浏览器安全检查5秒的问题

[复制链接]
发表于 2019-3-19 09:38:11 | 显示全部楼层 |阅读模式
最近需要采集的网站,突然增加了个安全检查5秒倒计时的问题,网站采集的源码是:
<!DOCTYPE HTML>
<html lang="en-US">
<head>
  <meta charset="UTF-8" />
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
  <meta http-equiv="X-UA-Compatible" content="IE=Edge,chrome=1" />
  <meta name="robots" content="noindex, nofollow" />
  <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
  <title>安全检查中...</title>
  <style type="text/css">
    html, body {width: 100%; height: 100%; margin: 0; padding: 0;}
    body {background-color: #ffffff; font-family: Helvetica, Arial, sans-serif; font-size: 100%;}
    h1 {font-size: 1.5em; color: #404040; text-align: center;}
    p {font-size: 1em; color: #404040; text-align: center; margin: 10px 0 0 0;}
    #spinner {margin: 0 auto 30px auto; display: block;}
    .attribution {margin-top: 20px;}
  </style>

    <script type="text/javascript">
  //<![CDATA[
  (function(){
    var a = function() {try{return !!window.addEventListener} catch(e) {return !1} },
    b = function(b, c) {a() ? document.addEventListener("DOMContentLoaded", b, c) : document.attachEvent("onreadystatechange", b)};
    b(function(){
      var a = document.getElementById('yjs-content');a.style.display = 'block';
      setTimeout(function(){
        var s,t,o,p,b,r,e,a,k,i,n,g,f, XrCEvGL={"sBbjjlJ":+((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(+[])+(!+[]+!![]+!![]+!![])+(+!![])+(!+[]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]))/+((!+[]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(+!![])+(+[])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]))};
        t = document.createElement('div');
        t.innerHTML="<a href='/'>x</a>";
        t = t.firstChild.href;r = t.match(/https?:\/\//)[0];
        t = t.substr(r.length); t = t.substr(0,t.length-1);
        a = document.getElementById('jschl-answer');
        f = document.getElementById('challenge-form');
        ;XrCEvGL.sBbjjlJ*=+((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![])+(+[])+(!+[]+!![]+!![]+!![])+(+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![])+(!+[]+!![]+!![]))/+((!+[]+!![]+!![]+!![]+[])+(!+[]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(+[])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![])+(+!![])+(!+[]+!![]+!![])+(!+[]+!![]+!![]));XrCEvGL.sBbjjlJ-=+((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![])+(+[])+(!+[]+!![]+!![]+!![])+(+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![])+(!+[]+!![]+!![]))/+((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![])+(+[])+(+!![])+(+!![])+(!+[]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]));XrCEvGL.sBbjjlJ+=+((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(+[])+(!+[]+!![]+!![]+!![])+(+!![])+(!+[]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]))/+((!+[]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]));XrCEvGL.sBbjjlJ*=+((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![])+(+[])+(!+[]+!![]+!![]+!![])+(+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![])+(!+[]+!![]+!![]))/+((!+[]+!![]+!![]+!![]+!![]+[])+(!+[]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(+!![])+(!+[]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]));XrCEvGL.sBbjjlJ*=+((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!
![]+!![])+(+[])+(+[])+(!+[]+!![]+!![])+(!+[]+!![]+!![]+!![])+(!+[]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]))/+((!+[]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]));XrCEvGL.sBbjjlJ+=+((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(+[])+(!+[]+!![]+!![]))/+((!+[]+!![]+!![]+!![]+[])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![])+(+[])+(!+[]+!![]+!![])+(!+[]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![]));XrCEvGL.sBbjjlJ+=+((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(+[])+(!+[]+!![]+!![]+!![])+(!+[]+!![])+(!+[]+!![]+!![]+!![])+(!+[]+!![]+!![]))/+((+!![]+[])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![])+(+[]));XrCEvGL.sBbjjlJ+=+((!+[]+!![]+!![]+!![]+!![]+!![]+!![]+[])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(+[])+(+[])+(!+[]+!![]+!![])+(!+[]+!![]+!![]+!![])+(!+[]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![]))/+((+!![]+[])+(!+[]+!![]+!![]+!![]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![]+!![])+(!+[]+!![])+(!+[]+!![]+!![]+!![])+(!+[]+!![]+!![]+!![])+(!+[]+!![])+(!+[]+!![]+!![]+!![])+(+!![]));a.value = +XrCEvGL.sBbjjlJ.toFixed(10) + t.length; '; 121'
        f.submit();
      }, 4000);
    }, false);
  })();
  //]]>
</script>
<script type="text/javascript"  src="https://captcha.su.baidu.com/anti-bot/mfcd.js">
</script>


</head>
<body>
  <table width="100%" height="100%" cellpadding="20">
    <tr>
      <td align="center" valign="middle">
          <div class="yjs-browser-verification yjs-im-under-attack">

  <noscript><h1 data-translate="turn_on_js" style="color:#bd2426;">请打开浏览器的javascript,然后刷新浏览器</h1></noscript>
  <div id="yjs-content" style="display:none">
    <div>
      <div class="bubbles"></div>
      <div class="bubbles"></div>
      <div class="bubbles"></div>
    </div>
    <h1>jxtc.com.cn <span data-translate="checking_browser">浏览器安全检查中...</span></h1>

    <p data-translate="process_is_automatic"></p>
    <p data-translate="allow_5_secs">还剩 5 秒&hellip;</p>
  </div>
  <form id="challenge-form" action="/cdn-cgi/l/chk_jschl" method="get">
    <input type="hidden" name="jschl_vc" value="cb1cdef26461c389b2f3c2000232c61f"/>
    <input type="hidden" name="pass" value="1552724103.532-SCNWTtrqxF"/>
    <input type="hidden" id="jschl-answer" name="jschl_answer"/>
  </form>

</div>


          <div class="attribution"><a  target="_blank" style="font-size: 12px;"></a></div>
      </td>
    </tr>
  </table>
</body>
</html>

需要采集打开的url是http://www.jxtc.com.cn/news_list/newsCategoryId=5.html
看了些网上的资料,说是要研究下页面跳转的参数
我找到了实际跳转的url是http://www.jxtc.com.cn/cdn-cgi/l/chk_jschl?jschl_vc=de929a12e97ccf20594d62f8fe39a2a0&pass=1552721040.836-JDY04Oikp8&jschl_answer=11.8254323066
这3个参数每次都是随机的,
但问题是:手动复制上面的url直接打开,页面打不开。如果是这样,是不是提交cookies也没用了?
实在没辙了!求高手提供下思路和方法!不胜感激.


1. 开通SVIP会员,免费下载本站所有源码,不限次数,不限时间
2. 加官方QQ群,加官方微信群获取更多资源和帮助
3. 找站长苏飞做网站、商城、CRM、小程序、App、爬虫相关、项目外包等点这里
 楼主| 发表于 2019-3-22 14:56:35 | 显示全部楼层
不知道为什么用HttpHelper抓取这些返回503的页面时都会出错,没有返回结果!
用WebBrowser抓取都可以得到抓取的页面
回复 支持 1 反对 0

使用道具 举报

发表于 2019-3-19 11:13:38 | 显示全部楼层
同样是要抓包单独请求处理
 楼主| 发表于 2019-3-19 16:44:06 | 显示全部楼层
这块没接触过,抓包都不会...,请问哪里有地方学习的吗?
其实问题我知道,但是不会弄,也没脸叫人贴源码..
 楼主| 发表于 2019-3-19 16:46:44 | 显示全部楼层
飞哥晚上可以加QQ聊下吗?
发表于 2019-3-21 15:04:05 | 显示全部楼层
这种是加了防御的,我做过!,先访问那个5秒的,抓js代码,运行后模拟
 楼主| 发表于 2019-3-22 14:29:14 | 显示全部楼层
先访问那个5秒的地址,返回503错误!抓不到啊!
GET http://www.jxtc.com.cn/news_list/newsCategoryId=5.html HTTP/1.1
Host: www.jxtc.com.cn
Connection: keep-alive
Cache-Control: max-age=0
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2900.0 Iron Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
DNT: 1
Accept-Encoding: gzip, deflate, sdch
Accept-Language: zh-CN,zh;q=0.8
Cookie: yjs_ab_lid=%7B%22data%22%3A%7B%22server_time%22%3A%221553235485158%22%2C%22ver%22%3A%221.0%22%2C%22lid%22%3A%223a63335129d06bde92d237d2ba4fe77bf9f82%22%2C%22ip%22%3A%22182.99.238.81%22%2C%22ret_code%22%3A%22300%22%7D%2C%22key_id%22%3A%227%22%2C%22sign%22%3A%227EB63562%22%7D; yjs_ab_score=undefined; __cfduid=d171320927faf7b3c2973ec1be7f9de2d1552700196; existFlag=1; vct=6; yjs_ab_lid=%7B%22data%22%3A%7B%22server_time%22%3A%221553233508402%22%2C%22ver%22%3A%221.0%22%2C%22lid%22%3A%223a63335129d06bde92d237d2ba4fe77bf9f82%22%2C%22ip%22%3A%22182.99.238.81%22%2C%22ret_code%22%3A%22300%22%7D%2C%22key_id%22%3A%227%22%2C%22sign%22%3A%22AB8AF165%22%7D; yjs_ab_score=undefined; JSESSIONID=B4CA8824D29649028C3F003C4CD2B117.DLOG4J; cf_clearance=3e7e5857aa2f28e65e3dff766dbba6e78797d30b-1553235489-300-150; GUID=3ee9ea70-a0aa-4773-8106-d71e268c44a8; pvc=6; BROWSEID=c45a643f-83d9-45a2-8b56-b7404d6fee20; rd=
AlexaToolbar-ALX_NS_PH: AlexaToolbar/alxg-3.3
 楼主| 发表于 2019-3-22 14:31:37 | 显示全部楼层
           item = new HttpItem()
            {
                URL = "http://www.jxtc.com.cn/news_list/newsCategoryId=5.html",//URL     必需项
                Encoding = "utf-8",//编码格式(utf-8,gb2312,gbk)     可选项 默认类会自动识别
                Method = "get"//URL     可选项 默认为Get               
        
            }
textBox1.Text = http.GetHtml(item);

这样抓总是显示string error
 楼主| 发表于 2019-3-22 14:32:10 | 显示全部楼层
GET http://www.jxtc.com.cn/news_list/newsCategoryId=5.html HTTP/1.1
Host: www.jxtc.com.cn
Connection: keep-alive
Cache-Control: max-age=0
Upgrade-Insecure-Requests: 1
User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2900.0 Iron Safari/537.36
Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8
DNT: 1
Accept-Encoding: gzip, deflate, sdch
Accept-Language: zh-CN,zh;q=0.8
Cookie: yjs_ab_lid=%7B%22data%22%3A%7B%22server_time%22%3A%221553235485158%22%2C%22ver%22%3A%221.0%22%2C%22lid%22%3A%223a63335129d06bde92d237d2ba4fe77bf9f82%22%2C%22ip%22%3A%22182.99.238.81%22%2C%22ret_code%22%3A%22300%22%7D%2C%22key_id%22%3A%227%22%2C%22sign%22%3A%227EB63562%22%7D; yjs_ab_score=undefined; __cfduid=d171320927faf7b3c2973ec1be7f9de2d1552700196; existFlag=1; vct=6; yjs_ab_lid=%7B%22data%22%3A%7B%22server_time%22%3A%221553233508402%22%2C%22ver%22%3A%221.0%22%2C%22lid%22%3A%223a63335129d06bde92d237d2ba4fe77bf9f82%22%2C%22ip%22%3A%22182.99.238.81%22%2C%22ret_code%22%3A%22300%22%7D%2C%22key_id%22%3A%227%22%2C%22sign%22%3A%22AB8AF165%22%7D; yjs_ab_score=undefined; JSESSIONID=B4CA8824D29649028C3F003C4CD2B117.DLOG4J; cf_clearance=3e7e5857aa2f28e65e3dff766dbba6e78797d30b-1553235489-300-150; GUID=3ee9ea70-a0aa-4773-8106-d71e268c44a8; pvc=6; BROWSEID=c45a643f-83d9-45a2-8b56-b7404d6fee20; rd=
AlexaToolbar-ALX_NS_PH: AlexaToolbar/alxg-3.3
这个是抓包的
您需要登录后才可以回帖 登录 | 马上注册

本版积分规则

QQ|手机版|小黑屋|手机版|联系我们|关于我们|广告合作|苏飞论坛 ( 豫ICP备18043678号-2)

GMT+8, 2024-11-22 07:02

© 2014-2021

快速回复 返回顶部 返回列表