{"id":7510,"date":"2020-05-18T11:18:05","date_gmt":"2020-05-18T03:18:05","guid":{"rendered":"https:\/\/kyle.ai\/blog\/?p=7510"},"modified":"2020-05-18T11:21:57","modified_gmt":"2020-05-18T03:21:57","slug":"%e7%88%ac%e8%99%ab%e5%a6%82%e4%bd%95%e7%bb%95%e8%bf%87-cloudflare-%e7%9a%84-ddos-%e4%bf%9d%e6%8a%a4%e9%aa%8c%e8%af%81","status":"publish","type":"post","link":"https:\/\/kyle.ai\/blog\/7510.html","title":{"rendered":"\u722c\u866b\u5982\u4f55\u7ed5\u8fc7 Cloudflare \u7684 DDos \u4fdd\u62a4\u9a8c\u8bc1"},"content":{"rendered":"<p>\u88ab Cloudflare \u4fdd\u62a4\u7684\u7ad9\u70b9\uff0c\u5728\u521d\u6b21\u8bbf\u95ee\u65f6\uff0c\u4f1a\u7b49\u5f85 5 \u79d2\u949f\u7684\u9a8c\u8bc1\uff0c\u68c0\u6d4b\u4f60\u662f\u4e0d\u662f\u901a\u8fc7\u6d4f\u89c8\u5668\u6b63\u5e38\u8bbf\u95ee\u7684\uff0c\u5982\u4e0b\u56fe\uff1a<\/p>\n<p><img decoding=\"async\" src=\"https:\/\/kyle.ai\/blog\/wp-content\/uploads\/2020\/05\/\u622a\u5c4f2020-05-1810_51_00.png\" alt=\"\" \/><\/p>\n<p>\u672c\u6587\u4e3b\u8981\u8bf4\u660e\u5982\u4f55\u901a\u8fc7\u6280\u672f\u624b\u6bb5\u7ed5\u8fc7\u8fd9\u4e2a\u9a8c\u8bc1\uff0c\u6211\u8bd5\u4e86\u4e24\u79cd\u529e\u6cd5\uff0c\u90fd\u7ba1\u7528\u3002<\/p>\n<p>1\u3001\u4f7f\u7528 python \u7b2c\u4e09\u65b9\u5e93\uff0c\u5982 https:\/\/github.com\/VeNoMouS\/cloudscraper<\/p>\n<p>\u4f7f\u7528\u8d77\u6765\u4e5f\u975e\u5e38\u7b80\u5355\uff0c\u770b\u5b98\u65b9\u4f7f\u7528\u6587\u6863\u5c31\u597d\u4e86\uff0c\u793a\u4f8b\uff1a<\/p>\n<pre><code class=\"language-python \">import cloudscraper\nscraper = cloudscraper.create_scraper()\nres = scraper.get(\"http:\/\/xxx\")\nprint(res.content)\n<\/code><\/pre>\n<p>\u8fd9\u4e2a\u5e93\u5b83\u662f\u7528\u539f\u751f\u7684 python \u4ee3\u7801\u6765\u89e3\u6790\u548c\u8ba1\u7b97 cloudflare \u7684\u9a8c\u8bc1\u903b\u8f91\u7684\uff0c\u4e5f\u53ef\u4ee5\u8bbe\u7f6e\u91c7\u7528 nodejs 
\u7b49\u5916\u90e8\u5e93\u6765\u8ba1\u7b97\u9a8c\u8bc1\uff0c\u5177\u4f53\u53ef\u770b\u5b98\u65b9\u6587\u6863\u3002<\/p>\n<p>\u4e0d\u8fc7\u8fd9\u4e2a\u5e93\u6709\u4e2a\u7f3a\u9677\u5c31\u662f\uff0c\u5982\u679c Cloudflare \u53d8\u66f4\u4e86\u7b97\u6cd5\uff0c\u54ea\u6015\u53ea\u6539\u52a8\u4e86\u4e00\u70b9\uff0c\u8fd9\u4e2a\u5e93\u5c31\u4f1a\u5931\u6548\uff0c\u53ea\u80fd\u7b49\u4f5c\u8005\u66f4\u65b0\u4ee3\u7801\u6765\u652f\u6301\uff0c\u6bd4\u8f83\u88ab\u52a8\u3002<\/p>\n<p>2\u3001\u4f7f\u7528 Splash \u6765\u6293\u53d6\u9875\u9762<\/p>\n<p>Splash \u662f\u4e00\u4e2a\u547d\u4ee4\u884c\u6d4f\u89c8\u5668\uff0chttps:\/\/splash.readthedocs.io\/ \uff0c\u6bd4\u8d77\u4e0a\u9762\u6211\u4eec\u901a\u8fc7\u7a0b\u5e8f\u6765\u8ba1\u7b97\uff0c\u8fd8\u4e0d\u5982\u76f4\u63a5\u8ba9\u4e00\u4e2a\u771f\u5b9e\u7684\u6d4f\u89c8\u5668\u6765\u8bbf\u95ee\u53d7\u5230\u4fdd\u62a4\u7684\u7f51\u9875\u3002<\/p>\n<p>Cloudflare \u9a8c\u8bc1\u901a\u8fc7\u540e\uff0c\u4f1a\u751f\u6210\u4e24\u4e2a cookie \u503c\uff0c\u540e\u9762\u7684\u8bf7\u6c42\u53ea\u8981\u4e00\u76f4\u5e26\u4e0a\u8fd9\u4e9b cookie\uff0c\u5c31\u4e0d\u7528\u518d\u6b21\u9a8c\u8bc1\u3002\u6240\u4ee5\u6211\u7684\u529e\u6cd5\u662f\u5982\u679c\u9700\u8981\u9a8c\u8bc1\uff0c\u5c31\u7528 splash \u8bbf\u95ee\uff0c\u8bbf\u95ee\u5b8c\u540e\uff0c\u4fdd\u5b58\u8fd4\u56de\u7684 cookie \u4e0e header \u7b49\u5fc5\u8981\u4fe1\u606f\uff0c\u4e0b\u6b21\u5e26\u4e0a\u76f4\u63a5\u6b63\u5e38\u8bbf\u95ee\u5c31\u884c\u4e86\u3002<\/p>\n<p>\u793a\u4f8b\u4ee3\u7801\u5982\u4e0b\uff1a<\/p>\n<pre><code class=\"language-python \"><br \/>requests_timeout = 15\n\n\ndef log(msg):\n    print(f\"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {msg}\", flush=True)\n\n\nclass Sraper:\n    splash_lua_script = '''\n        treat = require(\"treat\")\n        base64 = require(\"base64\")\n        local res = {}\n        splash.response_body_enabled = true\n        splash.request_body_enabled = true\n        splash:on_response(function ( response )\n            
res['url'] = treat.as_string(response.url)\n            res['cookies'] = response.request.info['cookies']\n            res['set-cookie'] = response.headers[\"set-cookie\"]\n            res['method'] = response.request.method\n            res['info'] = response.request.info\n            response.abort()\n        end)\n        splash:go(splash.args.url)\n        splash:wait(5.5)\n        return res\n    '''\n\n    def __init__(self):\n        self.session = requests.session()\n        self.headers = {}\n\n    def splash_request(self, url):\n        params = {\n            \"url\": url,\n            \"lua_source\": self.splash_lua_script,\n        }\n        headers = {\n            \"Content-Type\": \"application\/json\"\n        }\n        res = self.session.post(urllib.parse.urljoin(ConfigProxy.splash_url, \"\/run\"), headers=headers,\n                                data=json.dumps(params), timeout=requests_timeout)\n        rdata = res.json()\n        cf_headers = {}\n        for header in rdata['info']['headers']:\n            cf_headers[header['name']] = header['value']\n        if 'postData' not in rdata['info']:\n            log(\"Warning: postData not in info dict\")\n            return None\n        postdata = rdata['info']['postData']['text']\n        url = rdata['info']['url']\n        res = self.session.post(url, headers=cf_headers, data=postdata, timeout=requests_timeout, allow_redirects=False)\n        cookie = SimpleCookie()\n        cookie.load(res.headers['set-cookie'])\n        cookie_str = \"\"\n        for k, v in cookie.items():\n            cookie_str += f\"{k}={v.value}; \"\n        self.headers = {\n            \"Referer\": \"https:\/\/xxx.com\",\n            \"User-Agent\": cf_headers['User-Agent'],\n            \"Cookie\": cookie_str,\n        }\n        return res\n\n    def request(self, url):\n        if not self.headers:\n            return self.splash_request(url)\n        res = self.session.get(url, headers=self.headers, 
timeout=requests_timeout, allow_redirects=False)\n        if res.status_code == 503:\n            log(\"Get 503 response, back to splash_request...\")\n            return self.splash_request(url)\n        else:\n            return res\n\n\nif __name__ == '__main__':\n    scraper = Sraper()\n    url = 'xxx'\n    res = scraper.request(url)\n    if res is None:\n        log(\"Get res is None\")\n        return False\n    if res.status_code == 200:\n        log('success')\n    else:\n        log(f\"Get {url} , status={res.status_code}\")\n<\/code><\/pre>\n<p>\u8fd9\u91cc\u6211\u7528\u5230\u4e86 Splash \u7684 lua \u811a\u672c\uff0c\u56e0\u4e3a Splash \u4e0d\u80fd\u6e32\u67d3\u51fa  pdf \u7b49\u4e8c\u8fdb\u5236\u9875\u9762\uff0c\u53ea\u80fd\u8fd4\u56de html \u6b63\u5e38\u9875\u9762\uff0c\u6240\u4ee5\u4e0d\u80fd\u4f7f\u7528 splash:html() \uff0c\u4e5f\u4e0d\u80fd\u5728 splash:on_response \u56de\u8c03\u4e2d\uff0c\u901a\u8fc7 response.body \u53d8\u91cf\u62ff\u8fd4\u56de\u7684\u4e8c\u8fdb\u5236\u6570\u636e\uff0csplash \u6e32\u67d3\u9875\u9762\u5f02\u5e38\uff0c\u5c31\u76f4\u63a5\u4e0d\u4f1a\u7ed9 response.body \u8d4b\u503c\u4e86\uff0c\u5c31\u7b97\u4f60\u8bbe\u7f6e\u4e86 <code>splash.response_body_enabled<\/code> \u6216 <code>request:enable_response_body<\/code> \u4e00\u6837\u4e0d\u884c\uff0c\u62ff\u4e0d\u5230 response.body \u53d8\u91cf\u3002<\/p>\n<p>\u8fd9\u65f6\u5019\u6211\u8ba9 splash \u62ff\u5230\u8bf7\u6c42\u8fd4\u56de\u7684\u5934\u90e8\u540e\uff0c\u5c31\u76f4\u63a5\u653e\u5f03\u8bfb\u53d6 body\uff0c\u6240\u4ee5\u624d\u6709\u4e0a\u9762\u7684 lua \u811a\u672c\u8fd9\u4e00\u6bb5\uff1a<\/p>\n<pre><code class=\"language-lua \">splash:on_response(function ( response )\n            res['url'] = treat.as_string(response.url)\n            res['cookies'] = response.request.info['cookies']\n            res['set-cookie'] = response.headers[\"set-cookie\"]\n            res['method'] = response.request.method\n            res['info'] = response.request.info\n            
response.abort()\n        end)\n<\/code><\/pre>\n<p>\u7136\u540e\u6211\u518d\u62ff\u8fd4\u56de\u7684 cookies \u4ee5\u53ca\u5176\u4ed6\u5934\u90e8\u4fe1\u606f\uff0c\u81ea\u5df1\u901a\u8fc7 requests \u53bb\u8bbf\u95ee\u4e0b\u8f7d body \u5185\u5bb9\u3002<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u88ab Cloudflare \u4fdd\u62a4\u7684\u7ad9\u70b9\uff0c\u5728\u521d\u6b21\u8bbf\u95ee\u65f6\uff0c\u4f1a\u7b49\u5f85 5 \u79d2\u949f\u7684\u9a8c\u8bc1\uff0c\u68c0\u6d4b\u4f60\u662f\u4e0d\u662f\u901a\u8fc7\u6d4f\u89c8\u5668\u6b63\u5e38\u8bbf\u95ee\u7684\uff0c [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[5],"tags":[],"class_list":["post-7510","post","type-post","status-publish","format-standard","hentry","category-diary"],"_links":{"self":[{"href":"https:\/\/kyle.ai\/blog\/wp-json\/wp\/v2\/posts\/7510","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/kyle.ai\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/kyle.ai\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/kyle.ai\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/kyle.ai\/blog\/wp-json\/wp\/v2\/comments?post=7510"}],"version-history":[{"count":3,"href":"https:\/\/kyle.ai\/blog\/wp-json\/wp\/v2\/posts\/7510\/revisions"}],"predecessor-version":[{"id":7514,"href":"https:\/\/kyle.ai\/blog\/wp-json\/wp\/v2\/posts\/7510\/revisions\/7514"}],"wp:attachment":[{"href":"https:\/\/kyle.ai\/blog\/wp-json\/wp\/v2\/media?parent=7510"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/kyle.ai\/blog\/wp-json\/wp\/v2\/categories?post=7510"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/kyle.ai\/blog\/wp-json\/wp\/v2\/tags?post=7510"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}