Skip to content

Commit

Permalink
Cloudflare!!!
Browse files Browse the repository at this point in the history
  • Loading branch information
naibo committed Jul 12, 2023
1 parent 1e2ca08 commit e50cd7a
Show file tree
Hide file tree
Showing 26 changed files with 443 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1577,13 +1577,14 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
browser_t = MyChrome(
options=options, chrome_options=option, executable_path=driver_path)
elif cloudflare == 1:
if sys.platform != "darwin":
options.binary_location = "" # 需要用自己的浏览器
if sys.platform == "win32":
options.binary_location = "C:\\Program Files\\Google\\Chrome Beta\\Application\\chrome.exe" # 需要用自己的浏览器
# options.binary_location = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe" # 需要用自己的浏览器
browser_t = MyUCChrome(
options=options, driver_executable_path=driver_path)
else:
print("Not support Cloudflare Mode on MacOS")
print("MacOS不支持Cloudflare验证模式")
print("Cloudflare模式只支持Windows x64平台。")
print("Cloudflare Mode only support on Windows x64 platform.")
sys.exit()
event = Event()
event.set()
Expand All @@ -1607,9 +1608,9 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
print("正在运行任务,长按键盘p键可暂停任务的执行以便手工操作浏览器如输入验证码;如果想恢复任务的执行,请再次长按p键。")
print("Running task, long press 'p' to pause the task for manual operation of the browser such as entering the verification code; If you want to resume the execution of the task, please long press 'p' again.")
print("----------------------------------\n\n")
if cloudflare:
print("过Cloudflare验证模式有时候会不稳定,请注意观察上方提示的浏览器版本信息是否正确,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。")
print("Passing the Cloudflare verification mode is sometimes unstable. Please pay attention to whether the browser version information prompted above is correct. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
# if cloudflare:
# print("过Cloudflare验证模式有时候会不稳定,如果无法通过验证则需要隔几分钟重试一次,或者可以更换新的用户信息文件夹再执行任务。")
# print("Passing the Cloudflare verification mode is sometimes unstable. If the verification fails, you need to try again every few minutes, or you can change to a new user information folder and then execute the task.")
# 使用监听器监听键盘输入
try:
with Listener(on_press=on_press_creator(press_time, event), on_release=on_release_creator(event, press_time)) as listener:
Expand Down
12 changes: 9 additions & 3 deletions .temp_to_pub/EasySpider_windows_x64/Code/myChrome.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
import sys

desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none"

Expand Down Expand Up @@ -89,9 +91,13 @@ def find_elements(self, by=By.ID, value=None, iframe=False):
else:
return super().find_elements(by=by, value=value)

import sys
if sys.platform != "darwin": # MacOS不支持Cloudflare
import undetected_chromedriver_ES as uc
# MacOS不支持直接打包带Cloudflare的功能,如果要自己编译运行,可以把这个if去掉,然后配置好浏览器和driver路径
if sys.platform != "darwin":
ES = True
if ES: # 用自己写的ES版本
import undetected_chromedriver_ES as uc
else:
import undetected_chromedriver as uc

class MyUCChrome(uc.Chrome):

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":0,"name":"详情页","url":"https://www.dongchedi.com/article/7254469214726324796","links":"https://www.dongchedi.com/article/7254469214726324796","create_time":"2023/7/11 17:53:04","update_time":"2023/7/11 17:54:46","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.dongchedi.com/article/7254469214726324796","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.dongchedi.com/article/7254469214726324796","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.dongchedi.com/article/7254469214726324796"}],"outputParameters":[{"id":0,"name":"参数1_页面标题","desc":"","type":"text","recordASField":1,"exampleValue":"荣威D7 DMH混动版官图发布 定位中大型轿车 续航1400km_懂车帝"},{"id":1,"name":"参数2_文本","desc":"","type":"text","recordASField":1,"exampleValue":"日前,荣威D7 DMH(图片)混动版官图正式发布。据悉,新车定位新能源中大型轿车,将会在年内上市发售。外观方面,新车将采用全新的设计风格,整体造型十分时尚且富有运动感。值得注意的是,新车并没有采用与电动版相同的分体式大灯设计,而是相对常规的样式。车身尺寸方面,新车长宽高分别为4890/1890/1510mm,轴距为2810mm。动力方面,新车将搭载热效率大于43%的混动专用发动机,CLTC工况下纯电续航里程为125km,综合续航里程为1400km,馈电油耗为4.3L/100km。"},{"id":2,"name":"参数4_图片地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://p9-dcd.byteimg.com/img/motor-article-img/0e7a4f1c6e89438ea8dc22163d80fae3~noop.webp"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,5],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.dongchedi.com/article/7254469214726324796","links":"https://www.dongchedi.com/article/7254469214726324796","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":6,"relative":false,"name":"参数1_页面标题","desc":"","extractType":0,"relativeXPath":"/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/aside[1]/div[1]/h2[1]","allXPaths":["/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/aside[1]/div[1]/h2[1]","//h2[contains(., '相关推荐')]","//H2[@class='jsx-1932881358 title']","/html/body/div[last()-5]/div/div/div/div/div/aside/div[last()-1]/h2"],"exampleValues":[{"num":0,"value":"荣威D7 DMH混动版官图发布 定位中大型轿车 续航1400km_懂车帝"}],"unique_index":"zq1hj9zt0inljy40dht","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":0,"contentType":0,"relative":false,"name":"参数2_文本","desc":"","extractType":0,"relativeXPath":"id(\"article\")","allXPaths":["/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/main[1]/section[1]/div[1]/article[1]/div[2]/div[1]/section[1]","//section[contains(., '日前,荣威D7 DM')]","id(\"article\")","//SECTION[@class='jsx-3371063651 article-content']","/html/body/div[last()-5]/div/div/div/div/div/main/section/div[last()-1]/article/div[last()-1]/div/section"],"exampleValues":[{"num":0,"value":"日前,荣威D7 DMH(图片)混动版官图正式发布。据悉,新车定位新能源中大型轿车,将会在年内上市发售。外观方面,新车将采用全新的设计风格,整体造型十分时尚且富有运动感。值得注意的是,新车并没有采用与电动版相同的分体式大灯设计,而是相对常规的样式。车身尺寸方面,新车长宽高分别为4890/1890/1510mm,轴距为2810mm。动力方面,新车将搭载热效率大于43%的混动专用发动机,CLTC工况下纯电续航里程为125km,综合续航里程为1400km,馈电油耗为4.3L/100km。"}],"unique_index":"zcgjfmkb41ljy4164a","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":-1,"index":3,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[4],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/main[1]/section[1]/div[1]/article[1]/div[2]/div[1]/section[1]/div/img[1]","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/main[1]/section[1]/div[1]/article[1]/div[2]/div[1]/section[1]/div[1]/img[1]","//img[contains(., '')]","/html/body/div[last()-6]/div[last()-1]/div/div/div/div/main/section/div[last()-1]/article/div[last()-1]/div/section/div[last()-4]/img"]}},{"id":-1,"index":4,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":4,"contentType":0,"relative":true,"name":"参数3_图片地址","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://p9-dcd.byteimg.com/img/motor-article-img/0e7a4f1c6e89438ea8dc22163d80fae3~noop.webp"}],"unique_index":"wdaxxokem3ljy44mzo","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":1}],"loopType":1}},{"id":3,"index":5,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[6],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"id(\"article\")//img","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/main[1]/section[1]/div[1]/article[1]/div[2]/div[1]/section[1]/div[1]/img[1]","//img[contains(., '')]","/html/body/div[last()-6]/div[last()-1]/div/div/div/div/main/section/div[last()-1]/article/div[last()-1]/div/section/div[last()-4]/img"]}},{"id":4,"index":6,"parentId":3,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":4,"contentType":0,"relative":true,"name":"参数4_图片地址","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://p9-dcd.byteimg.com/img/motor-article-img/0e7a4f1c6e89438ea8dc22163d80fae3~noop.webp"}],"unique_index":"bmdeqk77gfdljy45n1u","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}
Loading

0 comments on commit e50cd7a

Please sign in to comment.