Skip to content

Commit

Permalink
UC Update Iframe
Browse files Browse the repository at this point in the history
  • Loading branch information
naibo committed Jul 8, 2023
1 parent 021ccf1 commit 751fa6e
Show file tree
Hide file tree
Showing 32 changed files with 205 additions and 52 deletions.
27 changes: 11 additions & 16 deletions .temp_to_pub/EasySpider_windows_x64/Code/easyspider_executestage.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def saveData(self, exit=False):
str(self.id) + "/" + self.saveName + '.xlsx'
write_to_excel(file_name, self.OUTPUT, self.outputParametersTypes, self.outputParametersRecord)
elif self.outputFormat == "mysql":
self.mysql.write_to_mysql(self.OUTPUT, self.outputParametersRecord)
self.mysql.write_to_mysql(self.OUTPUT, self.outputParametersRecord, self.outputParametersTypes)

self.OUTPUT = []
self.log = ""
Expand Down Expand Up @@ -1345,19 +1345,19 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
if sys.platform == "win32" and platform.architecture()[0] == "32bit":
options.binary_location = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win32/chrome.exe") # 指定chrome位置
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
driver_path = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win32/chromedriver_win32.exe")
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
elif sys.platform == "win32" and platform.architecture()[0] == "64bit":
options.binary_location = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chrome.exe")
driver_path = os.path.join(
os.getcwd(), "EasySpider/resources/app/chrome_win64/chromedriver_win64.exe")
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
elif sys.platform == "linux" and platform.architecture()[0] == "64bit":
options.binary_location = "EasySpider/resources/app/chrome_linux64/chrome"
options.add_extension("EasySpider/resources/app/XPathHelper.crx")
driver_path = "EasySpider/resources/app/chrome_linux64/chromedriver_linux64"
option.add_extension("EasySpider/resources/app/XPathHelper.crx")
else:
print("Unsupported platform")
sys.exit()
Expand All @@ -1370,21 +1370,16 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
# # option.binary_location = "C:\\Users\\q9823\\AppData\\Local\\Google\\Chrome\\Application\\chrome.exe"
# driver_path = "./Chrome/chromedriver.exe"
elif os.path.exists(os.getcwd()+"/../ElectronJS"):
if os.getcwd().find("ElectronJS") >= 0: # 软件dev用
print("Finding chromedriver in EasySpider",
os.getcwd())
options.binary_location = "chrome_win64/chrome.exe"
driver_path = "chrome_win64/chromedriver_win64.exe"
options.add_extension("../ElectronJS/XPathHelper.crx")
else: # 直接在executeStage文件夹内使用python easyspider_executestage.py时的路径
print("Finding chromedriver in EasySpider",
os.getcwd()+"/ElectronJS")
option.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe"
option.add_extension("../ElectronJS/XPathHelper.crx")
# 软件dev用
print("Finding chromedriver in EasySpider",
os.getcwd()+"/ElectronJS")
option.binary_location = "../ElectronJS/chrome_win64/chrome.exe" # 指定chrome位置
driver_path = "../ElectronJS/chrome_win64/chromedriver_win64.exe"
option.add_extension("../ElectronJS/XPathHelper.crx")
else:
options.binary_location = "./chrome.exe" # 指定chrome位置
driver_path = "./chromedriver.exe"
option.add_extension("XPathHelper.crx")

option.add_experimental_option(
'excludeSwitches', ['enable-automation']) # 以开发者模式
Expand Down
37 changes: 32 additions & 5 deletions .temp_to_pub/EasySpider_windows_x64/Code/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# 控制流程的暂停和继续

import csv
import datetime
import json
import os
import time
Expand Down Expand Up @@ -228,15 +229,41 @@ def create_table(self, table_name, parameters):
print("The data table " + table_name + " already exists.")
cursor.close()

def write_to_mysql(self, OUTPUT, record):
def write_to_mysql(self, OUTPUT, record, types):
# 创建一个游标对象
cursor = self.conn.cursor()

for row in OUTPUT:
for line in OUTPUT:
for i in range(len(line)):
if types[i] == "int" or types[i] == "bigInt":
try:
line[i] = int(line[i])
except:
line[i] = 0
elif types[i] == "double":
try:
line[i] = float(line[i])
except:
line[i] = 0.0
elif types[i] == "datetime":
try:
line[i] = datetime.datetime.strptime(line[i], '%Y-%m-%d %H:%M:%S')
except:
line[i] = datetime.datetime.strptime("1970-01-01 00:00:00", '%Y-%m-%d %H:%M:%S')
elif types[i] == "date":
try:
line[i] = datetime.datetime.strptime(line[i], '%Y-%m-%d')
except:
line[i] = datetime.datetime.strptime("1970-01-01", '%Y-%m-%d')
elif types[i] == "time":
try:
line[i] = datetime.datetime.strptime(line[i], '%H:%M:%S')
except:
line[i] = datetime.datetime.strptime("00:00:00", '%H:%M:%S')
to_write = []
for i in range(len(row)):
for i in range(len(line)):
if record[i]:
to_write.append(row[i])
to_write.append(line[i])
# 构造插入数据的 SQL 语句
sql = f"INSERT INTO "+ self.table_name +" "+self.field_sql+" VALUES ("
for item in to_write:
Expand All @@ -248,7 +275,7 @@ def write_to_mysql(self, OUTPUT, record):
cursor.execute(sql, to_write)
except Exception as e:
print("Error:", e)
# print("Error SQL:", sql)
print("Error SQL:", sql, to_write)
print("插入数据库错误,请查看以上的错误提示,然后检查数据的类型是否正确,是否文本过长(超过一万的文本类型要设置为大文本)。")
print("Inserting database error, please check the above error, and then check whether the data type is correct, whether the text is too long (text type over 10,000 should be set to large text).")
print("重新执行任务时,请删除数据库中的数据表" + self.table_name + ",然后再次运行程序。")
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":1,"name":"EBAY","url":"https://www.ebay.com","links":"https://www.ebay.com","create_time":"7/8/2023, 7:41:07 AM","update_time":"7/8/2023, 7:41:07 AM","version":"0.3.5","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":true,"desc":"https://www.ebay.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"Open Page","value":"https://www.ebay.com","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"text","exampleValue":"https://www.ebay.com"},{"id":1,"name":"urlList_1","nodeId":3,"nodeName":"Open Page","value":"https://ebay.com?id=Field[\"para1_linktext\"]","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"text","exampleValue":"https://ebay.com?id=Field[\"para1_linktext\"]"}],"outputParameters":[{"id":0,"name":"para1_linktext","desc":"","type":"text","recordASField":1,"exampleValue":"Score these trending kicks"},{"id":1,"name":"para1_text","desc":"","type":"text","recordASField":1,"exampleValue":"Home"},{"id":2,"name":"Exit Loop","desc":"Output of custom action","type":"text","recordASField":0,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3,4],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"Open Page","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.ebay.com","links":"https://www.ebay.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"Collect Data","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":1,"contentType":0,"relative":false,"name":"para1_linktext","desc":"","extractType":0,"relativeXPath":"/html/body/div[6]/div[3]/div[1]/div[1]/div[1]/h2[1]/a[1]","allXPaths":["/html/body/div[6]/div[3]/div[1]/div[1]/div[1]/h2[1]/a[1]","//a[contains(., 'Score thes')]","/html/body/div[last()-10]/div[last()-4]/div/div/div[last()-1]/h2/a"],"exampleValues":[{"num":0,"value":"Score these trending kicks"}],"unique_index":"0em1vatjz5vnljt7w6f7","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":3,"index":3,"parentId":0,"type":0,"option":1,"title":"Open Page","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"about:blank","links":"https://ebay.com?id=Field[\"para1_linktext\"]","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":4,"index":4,"parentId":0,"type":1,"option":8,"title":"Loop","sequence":[6,5],"isInLoop":false,"position":3,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[6]/div[1]/ul[1]/li","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[6]/div[1]/ul[1]/li[1]","//li[contains(., 'Home')]","//LI[@class='vl-flyout-nav__active']","/html/body/div[last()-10]/div[last()-6]/ul/li[last()-13]"]}},{"id":6,"index":5,"parentId":4,"type":0,"option":3,"title":"Collect Data","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":true,"name":"para1_text","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"Home"}],"unique_index":"x6qhjfubbbljt7x736","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"arguments[0].innerText += \"Field[\"para1_linktext\"]\"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}},{"id":5,"index":6,"parentId":4,"type":2,"option":9,"title":"If Condition","sequence":[7],"isInLoop":true,"position":0,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0}},{"id":7,"parentId":5,"index":7,"type":3,"option":10,"title":"Condition","sequence":[9],"isInLoop":true,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"class":"3","value":"biles","code":"","waitTime":0},"position":0},{"id":-1,"parentId":5,"index":8,"type":3,"option":10,"title":"Condition","sequence":[],"isInLoop":true,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"class":0,"value":"","code":"","waitTime":0},"position":1},{"id":8,"index":9,"parentId":7,"type":0,"option":5,"title":"Exit Loop","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"codeMode":"3","code":"","waitTime":0,"recordASField":0,"paraType":"text"}}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":10,"name":"Page Not Found","url":"https://www.genecards.org/lookup/text=Mrpl52","links":"https://turnstile.zeroclover.io/","create_time":"7/8/2023, 8:04:15 AM","update_time":"7/8/2023, 8:04:49 AM","version":"0.3.5","saveThreshold":10,"cloudflare":1,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.genecards.org/lookup/text=Mrpl52","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"Open Page","value":"https://turnstile.zeroclover.io/","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"text","exampleValue":"https://turnstile.zeroclover.io/"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"Captcha success!\n\n\n\n \n \n \n Cloudflare Turnstile Demo\n \n \n\n\n \n \n Cloudflare Turnstile Demo w/ Managed Mode\n \n \n \n \n \n\n\n\n ✍操作提示框(可点此拖动) \n ● 已选中1个元素,您可以:\n 确认采集 取消选择 Path: /html/body \n"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"Open Page","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":10,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.genecards.org/lookup/text=Mrpl52","links":"https://turnstile.zeroclover.io/","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"Collect Data","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body","allXPaths":["/html/body","//body[contains(., 'Captcha su')]","/html/body"],"exampleValues":[{"num":0,"value":"Captcha success!\n\n\n\n \n \n \n Cloudflare Turnstile Demo\n \n \n\n\n \n \n Cloudflare Turnstile Demo w/ Managed Mode\n \n \n \n \n \n\n\n\n ✍操作提示框(可点此拖动) \n ● 已选中1个元素,您可以:\n 确认采集 取消选择 Path: /html/body \n"}],"unique_index":"hqoc6f3lcauljt8tjz5","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":11,"name":"Page Not Found","url":"https://www.genecards.org/lookup/text=Mrpl52","links":"https://turnstile.zeroclover.io/","create_time":"7/8/2023, 8:04:15 AM","update_time":"7/8/2023, 8:05:38 AM","version":"0.3.5","saveThreshold":10,"cloudflare":1,"environment":0,"maxViewLength":15,"outputFormat":"xlsx","saveName":"current_time","containJudge":false,"desc":"https://www.genecards.org/lookup/text=Mrpl52","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"Open Page","value":"https://turnstile.zeroclover.io/","desc":"List of URLs to be collected, separated by \\n for multiple lines","type":"text","exampleValue":"https://turnstile.zeroclover.io/"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"Captcha success!\n\n\n\n \n \n \n Cloudflare Turnstile Demo\n \n \n\n\n \n \n Cloudflare Turnstile Demo w/ Managed Mode\n \n \n \n \n \n\n\n\n ✍操作提示框(可点此拖动) \n ● 已选中1个元素,您可以:\n 确认采集 取消选择 Path: /html/body \n"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"waitType":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"Open Page","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":19,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.genecards.org/lookup/text=Mrpl52","links":"https://turnstile.zeroclover.io/","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"Collect Data","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":5,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"/html/body","allXPaths":["/html/body","//body[contains(., 'Captcha su')]","/html/body"],"exampleValues":[{"num":0,"value":"Captcha success!\n\n\n\n \n \n \n Cloudflare Turnstile Demo\n \n \n\n\n \n \n Cloudflare Turnstile Demo w/ Managed Mode\n \n \n \n \n \n\n\n\n ✍操作提示框(可点此拖动) \n ● 已选中1个元素,您可以:\n 确认采集 取消选择 Path: /html/body \n"}],"unique_index":"hqoc6f3lcauljt8tjz5","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}}]}
Loading

0 comments on commit 751fa6e

Please sign in to comment.