Skip to content

Commit

Permalink
可选是否生成新行以解决多行问题
Browse files Browse the repository at this point in the history
  • Loading branch information
naibo committed Sep 5, 2023
1 parent 3a3edd3 commit 5921d84
Show file tree
Hide file tree
Showing 9 changed files with 49 additions and 718 deletions.
706 changes: 0 additions & 706 deletions .temp_to_pub/EasySpider_windows_x64/info.log

This file was deleted.

8 changes: 8 additions & 0 deletions ElectronJS/src/taskGrid/FlowChart.html
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ <h4 class="modal-title">Equivalent XPath</h4>
<button class="btn btn-primary" v-on:mousedown= 'addPara'>New Field</button>
</p>
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='nowNode["parameters"]["clear"]'></input>Clear other field existing values before extracting</p>
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='nowNode["parameters"]["newLine"]'></input>This operation will generate a new row of data: </p>
<div class="toolkitcontain">
<table class="toolkittb2" cellspacing="0">
<tbody>
Expand Down Expand Up @@ -426,6 +427,13 @@ <h4 class="modal-title">Equivalent XPath</h4>
<option :value = 1>Yes</option>
</select>
</p>
<p>
<label>This operation will generate a new row of data: </label>
<select v-model='nowNode["parameters"]["newLine"]' class="form-control">
<option :value = 1>Yes</option>
<option :value = 0>No</option>
</select>
</p>
<label>Maximum wait time for script execution (0 represents unlimited wait time): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
</div>
Expand Down
6 changes: 3 additions & 3 deletions ElectronJS/src/taskGrid/FlowChart.js
Original file line number Diff line number Diff line change
Expand Up @@ -698,16 +698,16 @@ document.oncontextmenu = function() {
// 创建一个包含删除选项的右键菜单
let contextMenu = document.createElement("div");
contextMenu.id = "contextMenu";
contextMenu.innerHTML = `<div>${LANG("删除元素", "Delete Element")}`;
contextMenu.innerHTML = `<div>${LANG("删除元素(双击)", "Delete Element (Double Click)")}`;

// 设置右键菜单的样式
contextMenu.style.position = "absolute";
contextMenu.style.left = event.clientX + "px";
contextMenu.style.top = event.clientY + "px";
contextMenu.style.width = LANG("140px", "180px");
contextMenu.style.width = LANG("180px", "220px");

// 添加删除元素的功能
contextMenu.addEventListener("click", function() {
contextMenu.addEventListener("dblclick", function() {
// myElement.remove(); // 删除元素
deleteElement();
contextMenu.remove(); // 删除右键菜单
Expand Down
8 changes: 8 additions & 0 deletions ElectronJS/src/taskGrid/FlowChart_CN.html
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,7 @@ <h4 class="modal-title">等价XPath</h4>
<button class="btn btn-primary" v-on:mousedown= 'addPara'>新增字段</button>
</p>
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='nowNode["parameters"]["clear"]'></input>提取数据前清空其他操作字段已记录的值</p>
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='nowNode["parameters"]["newLine"]'></input>此提取数据操作后生成新数据行 <span style="font-size: 30px!important;" title="取消勾选则适用于不想每次提取操作都生成一个新行的场景"></span></p>
<div class="toolkitcontain">
<table class="toolkittb2" cellspacing="0">
<tbody>
Expand Down Expand Up @@ -428,6 +429,13 @@ <h4 class="modal-title">等价XPath</h4>
<option :value = 1>是</option>
</select>
</p>
<p>
<label>此提取数据操作后生成新数据行:</label>
<select v-model='nowNode["parameters"]["newLine"]' class="form-control">
<option :value = 1>是</option>
<option :value = 0>否</option>
</select>
</p>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='nowNode["parameters"]["waitTime"]'></input>
</div>
Expand Down
2 changes: 2 additions & 0 deletions ElectronJS/src/taskGrid/logic.js
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ function addParameters(t) {
t["parameters"]["afterJSWaitTime"] = 0; //执行后js等待时间
} else if (t.option == 3) { //提取数据
t["parameters"]["clear"] = 0; //清空其他字段数据
t["parameters"]["newLine"] = 1; //生成新行
t["parameters"]["paras"] = []; //默认参数列表
} else if (t.option == 4) { //输入文字
t["parameters"]["value"] = "";
Expand All @@ -203,6 +204,7 @@ function addParameters(t) {
t["parameters"]["afterJSWaitTime"] = 0; //执行后js等待时间
} else if(t.option == 5) { //自定义操作
t["parameters"]["clear"] = 0; //清空其他字段数据
t["parameters"]["newLine"] = 1; //生成新行
t["parameters"]["codeMode"] = 0; //代码模式,0代表JS, 2代表系统级别
t["parameters"]["code"] = "";
t["parameters"]["waitTime"] = 0; //最长等待时间
Expand Down
1 change: 1 addition & 0 deletions ElectronJS/tasks/208.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":208,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"9/5/2023, 7:40:33 PM","update_time":"9/5/2023, 7:50:58 PM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/手机/数码"},{"id":1,"name":"参数2_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"平板電腦"},{"id":2,"name":"参数3_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/家用电器"},{"id":3,"name":"自定义操作","desc":"自定义操作返回的数据","type":"text","recordASField":1,"exampleValue":""}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3,4,5,7,6],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","
beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":false,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"//div[1]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG LeftSide_menu_hover__OCHiO']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"],"exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"7wb3yiix2bdlm68o0wr","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":3,"index":3,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//*[@id=\"header\"]/div[5]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[4]/div[1]","//div[contains(., 
'')]","//DIV[@class='w']","/html/body/div[last()-6]/div"]}},{"id":4,"index":4,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":3,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":false,"paras":[{"nodeType":1,"contentType":0,"relative":false,"name":"参数2_链接文本","desc":"","extractType":0,"relativeXPath":"//a[1]","allXPaths":["/html/body/div[4]/div[1]/div[4]/a[1]","//a[contains(., '平板電腦')]","/html/body/div[last()-6]/div/div/a[last()-4]"],"exampleValues":[{"num":0,"value":"平板電腦"}],"unique_index":"nocgz46xcfslm68o55u","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":5,"index":5,"parentId":0,"type":0,"option":2,"title":"点击元素","sequence":[],"isInLoop":false,"position":4,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"//div[15656]","iframe":false,"wait":2,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"clickWay":0,"maxWaitTime":10,"paras":[],"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[3]","//div[contains(., 
'HI~欢迎来到京东!')]","//DIV[@class='right_members']","/html/body/div[last()-5]/div/div[last()-4]/div/div"]}},{"id":7,"index":6,"parentId":0,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":false,"position":6,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":1,"paras":[{"nodeType":0,"contentType":0,"relative":false,"name":"参数3_文本","desc":"","extractType":0,"relativeXPath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]","allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[2]","//div[contains(., '/家用电器')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG LeftSide_menu_hover__OCHiO']","/html/body/div[last()-5]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-11]"],"exampleValues":[{"num":0,"value":"/家用电器"}],"unique_index":"cy7iu26jedulm68obkl","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}]}},{"id":6,"index":7,"parentId":0,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":false,"position":5,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"newLine":0,"codeMode":0,"code":"return \"123\"","waitTime":0,"recordASField":1,"paraType":"text"}}]}
2 changes: 1 addition & 1 deletion ExecuteStage/.vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"justMyCode": false,
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--id", "[2]", "--headless", "0", "--user_data", "0", "--keyboard", "0"]
"args": ["--id", "[36]", "--headless", "0", "--user_data", "0", "--keyboard", "0"]
}
]
}
12 changes: 10 additions & 2 deletions ExecuteStage/easyspider_executestage.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,10 @@ def preprocess(self):
clear = node["parameters"]["clear"]
except:
node["parameters"]["clear"] = 0
try:
newLine = node["parameters"]["newLine"]
except:
node["parameters"]["newLine"] = 1
for para in paras:
try:
iframe = para["iframe"]
Expand Down Expand Up @@ -274,6 +278,10 @@ def preprocess(self):
clear = node["parameters"]["clear"]
except:
node["parameters"]["clear"] = 0
try:
newLine = node["parameters"]["newLine"]
except:
node["parameters"]["newLine"] = 1
elif node["option"] == 7: # 移动到元素
if node["parameters"]["useLoop"]:
if self.task_version <= "0.3.5":
Expand Down Expand Up @@ -618,7 +626,7 @@ def customOperation(self, node, loopValue, loopPath, index):
# self.print_and_log("操作<" + node["title"] + ">的返回值为:" + output)
# self.print_and_log("The return value of operation <" + node["title"] + "> is: " + output)
self.outputParameters[node["title"]] = output
if recordASField:
if recordASField and paras["newLine"]:
line = new_line(self.outputParameters,
self.maxViewLength, self.outputParametersRecord)
self.OUTPUT.append(line)
Expand Down Expand Up @@ -1746,7 +1754,7 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
self.outputParameters[p["name"]] = content
self.execute_code(
2, p["afterJS"], p["afterJSWaitTime"], element, iframe=p["iframe"]) # 执行后置JS
if para["recordASField"] > 0:
if para["recordASField"] > 0 and para["newLine"]:
line = new_line(self.outputParameters,
self.maxViewLength, self.outputParametersRecord)
self.OUTPUT.append(line)
Expand Down
22 changes: 16 additions & 6 deletions ExecuteStage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,19 +290,22 @@ def __init__(self, config_file="mysql_config.json"):
print("MySQL config file path: ", config_file)
with open(config_file, 'r') as f:
config = json.load(f)
host = config["host"]
port = config["port"]
user = config["username"]
passwd = config["password"]
db = config["database"]
self.host = config["host"]
self.port = config["port"]
self.user = config["username"]
self.passwd = config["password"]
self.db = config["database"]
except Exception as e:
print("读取配置文件失败,请检查配置文件:"+config_file+"是否存在,或配置信息是否有误。")
print("Failed to read configuration file, please check if the configuration file: " +
config_file+" exists, or if the configuration information is incorrect.")
print(e)
self.connect()

def connect(self):
try:
self.conn = pymysql.connect(
host=host, port=port, user=user, passwd=passwd, db=db)
host=self.host, port=self.port, user=self.user, passwd=self.passwd, db=self.db)
print("成功连接到数据库。")
print("Successfully connected to the database.")
except:
Expand Down Expand Up @@ -408,6 +411,13 @@ def write_to_mysql(self, OUTPUT, record, types):
# 执行 SQL 语句
try:
cursor.execute(sql, to_write)
except pymysql.OperationalError as e:
print("Error:", e)
print("Try to reconnect to the database...")
self.connect()
cursor = self.conn.cursor() # 重新创建游标对象
cursor.execute(sql, to_write) # 重新执行SQL语句
# self.write_to_mysql(OUTPUT, record, types)
except Exception as e:
print("Error:", e)
print("Error SQL:", sql, to_write)
Expand Down

0 comments on commit 5921d84

Please sign in to comment.