Skip to content

Commit

Permalink
Speed UP!!!
Browse files Browse the repository at this point in the history
  • Loading branch information
naibo committed Dec 9, 2023
1 parent 5b1d653 commit 6794998
Show file tree
Hide file tree
Showing 7 changed files with 1,115 additions and 21 deletions.
5 changes: 3 additions & 2 deletions ElectronJS/src/taskGrid/invokeTask.html
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ <h4 style="text-align: center;">{{"Task Invocation~任务调用" | lang}}</h4>
<p style="word-wrap: break-word;word-break: break-all;overflow: hidden;max-height: 100px;">{{"Task Description:~任务描述:" | lang}} {{task["desc"]}}</p>
<p style="word-wrap: break-word;word-break: break-all;overflow: hidden;max-height: 100px;">{{"API URL (POST):~API 调用网址(POST):" |
lang}} {{backEndAddressServiceWrapper}}/invokeTask?id={{task["id"]}}</p>
<p style="word-wrap: break-word;word-break: break-all;overflow: hidden;max-height: 100px;">{{"Click here to see how to invoke task by API via POST request (Postman or JavaScript): ~点此查看通过POST方式进行API调用的示例(Postman或JS代码):" | lang}}<a target="_blank" href="https://github.com/NaiboWang/EasySpider/wiki/API-Invoke-Example">https://github.com/NaiboWang/EasySpider/wiki/API-Invoke-Example</a></p>
<p style="word-wrap: break-word;word-break: break-all;overflow: hidden;max-height: 100px;">{{"URL of how to invoke task by API via POST request (Postman or JavaScript): ~通过POST方式进行API调用的示例教程(Postman或JS代码):" | lang}}<a target="_blank" href="https://github.com/NaiboWang/EasySpider/wiki/API-Invoke-Example">https://github.com/NaiboWang/EasySpider/wiki/API-Invoke-Example</a></p>
<p><button class="btn btn-primary" @click="readFromExcel">{{"Read parameters from Excel file~从Excel文件读取输入参数"
| lang}}
</button></p>
Expand Down Expand Up @@ -237,7 +237,7 @@ <h4 style="text-align: center;">{{"Task Invocation~任务调用" | lang}}</h4>
<input type="text" class="form-control" v-model="mysql_config_path"></input>
</div>
</form>
<label style="display: block">{{"Click the button below to execute the task. Long press p on the keyboard to pause the task. Manual intervention is possible during the task execution process, ~点击以下按钮执行任务,任务执行过程中可以长按p键暂停任务的执行以便" | lang }}<b>{{"~人工干预," | lang}}</b>{{"such as manually input a password or captcha: ~如手动输入密码,验证码等。" | lang}}</label>
<label style="display: block">{{"Click the button below to execute the task. Long press the pause button (default: p) on the keyboard to pause the task. Manual intervention is possible during the task execution process, ~点击以下按钮执行任务,任务执行过程中可以长按暂停键(默认:p键)暂停任务的执行以便" | lang }}<b>{{"~人工干预," | lang}}</b>{{"such as manually input a password or captcha: ~如手动输入密码,验证码等。" | lang}}</label>
<button class="btn btn-primary" v-on:click="localExecuteInstant(false)">{{"Directly Run Locally (Clean Mode)~本地直接执行(纯净模式)" |
lang}}
</button>
Expand All @@ -247,6 +247,7 @@ <h4 style="text-align: center;">{{"Task Invocation~任务调用" | lang}}</h4>
<!-- <button style="margin-left: 5px;" v-on:click="remoteExcuteInstant" class="btn btn-primary">Directly Run Remotely</button> -->
<label style="margin-top: 15px;display: block">{{"You can also use the XPath Helper extension to test XPaths when executing the task:~执行任务的过程中也可以随时使用XPath Helper扩展来调试XPath。" | lang}}</label>
<label style="margin-top: 15px;display: block">{{"如果想进行更复杂的操作,如设置无头模式,设置定时执行等,请使用下方的命令行执行任务选项并配置好命令行参数。~ If you want to perform more complex operations, such as setting headless mode, setting scheduled execution, etc., please use the command line to execute the task and configure the command line parameters below." | lang}}</label>
</label>
<div style="margin-bottom: 10px;">
<label style="margin-top: 10px;">{{"Execution ID (EID), execution files are stored in 'execution_instances' folder:~执行ID(执行文件存放在execution_instances文件夹内):" | lang}}</label>
<input class="form-control" v-model="ID"></input>
Expand Down
3 changes: 2 additions & 1 deletion ElectronJS/src/taskGrid/taskList.html
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@ <h4 style="text-align: center;">{{"Task List~任务列表" | lang}}</h4>
<h5 style="text-align: center;" v-if="mobile==1">{{"View this table by direction keys on keyboard~按键盘方向键浏览此表格" | lang}}</h5>
<p><a v-if="type==3" href="javascript:void(0)" v-on:click="newTask" class="btn btn-primary">{{"New Task~创建新任务" | lang}}</a></p>
<div v-if="type != 3" style="margin-bottom: 20px">
<div style="margin-bottom: 5px">{{"提示:下方的官方教程和答疑平台均在Github,可能出现访问速度慢的问题,请耐心等待。~" | lang}}</div>
<a class="btn btn-primary" href="https://github.com/NaiboWang/EasySpider/wiki" target="_blank">{{"Software Documentation~软件使用说明文档" | lang}}</a>
<a class="btn btn-primary" href="https://github.com/NaiboWang/EasySpider/issues?q=is%3Aissue" target="_blank">{{"Ask questions here~官方答疑平台" | lang}}</a>
<a class="btn btn-primary" href="https://github.com/NaiboWang/EasySpider/issues/22" target="_blank">{{"See how to run task by schedule~定时执行任务教程" | lang}}</a>
<a class="btn btn-primary" href="https://github.com/NaiboWang/EasySpider/wiki/Run-multiple-tasks-in-parallel" target="_blank">{{"See how to run multiple tasks in parallel~同时执行多个任务教程" | lang}}</a>
<!-- <a class="btn btn-primary" href="https://github.com/NaiboWang/EasySpider/wiki/Run-multiple-tasks-in-parallel" target="_blank">{{"See how to run multiple tasks in parallel~同时执行多个任务教程" | lang}}</a>-->
</div>
<div style="margin-bottom: 10px">
<table style="table-layout: auto;" class="table table-hover">
Expand Down
2 changes: 1 addition & 1 deletion ElectronJS/tasks/221.json

Large diffs are not rendered by default.

1,091 changes: 1,090 additions & 1 deletion ElectronJS/tasks/235.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion ExecuteStage/.vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"justMyCode": false,
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--ids", "[89]", "--headless", "0", "--user_data", "0", "--keyboard", "0"]
"args": ["--ids", "[93]", "--headless", "0", "--user_data", "0", "--keyboard", "0"]
// "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
}
]
Expand Down
22 changes: 14 additions & 8 deletions ExecuteStage/easyspider_executestage.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,13 +322,13 @@ def preprocess(self):
except:
node["parameters"]["exitElement"] = "//body"
node["parameters"]["quickExtractable"] = False # 是否可以快速提取
# 如果循环中只有一个提取数据操作,且提取数据操作的提取内容为元素截图,那么可以快速提取
if len(node["sequence"]) == 1 and self.procedure[node["sequence"][0]]["option"] == 3:
# 如果(不)固定元素列表循环中只有一个提取数据操作,且提取数据操作的提取内容为元素截图,那么可以快速提取
if len(node["sequence"]) == 1 and self.procedure[node["sequence"][0]]["option"] == 3 and (int(node["parameters"]["loopType"]) == 1 or int(node["parameters"]["loopType"]) == 2):
paras = self.procedure[node["sequence"][0]]["parameters"]["paras"]
waitElement = self.procedure[node["sequence"][0]]["parameters"]["waitElement"]
node["parameters"]["quickExtractable"] = True # 先假设可以快速提取
for para in paras:
optimizable = detect_optimizable(para, ignoreWaitElement=False, waitElement=waitElement, includePicture=True)
optimizable = detect_optimizable(para, ignoreWaitElement=False, waitElement=waitElement)
if para["iframe"]: # 如果是iframe,那么不可以快速提取
optimizable = False
if not optimizable: # 如果有一个不满足优化条件,那么就不能快速提取
Expand Down Expand Up @@ -1803,6 +1803,8 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
content_type = ""
elif p["nodeType"] == 2:
content_type = "//@href"
elif p["nodeType"] == 4:
content_type = "//@src"
elif p["contentType"] == 1:
content_type = "/text()"
elif p["contentType"] == 0:
Expand Down Expand Up @@ -1843,7 +1845,7 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
# 拼接所有文本内容并去掉两边的空白
content = ' '.join(result.strip()
for result in content if result.strip())
if p["nodeType"] == 2:
if p["nodeType"] == 2 or p["nodeType"] == 4:
base_url = self.browser.current_url
# 合并链接相对路径为绝对路径
content = urljoin(base_url, content)
Expand Down Expand Up @@ -1992,6 +1994,7 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
"headless": False,
"server_address": "http:https://localhost:8074",
"keyboard": True, # 是否监听键盘输入
"pause_key": "p", # 暂停键
"version": "0.6.0",
}
c = Config(config)
Expand Down Expand Up @@ -2189,10 +2192,13 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
# Thread(target=check_pause, args=("p", event)).start()
# else:
time.sleep(3)
try:
pause_key = service["pauseKey"]
except:
pause_key = "p"
if c.pause_key == "p":
try:
pause_key = service["pauseKey"]
except:
pause_key = "p"
else:
pause_key = c.pause_key
press_time = {"duration": 0, "is_pressed": False, "pause_key": pause_key}
print("\n\n----------------------------------")
print(
Expand Down
11 changes: 4 additions & 7 deletions ExecuteStage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,21 +94,18 @@ def on_release(key):
# event.clear()
# time.sleep(1) # 每秒检查一次

def detect_optimizable(para, ignoreWaitElement=True, waitElement="", includePicture=False):
def detect_optimizable(para, ignoreWaitElement=True, waitElement=""):
if para["beforeJS"] == "" and para["afterJS"] == "" and para["contentType"] <= 1:
if para["nodeType"] <= 2:
if ignoreWaitElement or waitElement == "":
return True
else:
return False
elif para["nodeType"] == 4: # 如果是图片
if includePicture:
if para["downloadPic"]:
return False
else:
return True
else:
if para["downloadPic"]:
return False
else:
return True
else:
return False

Expand Down

0 comments on commit 6794998

Please sign in to comment.