Skip to content

Commit

Permalink
Two new custom operations
Browse files Browse the repository at this point in the history
  • Loading branch information
naibo committed Dec 30, 2023
1 parent 2a24101 commit c5a4b11
Show file tree
Hide file tree
Showing 9 changed files with 44 additions and 13 deletions.
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Note: The various folders within this directory are not directly usable software, but temporary folders used by the author at the time of release. Please visit the official website to download readily usable software packages: https://www.easyspider.cn
Note: The various folders within this directory are not directly usable software, but temporary folders used by the author at the time of release. Please visit the official website to download readily usable software packages: https://www.easyspider.net
5 changes: 1 addition & 4 deletions ElectronJS/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -913,10 +913,7 @@ async function beginInvoke(msg, ws) {
xpath = parent_xpath + xpath;
}
let elementInfo = {iframe: param.iframe, xpath: xpath, id: -1};
let element = await findElementAcrossAllWindows(
elementInfo,
(notifyBrowser = false)
);
let element = await findElementAcrossAllWindows(elementInfo);
if (element != null) {
await execute_js(param.beforeJS, element, param.beforeJSWaitTime);
if (param.contentType == 0) {
Expand Down
11 changes: 9 additions & 2 deletions ElectronJS/src/taskGrid/FlowChart.html
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,8 @@ <h4 class="modal-title">Equivalent XPath</h4>
<option :value = 7>Pause program execution (such as when the captcha box appears)</option>
<option :value = 8>Refresh page</option>
<option :value = 9>Send Email</option>
<option :value = 10>Clear all field values</option>
<option :value = 11>Generate new data row</option>
</select>
<div v-if='nowNode["parameters"]["codeMode"] < 3 || nowNode["parameters"]["codeMode"] >= 5 && nowNode["parameters"]["codeMode"] <=6'>
<label>Code (Use Field["FieldName"] to input the latest value of a field): </label>
Expand Down Expand Up @@ -486,7 +488,12 @@ <h4 class="modal-title">Equivalent XPath</h4>
<label>Email content:</label>
<textarea spellcheck=false onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["emailConfig"]["content"]' placeholder="Write the email content here"></textarea>
</div>

<div v-if='nowNode["parameters"]["codeMode"] == 10'>
<label>This action clears all field values, e.g. to reset every field before a web scraping task starts.</label>
</div>
<div v-if='nowNode["parameters"]["codeMode"] == 11'>
<label>This action generates a new row of data. For example, a web scraping task can be designed not to generate data rows for the time being, and to generate a new row with this action once all fields have been extracted.</label>
</div>
</div>

<div class="elements" v-if="nodeType==6">
Expand Down Expand Up @@ -708,8 +715,8 @@ <h4 class="modal-title" id="myModalLabel">Save Task (Can press Ctrl + S to open
<input spellcheck=false onkeydown="inputDelete(event)" id="serviceDescription" name="serviceDescription" class="form-control"></input>
<label>Export Data Format (Excel/CSV/TXT/Database):</label>
<select id="outputFormat" class="form-control">
<option value="xlsx">XLSX (Excel file, recommended use CSV format when single cell exceeds 500 characters)</option>
<option value="csv">CSV (Recommended for collecting long articles)</option>
<option value="xlsx">XLSX (Excel file, recommended use CSV format when single cell exceeds 500 characters)</option>
<option value="txt">TXT</option>
<option value="json">JSON</option>
<option value="mysql">MySQL Database (recommended for large amounts of data)</option>
Expand Down
6 changes: 6 additions & 0 deletions ElectronJS/src/taskGrid/FlowChart.js
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,12 @@ let app = new Vue({
case 9:
this.nowNode["title"] = LANG("发送邮件", "Send Email");
break;
case 10:
this.nowNode["title"] = LANG("清空字段值", "Clear Field Value");
break;
case 11:
this.nowNode["title"] = LANG("生成新行", "Generate New Row");
break;
case -1: // 跳转到其他操作时,不改变标题
break;
default: // 默认情况
Expand Down
11 changes: 9 additions & 2 deletions ElectronJS/src/taskGrid/FlowChart_CN.html
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,8 @@ <h4 class="modal-title">等价XPath</h4>
<option :value = 7>暂停程序执行(如检测到验证码框出现时暂停执行)</option>
<option :value = 8>刷新页面</option>
<option :value = 9>发送邮件</option>
<option :value = 10>清空所有字段值</option>
<option :value = 11>生成新数据行</option>
</select>
<div v-if='nowNode["parameters"]["codeMode"] < 3 || nowNode["parameters"]["codeMode"] >= 5 && nowNode["parameters"]["codeMode"] <=6'>
<label>代码/脚本内容(用Field["字段名"]来输入某字段/自定义操作的最新提取/返回值): </label>
Expand Down Expand Up @@ -486,7 +488,12 @@ <h4 class="modal-title">等价XPath</h4>
<label>邮件内容:</label>
<textarea spellcheck=false onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["emailConfig"]["content"]' placeholder="这里写邮件内容"></textarea>
</div>

<div v-if='nowNode["parameters"]["codeMode"] == 10'>
<label>此操作可以清空所有字段值,如用于爬虫任务开始前清空所有字段值。</label>
</div>
<div v-if='nowNode["parameters"]["codeMode"] == 11'>
<label>此操作可以生成新数据行,如用于爬虫任务设计时暂不生成数据行,等所有字段提取结束后统一生成新数据行。</label>
</div>
</div>

<div class="elements" v-if="nodeType==6">
Expand Down Expand Up @@ -708,8 +715,8 @@ <h4 class="modal-title" id="myModalLabel">保存任务(可按Ctrl+S调出此
<input spellcheck=false onkeydown="inputDelete(event)" id="serviceDescription" name="serviceDescription" class="form-control"></input>
<label>导出数据格式(Excel/CSV/TXT/数据库,<a href="https://www.bilibili.com/video/BV1os4y1679S/" target="_blank">查看MySQL操作教程</a>):</label>
<select id="outputFormat" class="form-control">
<option value = "xlsx">XLSX(即EXCEL文件,建议单个单元格长度超过500时使用CSV格式存储)</option>
<option value = "csv">CSV(采集长文章推荐使用此格式)</option>
<option value = "xlsx">XLSX(即EXCEL文件,建议单个单元格长度超过500时使用CSV格式存储)</option>
<option value = "txt">TXT</option>
<option value = "json">JSON</option>
<option value = "mysql">MySQL数据库(大量数据推荐使用)</option>
Expand Down
1 change: 1 addition & 0 deletions ElectronJS/tasks/314.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion ExecuteStage/.vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"justMyCode": false,
// "args": ["--ids", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--ids", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--ids", "[13]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
"args": ["--ids", "[70]", "--headless", "0", "--user_data", "0", "--keyboard", "0",
"--read_type", "remote"]
// "args": "--ids '[97]' --user_data 1 --server_address http://localhost:8074 --config_folder '/Users/naibo/Documents/EasySpider/ElectronJS/' --headless 0 --read_type remote --config_file_name config.json --saved_file_name"
}
Expand Down
6 changes: 6 additions & 0 deletions ExecuteStage/easyspider_executestage.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,12 @@ def customOperation(self, node, loopValue, loopPath, index):
self.print_and_log("根据设置的自定义操作,任务已刷新页面|Task refreshed page according to custom operation")
elif codeMode == 9: # 发送邮件
send_email(node["parameters"]["emailConfig"])
elif codeMode == 10: # 清空所有字段值
self.clearOutputParameters()
elif codeMode == 11: # 生成新的数据行
line = new_line(self.outputParameters,
self.maxViewLength, self.outputParametersRecord)
self.OUTPUT.append(line)
else: # 0 1 5 6
output = self.execute_code(
codeMode, code, max_wait_time, iframe=params["iframe"])
Expand Down
13 changes: 10 additions & 3 deletions ExecuteStage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,10 +539,17 @@ def write_to_excel(file_name, data, types, record):
for i in range(len(line)):
if record[i]:
to_write.append(line[i])
ws.append(to_write)
try:
ws.append(to_write)
except:
print("写入Excel文件失败,请检查数据类型是否正确。")
print("Failed to write to Excel file, please check if the data type is correct.")
# 保存工作簿
wb.save(file_name)

try:
wb.save(file_name)
except:
print("保存Excel文件失败,请检查文件是否被其他程序打开。")
print("Failed to save Excel file, please check if the file is opened by other programs.")

class Time:
def __init__(self, type1=""):
Expand Down

0 comments on commit c5a4b11

Please sign in to comment.