Skip to content

Commit

Permalink
增加元素属性值提取的选项
Browse files Browse the repository at this point in the history
  • Loading branch information
naibo committed Jul 19, 2023
1 parent 31c5361 commit f16722f
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 3 deletions.
7 changes: 6 additions & 1 deletion ElectronJS/src/taskGrid/FlowChart.html
Original file line number Diff line number Diff line change
Expand Up @@ -270,13 +270,18 @@ <h4 class="modal-title">Equivalent XPath</h4>
<option :value = 6>Webpage Title</option>
<option :value = 7>Element Screenshot</option>
<option :value = 8>OCR Results</option>
<option :value = 14>Properties of elements</option>
<option :value = 9>Return value of JavaScript code for this element (must start with 'return')</option>
<option :value = 12>System command return value</option>
<option :value = 13>Value of a Python expression (the "eval" operation)</option>
<option :value = 10>Selected value of the current select box</option>
<option :value = 11>Selected text of the current select box</option>
</select>
<div v-if='paras.parameters[paraIndex]["contentType"] == 9 || paras.parameters[paraIndex]["contentType"] >= 12'>
<div v-if='paras.parameters[paraIndex]["contentType"] == 14'>
<label>Attribute Name:</label>
<input onkeydown="inputDelete(event)" class="form-control" v-model='paras.parameters[paraIndex]["JS"]' placeholder="Attribute name, e.g. href to extract the href attribute of the current element (the link address)."></input>
</div>
<div v-else-if='paras.parameters[paraIndex]["contentType"] == 9 || paras.parameters[paraIndex]["contentType"] >= 12'>
<label>Code (Use Field["FieldName"] to input the latest value of a field): </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2"
placeholder='The element should be represented by arguments[0]. Here is an example: return arguments[0].innerText + "US Dollar". This code extracts the innerText of the element and appends "US Dollar" to it.' v-model='paras.parameters[paraIndex]["JS"]'></textarea>
Expand Down
8 changes: 7 additions & 1 deletion ElectronJS/src/taskGrid/FlowChart_CN.html
Original file line number Diff line number Diff line change
Expand Up @@ -270,19 +270,25 @@ <h4 class="modal-title">等价XPath</h4>
<option :value = 6>页面标题</option>
<option :value = 7>元素截图</option>
<option :value = 8>OCR识别文字</option>
<option :value = 14>元素的属性值</option>
<option :value = 9>(针对该元素的)JavaScript代码返回值(需以return 开头)</option>
<option :value = 12>系统命令返回值</option>
<option :value = 13>执行环境下的Python表达式值(eval操作)</option>
<option :value = 10>当前选择框选中的选项值</option>
<option :value = 11>当前选择框选中的选项文本</option>
</select>
<div v-if='paras.parameters[paraIndex]["contentType"] == 9 || paras.parameters[paraIndex]["contentType"] >= 12'>
<div v-if='paras.parameters[paraIndex]["contentType"] == 14'>
<label>属性名称:</label>
<input onkeydown="inputDelete(event)" class="form-control" v-model='paras.parameters[paraIndex]["JS"]' placeholder="属性名称,如class表示当前元素的class属性值,即元素所拥有的类名。"></input>
</div>
<div v-else-if='paras.parameters[paraIndex]["contentType"] == 9 || paras.parameters[paraIndex]["contentType"] >= 12'>
<label>JavaScript(也可以不针对该元素,直接写return JS代码即可)/系统命令/Python代码:</label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2"
placeholder='如要针对该元素,则该元素用arguments[0]来表示,示例:return arguments[0].innerText + "美元",即实现了提取该元素innerText并后面加“美元”的功能;不然直接如写return new Date().toString()即可获得当前时间戳。' v-model='paras.parameters[paraIndex]["JS"]'></textarea>
<label>最长等待脚本执行时间(0代表无限等待): </label>
<input onkeydown="inputDelete(event)" required class="form-control" type="number" v-model.number='paras.parameters[paraIndex]["JSWaitTime"]'></input>
</div>

<label>节点类型</label>
<select v-model='paras.parameters[paraIndex]["nodeType"]' class="form-control">
<option :value = 0>普通节点</option>
Expand Down
1 change: 1 addition & 0 deletions ElectronJS/tasks/191.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":191,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/19/2023, 1:16:44 PM","update_time":"7/19/2023, 1:16:44 PM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_文本","desc":"","type":"text","recordASField":1,"exampleValue":"/手机/数码"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"
","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']","/html/body/div[last()-6]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":0,"contentType":14,"relative":true,"name":"参数1_文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"/手机/数码"}],"unique_index":"z1d2156wtmolk99szzv","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"class","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
1 change: 1 addition & 0 deletions ElectronJS/tasks/192.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":192,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"7/19/2023, 1:20:03 PM","update_time":"7/19/2023, 1:20:03 PM","version":"0.5.0","saveThreshold":10,"cloudflare":0,"environment":0,"maxViewLength":15,"recordLog":1,"outputFormat":"xlsx","saveName":"current_time","inputExcel":"","startFromExit":0,"containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"text","exampleValue":"https://www.jd.com"}],"outputParameters":[{"id":0,"name":"参数1_链接文本","desc":"","type":"text","recordASField":1,"exampleValue":"手机"},{"id":1,"name":"参数2_链接地址","desc":"","type":"text","recordASField":1,"exampleValue":"https://shouji.jd.com/"}],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"cookies":""}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div/a","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitEl
ementIframeIndex":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/a[1]","//a[contains(., '手机')]","/html/body/div[last()-6]/div/div[last()-4]/div/div[last()-2]/div/div/div/div[last()-1]/div[last()-12]/a[last()-1]"]}},{"id":3,"index":3,"parentId":2,"type":0,"option":3,"title":"提取数据","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"","iframe":false,"wait":0,"waitType":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"waitElement":"","waitElementTime":10,"waitElementIframeIndex":0,"clear":0,"paras":[{"nodeType":1,"contentType":14,"relative":true,"name":"参数1_链接文本","desc":"","extractType":0,"relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"手机"}],"unique_index":"wf0s192w7clk99xyqe","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"href","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0},{"nodeType":2,"contentType":0,"relative":true,"name":"参数2_链接地址","desc":"","relativeXPath":"","allXPaths":"","exampleValues":[{"num":0,"value":"https://shouji.jd.com/"}],"unique_index":"wf0s192w7clk99xyqe","iframe":false,"default":"","paraType":"text","recordASField":1,"beforeJS":"","beforeJSWaitTime":0,"JS":"","JSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"downloadPic":0}],"loopType":1}}]}
2 changes: 1 addition & 1 deletion ExecuteStage/.vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"justMyCode": false,
// "args": ["--id", "[7]", "--read_type", "remote", "--headless", "0"]
// "args": ["--id", "[9]", "--read_type", "remote", "--headless", "0", "--saved_file_name", "YOUTUBE"]
"args": ["--id", "[7]", "--headless", "0", "--user_data", "1", "--keyboard", "0"]
"args": ["--id", "[88]", "--headless", "0", "--user_data", "1", "--keyboard", "0"]
}
]
}
6 changes: 6 additions & 0 deletions ExecuteStage/easyspider_executestage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1516,6 +1516,12 @@ def get_content(self, p, element):
content = select_element.first_selected_option.text
except:
content = ""
elif p["contentType"] == 14: # 元素属性值
attribute_name = p["JS"]
try:
content = element.get_attribute(attribute_name)
except:
content = ""
return content

def clearOutputParameters(self):
Expand Down

0 comments on commit f16722f

Please sign in to comment.