Skip to content

Commit

Permalink
DEV Version of v0.3.1
Browse files Browse the repository at this point in the history
  • Loading branch information
naibo committed May 24, 2023
1 parent 76fe3a8 commit d88a502
Show file tree
Hide file tree
Showing 16 changed files with 117 additions and 24 deletions.
Binary file added ElectronJS/src/img/NUSLOGO.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added ElectronJS/src/img/nuslogo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added ElectronJS/src/img/xidian.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added ElectronJS/src/img/zju.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
25 changes: 23 additions & 2 deletions ElectronJS/src/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,22 @@
</link>
<title>EasySpider: NoCode Visual Web Crawler</title>
</head>

<style>
.img-container {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
margin-top: 20px;
}

.img-container img {
/*max-width: 50%;*/
height: 75px;
margin-top: 10px;
margin-bottom: 10px; /* 可根据需要调整图片之间的间距 */
}
</style>
<body>
<div id="app">

Expand All @@ -25,9 +40,15 @@ <h5 style="margin-top: 20px">选择语言/Select Language</h5>

<p><a @click="changeLang('en')" class="btn btn-outline-primary btn-lg"
style="margin-top: 15px; width: 300px;height:60px;padding-top:12px;">English</a></p>
<p><a href="https://github.com/NaiboWang/EasySpider/releases" target="_blank">Github</a>最新版本/Newest Version:{{newest_version}}</p>
<p style="font-size: 17px"><a href="https://github.com/NaiboWang/EasySpider/releases" target="_blank">Github</a>最新版本/Newest Version:<b>{{newest_version}}</b></p>
<!-- <p>如发现新版本更新,可从以下Github仓库下载最新版本使用/If a new version is found, you can download the latest version from the following Github repository:</p>-->
<!-- <p></p>-->
<div class="img-container">
<h6>出品方/Producer</h6>
<a href="https://www.zju.edu.cn" target="_blank"><img src="img/zju.png"></a>
<a href="https://www.nus.edu.sg" target="_blank"><img src="img/nuslogo.png"></a>
<a href="https://www.xidian.edu.cn" target="_blank"><img src="img/xidian.png"></a>
</div>

</div>

Expand Down
17 changes: 13 additions & 4 deletions ElectronJS/src/taskGrid/FlowChart_CN.html
Original file line number Diff line number Diff line change
Expand Up @@ -301,11 +301,12 @@ <h4 class="modal-title">等价XPath</h4>
<select v-model='nowNode["parameters"]["codeMode"]' class="form-control">
<option value = 0>执行一段JavaScript脚本</option>
<option value = 1>执行一段操作系统级别命令</option>
<option v-if="nowNode['isInLoop']" value = 2>针对当前循环项的JavaScript脚本</option>
</select>

<div>
<label>代码/脚本内容: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["code"]' placeholder="输入JS或系统命令,如:document.body.innerText = '1' 或 python D:/test.py,分别为JS命令和系统命令示例。"></textarea>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["code"]' placeholder="输入JS或系统命令,如:document.body.innerText = '1' 或 python D:/test.py,分别为JS命令和系统命令示例。如选择针对当前循环项的JS脚本,则循环项元素用arguments[0]表示,如arguments[0].style.color = 'blue'"></textarea>
<p style="margin-top: 15px">是否将执行后的输出/返回值作为字段记录:</p>
<p><select v-model='nowNode["parameters"]["recordASField"]' class="form-control">
<option value = 0></option>
Expand Down Expand Up @@ -339,9 +340,17 @@ <h4 class="modal-title">等价XPath</h4>
</div>

<div class="elements" v-if="nodeType==7">
<label>XPath: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["xpath"]'></textarea>
<p><button type="button" data-toggle="modal" data-target="#myModal_XPath" @click="changeXPaths(nowNode['parameters']['allXPaths'])" class="btn btn-primary" style="margin-top: 10px">点此查看其他等价的XPath</button></p>
<div v-if="nowNode['isInLoop']">
<!-- 如果在循环内才显示此行元素 -->
<p><input onkeydown="inputDelete(event)" type="checkbox" v-model='useLoop'></input>使用循环内的元素</p>
</div>
<div v-if='!useLoop'>
<label>XPath: </label>
<textarea onkeydown="inputDelete(event)" class="form-control" rows="2" v-model='nowNode["parameters"]["xpath"]'></textarea>
<p><button type="button" data-toggle="modal" data-target="#myModal_XPath" @click="changeXPaths(nowNode['parameters']['allXPaths'])" class="btn btn-primary" style="margin-top: 10px">点此查看其他等价的XPath</button></p>
</div>


</div>

<div class="elements" v-if="nodeType==8">
Expand Down
4 changes: 4 additions & 0 deletions ElectronJS/src/taskGrid/logic_CN.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ function handleAddElement(msg) {
addElement(6, msg);
} else if (msg["type"] == "mouseMove") {
addElement(7, msg);
} else if (msg["type"] == "loopMouseMove") {
addElement(8, msg);
addElement(7, msg);
} else if (msg["type"] == "loopClickSingle") {
addElement(8, msg);
addElement(2, msg);
Expand Down Expand Up @@ -223,6 +226,7 @@ function modifyParameters(t, para) {
t["parameters"]["optionValue"] = para["optionValue"];
} else if(t.option == 7){
t["parameters"]["xpath"] = para["xpath"];
t["parameters"]["useLoop"] = para["useLoop"];
t["parameters"]["allXPaths"] = para["allXPaths"];
} else if (t.option == 8) { //循环事件
t["parameters"]["loopType"] = para["loopType"];
Expand Down
2 changes: 1 addition & 1 deletion ElectronJS/tasks/69.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"id":69,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/24/2023, 4:07:25 AM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":0,"option":7,"title":"移动到元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":0,"useLoop":false,"xpath":"//*[contains(@class, \"LeftSide_menu_list__qXCeM\")]/div[1]","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']"]}}]}
{"id":69,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/24/2023, 8:20:48 PM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":0,"option":7,"title":"移动到元素","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":0,"useLoop":false,"xpath":"//*[contains(@class, \"LeftSide_menu_list__qXCeM\")]/div[1]","wait":4,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":["/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]","//div[contains(., '/手机/数码')]","//DIV[@class='LeftSide_menu_item__SBMWC LeftSide_text_space__2UhbG ']"]}}]}
1 change: 1 addition & 0 deletions ElectronJS/tasks/70.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":70,"name":"京东全球版-专业的综合网上购物商城","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/24/2023, 8:21:45 PM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[3],"isInLoop":false,"position":1,"parameters":{"history":4,"tabIndex":-1,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":3,"index":3,"parentId":2,"type":0,"option":7,"title":"移动到元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":-1,"useLoop":true,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":2,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":"","loopType":1}}]}
1 change: 1 addition & 0 deletions ElectronJS/tasks/71.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id":71,"name":"self-defined","url":"https://www.jd.com","links":"https://www.jd.com","create_time":"5/24/2023, 9:01:45 PM","version":"0.3.1","containJudge":false,"desc":"https://www.jd.com","inputParameters":[{"id":0,"name":"urlList_0","nodeId":1,"nodeName":"打开网页","value":"https://www.jd.com","desc":"要采集的网址列表,多行以\\n分开","type":"string","exampleValue":"https://www.jd.com"}],"outputParameters":[],"graph":[{"index":0,"id":0,"parentId":0,"type":-1,"option":0,"title":"root","sequence":[1,2,3,4],"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0},"isInLoop":false},{"id":1,"index":1,"parentId":0,"type":0,"option":1,"title":"打开网页","sequence":[],"isInLoop":false,"position":0,"parameters":{"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"url":"https://www.jd.com","links":"https://www.jd.com","maxWaitTime":10,"scrollType":0,"scrollCount":1,"scrollWaitTime":1}},{"id":2,"index":2,"parentId":0,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":false,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"codeMode":0,"code":"document.title = \"test\"","waitTime":0,"recordASField":0}},{"id":3,"index":3,"parentId":0,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":false,"position":2,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"codeMode":"1","code":"python 
D:/test.py","waitTime":0,"recordASField":0}},{"id":4,"index":4,"parentId":0,"type":1,"option":8,"title":"循环","sequence":[5,6],"isInLoop":false,"position":3,"parameters":{"history":4,"tabIndex":0,"useLoop":false,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"scrollType":0,"scrollCount":1,"scrollWaitTime":1,"loopType":1,"pathList":"","textList":"","code":"","waitTime":0,"exitCount":0,"historyWait":2,"breakMode":0,"breakCode":"","breakCodeWaitTime":0,"allXPaths":""}},{"id":5,"index":5,"parentId":4,"type":0,"option":7,"title":"移动到元素","sequence":[],"isInLoop":true,"position":0,"parameters":{"history":4,"tabIndex":0,"useLoop":true,"xpath":"/html/body/div[5]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"allXPaths":"","loopType":1}},{"id":6,"index":6,"parentId":4,"type":0,"option":5,"title":"自定义操作","sequence":[],"isInLoop":true,"position":1,"parameters":{"history":1,"tabIndex":0,"useLoop":false,"xpath":"","wait":0,"beforeJS":"","beforeJSWaitTime":0,"afterJS":"","afterJSWaitTime":0,"codeMode":"2","code":"arguments[0].style.color = 'blue'","waitTime":0,"recordASField":0}}]}
2 changes: 1 addition & 1 deletion ExecuteStage/.vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"console": "integratedTerminal",
"justMyCode": true,
// "args": ["--id", "38", "--read_type", "local", "--headless", "1"]
"args": ["--id", "9", "--headless", "0"]
"args": ["--id", "15", "--headless", "0"]
}
]
}
46 changes: 31 additions & 15 deletions ExecuteStage/easyspider_executestage.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,21 @@ def execute_code(codeMode, code, max_wait_time, element=None):
recordLog("命令执行失败")
return str(output)

def customOperation(node, loopValue):
def customOperation(node, loopValue, loopPath, index):
paras = node["parameters"]
codeMode = paras["codeMode"]
codeMode = int(paras["codeMode"])
code = paras["code"]
max_wait_time = int(paras["waitTime"])
output = execute_code(codeMode, code, max_wait_time)
if codeMode == 2: # 使用循环的情况下,传入的clickPath就是实际的xpath
try:
elements = browser.find_elements(By.XPATH, loopPath)
element = elements[index]
output = execute_code(codeMode, code, max_wait_time, element)
except:
output = ""
recordLog("JavaScript execution failed")
else:
output = execute_code(codeMode, code, max_wait_time)
recordASField = int(paras["recordASField"])
if recordASField:
global OUTPUT, outputParameters
Expand Down Expand Up @@ -214,9 +223,16 @@ def switchSelect(para, loopValue):
print("Cannot find drop-down box element:", para["xpath"])


def moveToElement(para, loopValue):
def moveToElement(para, loopElement=None, loopPath="", index=0):
time.sleep(0.1) # 移动之前等待0.1秒
if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath
path = loopPath
else:
index = 0
path = para["xpath"] # 不然使用元素定义的xpath
try:
element = browser.find_element(By.XPATH, para["xpath"])
elements = browser.find_elements(By.XPATH, path)
element = elements[index]
try:
ActionChains(browser).move_to_element(element).perform()
except:
Expand All @@ -228,7 +244,7 @@ def moveToElement(para, loopValue):


# 执行节点关键函数部分
def executeNode(nodeId, loopValue="", clickPath="", index=0):
def executeNode(nodeId, loopValue="", loopPath="", index=0):
node = procedure[nodeId]
WebDriverWait(browser, 10).until
# 等待元素出现才进行操作,10秒内未出现则报错
Expand All @@ -237,33 +253,33 @@ def executeNode(nodeId, loopValue="", clickPath="", index=0):
# 根据不同选项执行不同操作
if node["option"] == 0 or node["option"] == 10: # root操作,条件分支操作
for i in node["sequence"]: # 从根节点开始向下读取
executeNode(i, loopValue, clickPath, index)
executeNode(i, loopValue, loopPath, index)
elif node["option"] == 1: # 打开网页操作
recordLog("openPage")
openPage(node["parameters"], loopValue)
elif node["option"] == 2: # 点击元素
recordLog("Click")
clickElement(node["parameters"], loopValue, clickPath, index)
clickElement(node["parameters"], loopValue, loopPath, index)
elif node["option"] == 3: # 提取数据
recordLog("getData")
getData(node["parameters"], loopValue, node["isInLoop"],
parentPath=clickPath, index=index)
parentPath=loopPath, index=index)
saveData()
elif node["option"] == 4: # 输入文字
inputInfo(node["parameters"], loopValue)
elif node["option"] == 5: # 自定义操作
customOperation(node, loopValue)
customOperation(node, loopValue, loopPath, index)
saveData()
elif node["option"] == 6: # 切换下拉框
switchSelect(node["parameters"], loopValue)
elif node["option"] == 7: # 鼠标移动到元素上
moveToElement(node["parameters"], loopValue)
moveToElement(node["parameters"], loopValue, loopPath, index)
elif node["option"] == 8: # 循环
recordLog("loop")
loopExcute(node, loopValue, clickPath, index) # 执行循环
loopExcute(node, loopValue, loopPath, index) # 执行循环
elif node["option"] == 9: # 条件分支
recordLog("judge")
judgeExcute(node, loopValue, clickPath, index)
judgeExcute(node, loopValue, loopPath, index)

# 执行完之后进行等待
if node["option"] != 0:
Expand Down Expand Up @@ -630,7 +646,7 @@ def inputInfo(para, loopValue):
# 点击元素事件
def clickElement(para, loopElement=None, clickPath="", index=0):
global history
time.sleep(0.1) # 点击之前等待1秒
time.sleep(0.1) # 点击之前等待0.1秒
rt = Time("Click Element")
Log("Wait 0.1 second before clicking element")
if para["useLoop"]: # 使用循环的情况下,传入的clickPath就是实际的xpath
Expand Down Expand Up @@ -669,7 +685,7 @@ def clickElement(para, loopElement=None, clickPath="", index=0):
recordLog(str(e))
time.sleep(0.5) # 点击之后等半秒
Log("Wait 0.5 second after clicking element")
time.sleep(random.uniform(1, 3)) # 生成一个a到b的小数等待时间
time.sleep(random.uniform(1, 2)) # 生成一个a到b的小数等待时间
# 点击后对该元素执行一段JavaScript代码 (run the user-supplied afterJS on the element AFTER the click)
try:
if para["afterJS"] != "":
Expand Down
27 changes: 27 additions & 0 deletions Extension/manifest_v3/src/content-scripts/messageInteraction.js
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,33 @@ export function sendMouseMove(){
chrome.runtime.sendMessage(msg);
}

/**
 * Build a "loopMouseMove" message for the current selection and send it
 * through chrome.runtime.sendMessage, wrapped as { type: 3, msg }.
 *
 * When detectAllSelected() is truthy the message keeps loopType 1 and carries
 * global.app._data.nowPath as its xpath. Otherwise loopType becomes 2 and the
 * message gains a pathList with one readXPath(node, 0) result per entry of
 * global.nodeList.
 */
export function sendLoopMouseMove(){
    const payload = {
        "type": "loopMouseMove",
        "history": history.length, // length of the history object at send time
        "tabIndex": -1,
        "xpath": "", // stays empty unless the non-fixed branch fills it in
        "allXPaths": "",
        "useLoop": true, // the move targets the element of the enclosing loop
        "loopType": 1, // 1 = non-fixed element list, 2 = fixed element list
    };

    if (detectAllSelected()) {
        // Everything is selected: one shared XPath describes the loop items.
        payload["xpath"] = global.app._data.nowPath;
    } else {
        // Partial selection: switch to a fixed list of per-node XPaths.
        // Known limitation (from the original note): sites that give every
        // element a unique id (e.g. Taobao) make id-based XPaths unsuitable;
        // that problem is left unresolved for now.
        payload.loopType = 2;
        payload["pathList"] = Array.from(
            global.nodeList,
            (entry) => readXPath(entry["node"], 0),
        );
    }

    chrome.runtime.sendMessage({ "type": 3, msg: payload });
}

//采集单个元素
export function collectSingle() {
let message = {
Expand Down
Loading

0 comments on commit d88a502

Please sign in to comment.