Skip to content

Commit

Permalink
New Version Pre-Release
Browse files Browse the repository at this point in the history
  • Loading branch information
naibo committed Jul 8, 2023
1 parent 751fa6e commit 76b9b10
Show file tree
Hide file tree
Showing 21 changed files with 100 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from PIL import Image
# import uuid
from threading import Thread, Event
from myChrome import MyChrome
from myChrome import MyChrome, MyUCChrome
from utils import check_pause, download_image, get_output_code, isnull, myMySQL, new_line, write_to_csv, write_to_excel
desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none"
Expand Down Expand Up @@ -1473,7 +1473,7 @@ def getData(self, para, loopElement, isInLoop=True, parentPath="", index=0):
browser_t = MyChrome(
options=options, chrome_options=option, executable_path=driver_path)
elif cloudflare == 1:
browser_t = uc.Chrome(
browser_t = MyUCChrome(
options=options, chrome_options=option, executable_path=driver_path)
print("Pass Cloudflare Mode")
print("过Cloudflare验证模式")
Expand Down
78 changes: 76 additions & 2 deletions .temp_to_pub/EasySpider_windows_x64/Code/myChrome.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
Expand All @@ -14,10 +12,12 @@
from selenium.webdriver.support.ui import Select
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
import undetected_chromedriver as uc
desired_capabilities = DesiredCapabilities.CHROME
desired_capabilities["pageLoadStrategy"] = "none"



class MyChrome(webdriver.Chrome):

def __init__(self, *args, **kwargs):
Expand Down Expand Up @@ -89,3 +89,77 @@ def find_elements(self, by=By.ID, value=None, iframe=False):
raise NoSuchElementException
else:
return super().find_elements(by=by, value=value)


class MyUCChrome(uc.Chrome):
    """undetected_chromedriver Chrome whose element lookups can search iframes.

    ``find_element`` / ``find_elements`` accept an extra ``iframe`` flag.
    When it is true, every ``<iframe>`` of the top-level document is
    entered in turn and the lookup is performed inside it; the driver is
    left focused on the iframe that produced the match so the returned
    element(s) stay usable. The next lookup switches back to the top-level
    document first.
    """

    def __init__(self, *args, **kwargs):
        # True while the driver is focused inside an iframe instead of the
        # top-level document.
        self.iframe_env = False
        super().__init__(*args, **kwargs)  # delegate to the parent __init__

    def find_element(self, by=By.ID, value=None, iframe=False):
        """Find a single element, optionally searching inside iframes.

        Args:
            by: Locator strategy (defaults to ``By.ID``).
            value: Locator value.
            iframe: When true, search inside each iframe of the top-level
                document instead of the document itself.

        Returns:
            The first matching element. With ``iframe=True`` the driver
            remains switched into the iframe that contains it.

        Raises:
            NoSuchElementException: with ``iframe=True``, when no iframe
                contains a match (or the iframes could not be listed).
        """
        # A previous iframe lookup may have left the driver inside a frame;
        # always start from the top-level document.
        if self.iframe_env:
            super().switch_to.default_content()
            self.iframe_env = False

        if not iframe:
            return super().find_element(by=by, value=value)

        # Collect every iframe of the top-level document.
        try:
            frames = super().find_elements(By.CSS_SELECTOR, "iframe")
        except Exception as e:
            print(e)
            # Previously `iframes` stayed unbound here and the loop below
            # crashed with UnboundLocalError; treat it as "no iframes".
            frames = []

        for frame in frames:
            # Frame handles belong to the top-level document, so go back
            # there before entering the next one.
            super().switch_to.default_content()
            super().switch_to.frame(frame)
            self.iframe_env = True
            try:
                # Stay inside this iframe on success so the element
                # remains valid for the caller.
                return super().find_element(by=by, value=value)
            except Exception:
                print("No such element found in the iframe")

        raise NoSuchElementException

    def find_elements(self, by=By.ID, value=None, iframe=False):
        """Find all matching elements, optionally searching inside iframes.

        Args:
            by: Locator strategy (defaults to ``By.ID``).
            value: Locator value.
            iframe: When true, search inside each iframe of the top-level
                document instead of the document itself.

        Returns:
            The matching elements. With ``iframe=True`` this is the
            non-empty result from the first iframe that has any match, and
            the driver remains switched into that iframe.

        Raises:
            NoSuchElementException: with ``iframe=True``, when no iframe
                yields a non-empty result.
        """
        # A previous iframe lookup may have left the driver inside a frame;
        # always start from the top-level document.
        if self.iframe_env:
            super().switch_to.default_content()
            self.iframe_env = False

        if not iframe:
            return super().find_elements(by=by, value=value)

        # Collect every iframe of the top-level document.
        frames = super().find_elements(By.CSS_SELECTOR, "iframe")

        for frame in frames:
            try:
                # Frame handles belong to the top-level document, so go
                # back there before entering the next one.
                super().switch_to.default_content()
                super().switch_to.frame(frame)
                self.iframe_env = True
                elements = super().find_elements(by=by, value=value)
                if elements:
                    # Stay inside this iframe so the elements remain
                    # valid for the caller.
                    return elements
            except Exception:
                # Best effort: a frame that cannot be entered or searched
                # is skipped rather than aborting the whole lookup.
                print("No such element found in the iframe")

        raise NoSuchElementException

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Loading

0 comments on commit 76b9b10

Please sign in to comment.