-
Notifications
You must be signed in to change notification settings - Fork 17
/
datasets_downloader.py
57 lines (43 loc) · 1.73 KB
/
datasets_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from __future__ import annotations
import asyncio
import time
from collections import Counter
from contextlib import suppress
from pathlib import Path
from hcaptcha_challenger.agents import Malenia, AgentT
from hcaptcha_challenger.utils import SiteKey
from loguru import logger
from playwright.async_api import BrowserContext as ASyncContext, async_playwright
# Labels gathered across all collection rounds; collete_datasets appends to it
# and bytedance tallies it at the end of the run.
collected = []

# Number of collect-and-refresh rounds performed in a single run.
per_times = 60

# Directory passed to the agent as ``tmp_dir``; located next to this script.
tmp_dir = Path(__file__).parent / "tmp_dir"

# Site key from which the challenge page link is derived.
sitekey = SiteKey.user_easy
async def collete_datasets(context: ASyncContext) -> None:
    """Run ``per_times`` collect-and-refresh rounds and record each label.

    Opens a new page in ``context``, navigates to the site link derived from
    the module-level ``sitekey``, handles the checkbox, and then loops: each
    round calls ``agent.collect()``, appends the resulting label to the
    module-level ``collected`` list, prints a progress line, and clicks the
    challenge's refresh button.

    Args:
        context: Playwright async browser context used to open the page.
    """
    page = await context.new_page()
    agent = AgentT.from_page(page=page, tmp_dir=tmp_dir)

    sitelink = SiteKey.as_sitelink(sitekey)
    await page.goto(sitelink)

    await agent.handle_checkbox()

    for pth in range(1, per_times + 1):
        # Best-effort: a failed round must not abort the whole run, but the
        # failure is logged instead of being silently discarded.
        try:
            t0 = time.time()
            label = await agent.collect()
            te = f"{time.time() - t0:.2f}s"
            probe = list(agent.qr.requester_restricted_answer_set.keys())
            # Prefer the first restricted-answer key when one exists;
            # otherwise fall back to the label returned by collect().
            mixed_label = probe[0] if probe else label
            collected.append(mixed_label)
            print(f">> COLLETE - progress=[{pth}/{per_times}] timeit={te} {label=} {probe=}")
        except Exception as err:
            logger.warning("Collect round {}/{} failed: {!r}", pth, per_times, err)

        # Short pause, then request a fresh challenge for the next round.
        await page.wait_for_timeout(500)
        fl = page.frame_locator(agent.HOOK_CHALLENGE)
        await fl.locator("//div[@class='refresh button']").click()
@logger.catch
async def bytedance():
    """Launch headless Chromium, run the collection routine, print label counts.

    A browser context with the ``en-US`` locale is created, stealth patches are
    applied to it, ``collete_datasets`` is run against it, and the context is
    closed. Afterwards a ``Counter`` of the module-level ``collected`` list is
    printed.
    """
    async with async_playwright() as playwright:
        chromium = await playwright.chromium.launch(headless=True)
        ctx = await chromium.new_context(locale="en-US")

        await Malenia.apply_stealth(ctx)
        await collete_datasets(ctx)

        await ctx.close()

    print(f"\n>> RESULT - {Counter(collected)=}")
if __name__ == "__main__":
    # Script entry point: build the top-level coroutine and drive it to
    # completion on a fresh event loop.
    main_coro = bytedance()
    asyncio.run(main_coro)