Skip to content

Commit

Permalink
Update app.py
Browse files Browse the repository at this point in the history
Updated the downloadUrl to reflect changes to the BBS for scraping. Also added a regex search (cartFile_match) to extract the cart file link from the response text.
  • Loading branch information
cdeschenes committed Jul 25, 2023
1 parent d1f02dd commit 42501d1
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,15 @@
from bs4 import BeautifulSoup
from tqdm import tqdm
import os
import re

class Downloader:
downloadUrl = "https://lexaloffle.com/bbs/?pid={cartId}"
downloadUrl = "https://www.lexaloffle.com/bbs/cposts/ne/{cartId}.png"

def __init__(self, threadCount, workRange):
"""
:param threadCount: Amount of threads to use, sane amounts are between 1 and 50
:param workRange: Range of id's to check for. for example: (0, 105000)
:param workRange: Range of ids to check for. for example: (0, 105000)
"""
self.threadCount = threadCount
self.workRange = workRange
Expand All @@ -36,12 +37,12 @@ def download(self, cartId):
if r is None or not r.ok:
return

# Scrape the image file link
soup = BeautifulSoup(r.text, "html.parser")
cartFile = soup.find("a", {"title": "Open Cartridge File"})
if cartFile is None:
# Extract the cart file link from the response text using regex
cartFile_match = re.search(r'print_cart_code\("([^"]+)"', r.text)
if cartFile_match:
link = cartFile_match.group(1)
else:
return
link = f"https://lexaloffle.com{cartFile['href']}"

# Try getting the image file
r = self.request(link)
Expand Down

0 comments on commit 42501d1

Please sign in to comment.