Update app.py

Updated the downloadUrl to reflect changes to the BBS for scraping. Also added a regex search (cartFile_match) to extract the cart file link.
cdeschenes · Jul 25, 2023 · 42501d1 · 42501d1
1 parent d1f02dd
commit 42501d1
Showing 1 changed file with 8 additions and 7 deletions.
diff --git a/app.py b/app.py
@@ -4,14 +4,15 @@
 from bs4 import BeautifulSoup
 from tqdm import tqdm
 import os
+import re
 
 class Downloader:
- downloadUrl = "https://lexaloffle.com/bbs/?pid={cartId}"
+ downloadUrl = "https://www.lexaloffle.com/bbs/cposts/ne/{cartId}.png"
 
  def __init__(self, threadCount, workRange):
  """
  :param threadCount: Amount of threads to use, sane amounts are between 1 and 50
- :param workRange: Range of id's to check for. for example: (0, 105000)
+ :param workRange: Range of ids to check for. for example: (0, 105000)
  """
  self.threadCount = threadCount
  self.workRange = workRange
@@ -36,12 +37,12 @@ def download(self, cartId):
  if r is None or not r.ok:
  return
 
- # Scrape the image file link
- soup = BeautifulSoup(r.text, "html.parser")
- cartFile = soup.find("a", {"title": "Open Cartridge File"})
- if cartFile is None:
+ # Extract the cart file link from the response text using regex
+ cartFile_match = re.search(r'print_cart_code\("([^"]+)"', r.text)
+ if cartFile_match:
+ link = cartFile_match.group(1)
+ else:
  return
- link = f"https://lexaloffle.com{cartFile['href']}"
 
  # Try getting the image file
  r = self.request(link)