Skip to content

Commit

Permalink
Update app.py
Browse files Browse the repository at this point in the history
  • Loading branch information
cdeschenes committed Jul 25, 2023
1 parent 01c216d commit 3b7d57c
Showing 1 changed file with 38 additions and 9 deletions.
47 changes: 38 additions & 9 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,21 @@
from bs4 import BeautifulSoup
from tqdm import tqdm
import os
import logging
import re

# Send INFO-and-above records to output_log.txt; filemode 'w' starts the file afresh on each run
logging.basicConfig(
    filename='output_log.txt',
    filemode='w',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)


class Downloader:
downloadUrl = "https://www.lexaloffle.com/bbs/cposts/ne/{cartId}.png"
downloadUrl = "https://lexaloffle.com/bbs/?tid={cartId}"

def __init__(self, threadCount, workRange):
"""
:param threadCount: Amount of threads to use, sane amounts are between 1 and 50
:param workRange: Range of ids to check for. for example: (0, 105000)
:param workRange: Range of id's to check for. for example: (0, 105000)
"""
self.threadCount = threadCount
self.workRange = workRange
Expand All @@ -31,24 +37,40 @@ def request(self, url, method=requests.get, attempts=15, timeout=7, *args, **kwa
except requests.RequestException:
pass

def get_title(self, cartId):
    """
    Fetch the thread page for ``cartId`` and return a filename-safe title.

    The text of the page's ``<title>`` element is reduced to word characters,
    single spaces and hyphens so it can be embedded in a file name.

    :param cartId: Thread id substituted into ``downloadUrl``
    :return: The sanitized title, or None when the page could not be fetched,
        has no ``<title>`` element, or nothing is left after sanitizing
    """
    r = self.request(self.downloadUrl.format(cartId=cartId))
    if r is None or not r.ok:
        return None

    title_element = BeautifulSoup(r.text, "html.parser").find("title")
    if title_element is None:
        return None

    # Drop everything that is not a word character, whitespace or hyphen.
    title = re.sub(r'[^\w\s-]', '', title_element.get_text())
    # \s also keeps tabs and newlines, and removing punctuation can leave
    # doubled or leading/trailing blanks. Normalise to single spaces *after*
    # sanitizing so the result is a clean one-line file name component.
    title = " ".join(title.split())
    return title or None

def download(self, cartId):
    """
    Download the cartridge attached to thread ``cartId`` and save it.

    The thread page is searched for a ``print_cart_code("...")`` call first;
    if none is present, the "Open Cartridge File" anchor is used instead.
    The file is saved once, as ``<title>-<cartId>.p8.png``. Nothing is saved
    when the title, the thread page, the cartridge link or the cartridge
    itself cannot be obtained.

    :param cartId: Thread id substituted into ``downloadUrl``
    """
    # NOTE: get_title() requests the same thread page that is fetched again below.
    title = self.get_title(cartId)
    if not title:
        return

    # Visit the threadId
    r = self.request(self.downloadUrl.format(cartId=cartId))
    if r is None or not r.ok:
        return

    # Extract the cart file link from the response text using regex
    cartFile_match = re.search(r'print_cart_code\("([^"]+)"', r.text)
    if cartFile_match:
        # NOTE(review): the captured value is requested as-is, so it is
        # assumed to be an absolute URL - confirm against a real page.
        link = cartFile_match.group(1)
    else:
        # Scrape the image file link
        soup = BeautifulSoup(r.text, "html.parser")
        cartFile = soup.find("a", {"title": "Open Cartridge File"})
        # An anchor without an href cannot be followed; bail out instead of
        # raising KeyError.
        href = cartFile.get("href") if cartFile is not None else None
        if not href:
            return
        link = f"https://lexaloffle.com{href}"

    # Try getting the image file
    r = self.request(link)
    if r is None or not r.ok:
        return

    # Save exactly once, under the title-based name.
    self.save(content=r.content, filename=f"{title}-{cartId}.p8.png")
    logging.info("Downloaded cartId %s", cartId)

@staticmethod
def save(content, filename):
Expand All @@ -58,7 +80,8 @@ def save(content, filename):
# Main function for fetching an id and downloading it
def loop(self):
    """
    Worker body: take cart ids off the queue and download each one until
    the queue is empty.

    Every id taken is reported to the progress bar and marked done on the
    queue, even if its download raised.
    """
    # Imported locally so the method does not depend on how the module
    # itself imports the queue machinery.
    from queue import Empty

    while True:
        try:
            # Non-blocking get: an empty() check followed by a blocking get()
            # can hang forever when another worker thread takes the last item
            # in between the two calls.
            cartId = self.queue.get_nowait()
        except Empty:
            return
        try:
            self.download(cartId=cartId)
        finally:
            # Always account for the item so a failed download cannot leave
            # the queue's unfinished-task count stuck above zero.
            self.progress.update(1)
            self.queue.task_done()

Expand All @@ -81,3 +104,9 @@ def run(self):

[self.queue.put(x) for x in range(*self.workRange)]
self.startThreads()


# Script entry point: example run over ids 0..9999 using ten threads
if __name__ == "__main__":
    downloader = Downloader(workRange=(0, 10000), threadCount=10)
    downloader.run()

0 comments on commit 3b7d57c

Please sign in to comment.