-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Benjamin
committed
Jun 21, 2024
1 parent
6d96d9d
commit 5b85942
Showing
29 changed files
with
1,092 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
# EbayScrapper | ||
# EbayScrap | ||
Scraping eBay for data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Directory holding the log files, and the template for the file names
# created in it ({subject} and {date} are filled in with str.format).
LOG_DIRECTORY: str = "./log"
LOGNAME_PATTERN: str = "ebay-{subject}-{date}.log"

# Names of the JSON files used by the project.
MAIL_CREDENTIAL_FILENAME: str = "mail_keys.json"
POSTGRESS_CREDENTIAL_FILENAME: str = "pg_keys.json"
LINKS_FILENAME: str = "links.json"
CONFIG_DATA: str = "config.json"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
|
||
if __name__ == "__main__":
    # Command-line loader: inserts the rows of a CSV file into the database
    # through the project's INSERT_QUERY template.
    #
    # Usage: <script> <csv_file> <offer_type>
    import os
    import sys
    import json
    import psycopg2
    import csv
    import offer  # not referenced below; kept because the original script imports it
    from common import POSTGRESS_CREDENTIAL_FILENAME
    from queries import INSERT_QUERY
    from scrap import notnull, nullable

    # Check the arguments before opening a database connection, so that a bad
    # invocation ends with a usage message instead of an IndexError raised
    # while a connection is already open.
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <csv_file> <offer_type>")

    filepath: str = sys.argv[1]
    # Named offer_type (not `type`) so the builtin is not shadowed.
    offer_type: str = sys.argv[2]

    postgress_path: str = os.path.abspath(POSTGRESS_CREDENTIAL_FILENAME)
    with open(postgress_path, 'r') as key_file:
        postgress_data: dict = json.load(key_file)

    connection = psycopg2.connect(**postgress_data)
    cursor = connection.cursor()

    try:
        # newline="" lets the csv module handle line endings itself, which
        # matters for quoted fields that contain line breaks.
        with open(filepath, "r", encoding="UTF-8", newline="") as file:
            reader = csv.reader(file)
            # Discard the first row (presumably the CSV header); the default
            # keeps an empty file from raising StopIteration.
            next(reader, None)

            # Every row must have exactly these 14 columns; any other width
            # raises ValueError, which is reported by the handler below.
            for (row_id, date, is_new, title, star, subtitle, price, pseudo,
                 sales_count, satisfaction, bid_count, purchase, shipping,
                 country) in reader:
                # Keep the first 10 characters and turn "/" into "-".
                date = date[:10].replace("/", "-")
                # Single quotes are replaced because the values are pasted
                # into the SQL text below.
                title = title.replace("'", "_")

                # Rows without an id or a title are skipped.
                if row_id == "None" or title == "None":
                    continue

                subtitle = subtitle.replace("'", "_")

                # The CSV encodes missing values as "None", "-1.0" or "-1".
                is_new = None if is_new == "None" else is_new
                star = None if star == "-1.0" else float(star)
                bid_count = None if bid_count == "-1" else int(bid_count)
                purchase = None if purchase == "None" else purchase

                # NOTE(review): the statement is built with str.format, so
                # its safety depends entirely on notnull/nullable (defined in
                # scrap, not visible here). Consider psycopg2 parameter
                # binding if the CSV can contain untrusted text.
                cursor.execute(INSERT_QUERY.format(
                    id=notnull(row_id),
                    title=notnull(title),
                    type=notnull(offer_type),
                    current_date=notnull(date),
                    is_new=nullable(is_new),
                    star=nullable(star),
                    subtitle=nullable(subtitle),
                    price=nullable(price),
                    pseudo=nullable(pseudo),
                    sales_count=nullable(sales_count),
                    satisfaction=nullable(satisfaction),
                    bid_count=nullable(bid_count),
                    purchase=nullable(purchase),
                    shipping=nullable(shipping),
                    country=nullable(country),
                ))

        connection.commit()
    except Exception as error:
        # Nothing has been committed at this point; closing the connection
        # below discards the pending inserts.
        print(error)
    finally:
        # Close the cursor before the connection that owns it.
        cursor.close()
        connection.close()
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import psycopg2, json, os | ||
|
||
from common import POSTGRESS_CREDENTIAL_FILENAME | ||
|
||
def create_db():
    """Run the SQL script ``db.sql`` against the configured database.

    Connection parameters are read from the JSON file named by
    POSTGRESS_CREDENTIAL_FILENAME and passed as keyword arguments to
    ``psycopg2.connect``. The whole content of ``db.sql`` is executed in one
    call and then committed. The cursor and the connection are closed whether
    or not the script succeeds.

    Raises:
        OSError: if the credential file or ``db.sql`` cannot be opened.
        psycopg2.Error: if connecting or executing the script fails.
    """
    postgress_path: str = os.path.abspath(POSTGRESS_CREDENTIAL_FILENAME)
    with open(postgress_path, 'r') as key_file:
        postgress_data: dict = json.load(key_file)

    # Read the script before connecting so a missing file never leaves an
    # open connection behind.
    with open("db.sql", "r") as file:
        schema_sql = file.read()

    connection = psycopg2.connect(**postgress_data)
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(schema_sql)
            connection.commit()
        finally:
            cursor.close()
    finally:
        connection.close()


# Runs as soon as the module is loaded, exactly as in the original file.
create_db()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
-- Rebuild the offer table from scratch; an existing table and its rows are
-- dropped first.
DROP TABLE IF EXISTS offer;

CREATE TABLE offer (
    -- Columns forming the primary key.
    id           VARCHAR,
    date         DATE,
    title        VARCHAR,
    type         VARCHAR,

    -- Nullable attributes.
    is_new       VARCHAR NULL,
    star         DECIMAL NULL,
    subtitle     VARCHAR NULL,
    price        DECIMAL NULL,
    pseudo       VARCHAR NULL,
    sales_count  INTEGER NULL,
    satisfaction DECIMAL NULL,
    bid_count    INTEGER NULL,
    purchase     VARCHAR NULL,
    shipping     VARCHAR NULL,
    country      VARCHAR NULL,

    CONSTRAINT pk_offer PRIMARY KEY (id, date, title, type)
);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
from dataclasses import dataclass | ||
|
||
@dataclass
class GenericDataclass:
    """Dataclass that declares no fields.

    NOTE(review): presumably a placeholder or generic type marker; its use is
    not visible here.
    """
|
||
@dataclass
class WebsiteConfig:
    """Configuration values for one website.

    NOTE(review): the units of ``wait_time`` and ``update_interval`` are not
    visible here — confirm against the code that consumes them.
    """

    filename: str
    wait_time: int
    update_interval: int
    url: str
|
||
@dataclass
class MailCredential:
    """Sender address, sender password and receiver address for e-mail."""

    sender: str
    password: str
    receiver: str
|
||
@dataclass
class PostgressCredential:
    """Connection parameters for the PostgreSQL database.

    The field names double as keyword names when an instance is expanded
    with ``dataclasses.asdict`` into a connect call.
    """

    dbname: str
    user: str
    password: str
    host: str
    port: str
|
||
@dataclass
class Offer:
    """One scraped offer.

    NOTE(review): the fields appear to correspond one-to-one with the columns
    of the ``offer`` table; several of those columns are nullable, so some
    fields may hold ``None`` at runtime — confirm against the producer.
    """

    # Identification.
    id: str
    date: str
    title: str
    type: str

    # Descriptive attributes.
    is_new: str
    star: float
    subtitle: str
    price: float
    pseudo: str
    sales_count: int
    satisfaction: float
    bid_count: int
    purchase: str
    shipping: str
    country: str
|
||
@dataclass
class ScrapStats:
    """Counter of skipped items; starts at zero."""

    skipped: int = 0
|
||
@dataclass
class RunStats:
    """Counters named ``total``, ``exist`` and ``added``; all start at zero."""

    total: int = 0
    exist: int = 0
    added: int = 0
|
||
|
||
class StatsSingleton:
    """Holder for a single, lazily created ``ScrapStats`` object.

    The object is stored on the class itself, so every caller shares it.
    Both methods are static: they work when called on the class
    (``StatsSingleton.get_instance()``) and also on an instance, which
    previously raised ``TypeError`` because the functions take no ``self``.
    """

    # The shared ScrapStats, or None until first requested / after reset().
    _stats = None

    @staticmethod
    def get_instance() -> ScrapStats:
        """Return the shared ``ScrapStats``, creating it on first use."""
        if StatsSingleton._stats is None:
            StatsSingleton._stats = ScrapStats()
        return StatsSingleton._stats

    @staticmethod
    def reset():
        """Drop the shared object; the next ``get_instance`` builds a new one."""
        StatsSingleton._stats = None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
import os | ||
import sys | ||
import time | ||
import json | ||
import logging | ||
from logging import Logger | ||
import datetime as dt | ||
from dataclasses import asdict | ||
|
||
import psycopg2 | ||
from prometheus_client import start_http_server, Counter | ||
|
||
import notify | ||
from scrap import run | ||
from utils import get_json_as | ||
from models import ( | ||
PostgressCredential, | ||
MailCredential, | ||
) | ||
from common import ( | ||
POSTGRESS_CREDENTIAL_FILENAME, | ||
MAIL_CREDENTIAL_FILENAME, | ||
LOG_DIRECTORY, | ||
LINKS_FILENAME, | ||
LOGNAME_PATTERN | ||
) | ||
|
||
def get_logger():
    """Return the ``"log"`` logger, with root logging going to today's file.

    The file name is built from LOGNAME_PATTERN with subject ``"log"`` and
    the current date (``YYYY-MM-DD``) and placed in LOG_DIRECTORY.

    ``logging.basicConfig`` does nothing once the root logger has a handler,
    so calling it again on a later day would keep writing to the first day's
    file. To make the date in the file name effective, any root
    ``FileHandler`` that points at a different file is removed and closed
    before ``basicConfig`` is called. Handlers of other kinds are left alone.

    Returns:
        logging.Logger: the logger named ``"log"``.
    """
    current_date: str = dt.datetime.now().strftime("%Y-%m-%d")
    log_filename = LOGNAME_PATTERN.format(subject="log", date=current_date)
    # FileHandler stores an absolute path in baseFilename, so compare like
    # with like.
    log_path = os.path.abspath(os.path.join(LOG_DIRECTORY, log_filename))

    root = logging.getLogger()
    for handler in list(root.handlers):
        if isinstance(handler, logging.FileHandler) and handler.baseFilename != log_path:
            root.removeHandler(handler)
            handler.close()

    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        filename=log_path
    )

    return logging.getLogger("log")
|
||
if __name__ == "__main__":
    # Entry point: exposes Prometheus counters on port 8000, then scrapes
    # every configured link in an endless loop, committing the results to the
    # database and e-mailing a report when a pass fails.
    skipped = Counter('ebayscrap_skipped', 'skip offer')
    total = Counter('ebayscrap_total', 'total offer')
    exist = Counter('ebayscrap_exist', 'exist offer')
    added = Counter('ebayscrap_added', 'added offer')

    start_http_server(8000)

    if (len(sys.argv)) > 1:
        links_file = sys.argv[1]
        if os.path.exists(links_file) and os.path.isfile(links_file):
            if not os.path.exists(LOG_DIRECTORY):
                os.mkdir(LOG_DIRECTORY)

            postgress = get_json_as(POSTGRESS_CREDENTIAL_FILENAME, PostgressCredential)
            mail = get_json_as(MAIL_CREDENTIAL_FILENAME, MailCredential)

            # NOTE(review): links_file (argv[1]) is validated above, yet the
            # fixed LINKS_FILENAME is what gets opened. Left unchanged;
            # confirm which file is intended.
            with open(LINKS_FILENAME, "r") as file:
                links: dict = json.loads(file.read())

            wait_time: int = 3
            while True:
                # Countdown between two passes.
                for index in range(wait_time):
                    print(f"Time left: {wait_time - index}s")
                    time.sleep(1)

                logger: Logger = get_logger()

                # Reset per pass so the handlers below never see names that
                # are unbound (failure before the first run() returns) or
                # left over from an earlier pass.
                connection = None
                cursor = None
                subject = None
                run_stats = None

                try:
                    logger.info("Connection to database...")

                    connection = psycopg2.connect(**asdict(postgress))
                    cursor = connection.cursor()

                    logger.info("Connected to database!")
                    logger.info("Scrapping begings...")

                    for subject, link in links.items():
                        # NOTE(review): the object bound to run_stats is read
                        # for .skipped and the one bound to scrap_stats for
                        # .total/.exist/.added, which is the reverse of the
                        # ScrapStats/RunStats field layout. The names look
                        # swapped; verify against scrap.run's return order.
                        scrap_stats, run_stats = run(subject, link, cursor)

                        skipped.inc(run_stats.skipped)

                        total.inc(scrap_stats.total)
                        exist.inc(scrap_stats.exist)
                        added.inc(scrap_stats.added)

                    logger.info(f"Scrapping ended with {json.dumps(asdict(run_stats))}")
                    logger.info("Commit to databse...")

                    connection.commit()

                    logger.info("Scrapping succes !")

                except Exception as error:
                    logger.exception(error)

                    # run_stats is still None when the failure happened
                    # before any run() call returned.
                    if run_stats is not None:
                        stats_text = json.dumps(asdict(run_stats))
                    else:
                        stats_text = "no stats available"

                    notify.send_message(
                        mail.sender,
                        mail.password,
                        mail.receiver,
                        f"Error ebayscrap with {subject}",
                        f"Error with {error} with {stats_text}"
                    )
                finally:
                    logger.info("Closing database connecion...")

                    # Close the cursor before the connection that owns it.
                    if cursor is not None:
                        cursor.close()
                    if connection is not None:
                        connection.close()

                    logger.info("Database connecion closed !")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import smtplib | ||
from email.mime.text import MIMEText | ||
from email.mime.multipart import MIMEMultipart | ||
from email.mime.base import MIMEBase | ||
from email import encoders | ||
|
||
# SMTP endpoint used by send_message (STARTTLS submission port).
SMTP_SERVER = "smtp.gmail.com"
SMTP_PORT = 587


def send_message(_from: str, password: str, to: str, subject: str, message: str, file_path: str = "None"):
    """Send a plain-text e-mail through SMTP_SERVER, optionally with a file.

    Args:
        _from: sender address; also used as the SMTP login name.
        password: password for the SMTP login.
        to: recipient address.
        subject: subject line.
        message: plain-text body.
        file_path: path of a file to attach. The string ``"None"`` (the
            default) or ``None`` means no attachment.

    Raises:
        OSError: if the attachment cannot be read or the server is unreachable.
        smtplib.SMTPException: if the SMTP conversation fails.
    """
    import os  # local import: only needed for the attachment file name

    msg = MIMEMultipart()
    msg['From'] = _from
    msg['To'] = to
    msg['Subject'] = subject
    msg.attach(MIMEText(message, 'plain'))

    if file_path is not None and file_path != "None":
        # Read as bytes: text mode fails on binary files and mangles
        # non-ASCII content before it is base64-encoded.
        with open(file_path, 'rb') as file:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(file.read())
        encoders.encode_base64(part)
        # Passing filename as a header parameter lets the email package quote
        # it; only the base name is sent, not the local directory layout.
        part.add_header(
            'Content-Disposition',
            'attachment',
            filename=os.path.basename(file_path),
        )
        msg.attach(part)

    text = msg.as_string()

    # The context manager issues QUIT and closes the socket even when
    # starttls/login/sendmail raises.
    with smtplib.SMTP(SMTP_SERVER, SMTP_PORT) as server:
        server.starttls()
        server.login(_from, password)
        server.sendmail(_from, to, text)
Oops, something went wrong.