Fix occasional "table already defined" in JobsView (#68)
my8100 committed Jul 13, 2019
1 parent 23ab8e3 commit 0492882
Showing 5 changed files with 31 additions and 17 deletions.
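Note on the fix (a rough sketch, not the project's exact wording): JobsView previously created the per-server Job model class on demand in create_table(), so the same table name could be defined more than once against the shared SQLAlchemy metadata, which is what produced the occasional "table already defined" error. This commit creates each table once per Scrapyd server in check_app_config() and caches the model classes in the module-level jobs_table_map keyed by node number; the views then only read from that map. The pattern in condensed form, using names visible in the diff (the wrapper function itself is hypothetical):

    import re
    from scrapydweb.models import create_jobs_table, db
    from scrapydweb.vars import STRICT_NAME_PATTERN, jobs_table_map

    def get_jobs_table(node, scrapyd_server):
        # Create the per-server Job model only once; later calls reuse the cached class.
        if node not in jobs_table_map:
            table_name = re.sub(STRICT_NAME_PATTERN, '_', scrapyd_server)  # e.g. '127_0_0_1_6800'
            jobs_table_map[node] = create_jobs_table(table_name)
            db.create_all(bind='jobs')  # creates only tables that do not yet exist in the 'jobs' bind
        return jobs_table_map[node]
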
12 changes: 10 additions & 2 deletions scrapydweb/utils/check_app_config.py
@@ -5,17 +5,18 @@
import re

from ..common import handle_metadata, handle_slash, json_dumps, session
from ..models import create_jobs_table, db
from ..utils.scheduler import scheduler
from ..vars import (ALLOWED_SCRAPYD_LOG_EXTENSIONS, EMAIL_TRIGGER_KEYS,
SCHEDULER_STATE_DICT, STATE_PAUSED, STATE_RUNNING,
SCHEDULE_ADDITIONAL, UA_DICT)
SCHEDULE_ADDITIONAL, STRICT_NAME_PATTERN, UA_DICT,
jobs_table_map)
from .send_email import send_email
from .sub_process import init_logparser, init_poll


logger = logging.getLogger(__name__)

jobs_table_dict = {}
REPLACE_URL_NODE_PATTERN = re.compile(r'(:\d+/)\d+/')
EMAIL_PATTERN = re.compile(r'^[^@]+@[^@]+\.[^@]+$')
HASH = '#' * 100
@@ -103,6 +104,13 @@ def check_assert(key, default, is_instance, allow_zero=True, non_empty=False, co

# Scrapyd
check_scrapyd_servers(config)
# For JobsView
for node, scrapyd_server in enumerate(config['SCRAPYD_SERVERS'], 1):
# Note that check_app_config() is executed multiple times in test
if node not in jobs_table_map:
jobs_table_map[node] = create_jobs_table(re.sub(STRICT_NAME_PATTERN, '_', scrapyd_server))
db.create_all(bind='jobs')
logger.debug("Created %s tables for JobsView", len(jobs_table_map))

check_assert('SCRAPYD_LOGS_DIR', '', str)
check_assert('LOCAL_SCRAPYD_SERVER', '', str)
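
For context on why the `if node not in jobs_table_map` guard above matters: create_jobs_table() (defined in scrapydweb/models.py, not shown in this diff) builds a model class dynamically, and registering two classes with the same __tablename__ against the same db.metadata raises sqlalchemy.exc.InvalidRequestError ("Table ... is already defined for this MetaData instance"). A rough sketch of such a factory, assuming Flask-SQLAlchemy and an illustrative column set only:

    from flask_sqlalchemy import SQLAlchemy

    db = SQLAlchemy()  # stands in for the shared db instance from scrapydweb.models

    def create_jobs_table(server):
        # 'server' is the sanitized address, e.g. '127_0_0_1_6800'; it becomes both
        # the class name and the table name, bound to the separate 'jobs' database.
        return type(server, (db.Model,), dict(
            __tablename__=server,
            __bind_key__='jobs',
            id=db.Column(db.Integer, primary_key=True),
            project=db.Column(db.String(255)),
            spider=db.Column(db.String(255)),
            job=db.Column(db.String(255)),
            deleted=db.Column(db.String(1), default='0'),
        ))
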
12 changes: 8 additions & 4 deletions scrapydweb/vars.py
@@ -33,15 +33,15 @@
TIMER_TASKS_HISTORY_LOG = os.path.join(HISTORY_LOG, 'timer_tasks_history.log')


# For check_app_config.py and MyView
# For check_app_config() and MyView
ALLOWED_SCRAPYD_LOG_EXTENSIONS = ['.log', '.log.gz', '.txt', '.gz', '']
EMAIL_TRIGGER_KEYS = ['CRITICAL', 'ERROR', 'WARNING', 'REDIRECT', 'RETRY', 'IGNORE']

# Error: Project names must begin with a letter and contain only letters, numbers and underscores
STRICT_NAME_PATTERN = re.compile(r'[^0-9A-Za-z_]')
LEGAL_NAME_PATTERN = re.compile(r'[^0-9A-Za-z_-]')

# For schedule.py
# For ScheduleView
SCHEDULE_ADDITIONAL = "-d setting=CLOSESPIDER_TIMEOUT=60\r\n-d setting=CLOSESPIDER_PAGECOUNT=10\r\n-d arg1=val1"
UA_DICT = {
'custom': "Mozilla/5.0",
@@ -56,7 +56,7 @@
}


# For logs.py and items.py
# For LogsView and ItemsView
DIRECTORY_PATTERN = re.compile(r"""
<tr\sclass="(?P<odd_even>odd|even)">\n
\s+<td>(?P<filename>.*?)</td>\n
@@ -69,7 +69,11 @@
HREF_NAME_PATTERN = re.compile(r'href="(.+?)">(.+?)<')


# For timer task
# For JobsView
jobs_table_map = {}


# For Timer Tasks
APSCHEDULER_DATABASE_URI = 'sqlite:///' + os.path.join(DATABASE_PATH, 'apscheduler.db')
# http://flask-sqlalchemy.pocoo.org/2.3/binds/#binds
SQLALCHEMY_DATABASE_URI = 'sqlite:///' + os.path.join(DATABASE_PATH, 'timer_tasks.db')
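
For reference, STRICT_NAME_PATTERN (defined above) strips every character that is not a letter, digit, or underscore, which is how a Scrapyd server address becomes a legal table name:

    import re

    STRICT_NAME_PATTERN = re.compile(r'[^0-9A-Za-z_]')
    print(re.sub(STRICT_NAME_PATTERN, '_', '127.0.0.1:6800'))  # -> 127_0_0_1_6800
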
20 changes: 12 additions & 8 deletions scrapydweb/views/overview/jobs.py
@@ -9,6 +9,7 @@

from ...common import handle_metadata
from ...models import create_jobs_table, db
from ...vars import STRICT_NAME_PATTERN, jobs_table_map
from ..myview import MyView


@@ -83,6 +84,7 @@ def __init__(self):
self.pending_jobs = []
self.running_jobs = []
self.finished_jobs = []
self.jobs_pagination = None

self.Job = None # database class Job

@@ -162,14 +164,15 @@ def get_liststats_datas(self):
js['status_code'], js['status'], js.get('tip', ''))

def create_table(self):
self.Job = self.metadata.get(self.node)
if self.Job:
self.Job = jobs_table_map.get(self.node, None)
if self.Job is not None:
self.logger.debug("Got table: %s", self.Job.__tablename__)
else:
self.Job = create_jobs_table(re.sub(r'[^A-Za-z0-9_]', '_', self.SCRAPYD_SERVER))
self.Job = create_jobs_table(re.sub(STRICT_NAME_PATTERN, '_', self.SCRAPYD_SERVER))
# sqlite3.OperationalError: table "127_0_0_1_6800" already exists
db.create_all(bind='jobs')
self.metadata[self.node] = self.Job
jobs_table_map[self.node] = self.Job
self.logger.debug("Created table: %s", self.Job.__tablename__)

def handle_jobs_with_db(self):
@@ -297,11 +300,12 @@ def db_clean_pending_jobs(self):
def query_jobs(self):
current_running_job_pids = [int(job['pid']) for job in self.jobs_backup if job['pid']]
self.logger.debug("current_running_job_pids: %s", current_running_job_pids)
self.jobs = self.Job.query.filter_by(deleted=NOT_DELETED).order_by(
self.jobs_pagination = self.Job.query.filter_by(deleted=NOT_DELETED).order_by(
self.Job.status.asc(), self.Job.finish.desc(), self.Job.start.asc(), self.Job.id.asc()).paginate(
page=self.page, per_page=self.per_page, error_out=False)
with db.session.no_autoflush:
for index, job in enumerate(self.jobs.items, (self.jobs.page - 1) * self.jobs.per_page + 1):
for index, job in enumerate(self.jobs_pagination.items,
(self.jobs_pagination.page - 1) * self.jobs_pagination.per_page + 1):
# print(vars(job))
job.index = index
job.pid = job.pid or ''
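
The rename from self.jobs to self.jobs_pagination makes it explicit that query_jobs() keeps a Flask-SQLAlchemy Pagination object rather than a plain list: .items holds the rows of the current page, while .page and .per_page give the offset needed for a 1-based row index across pages. A minimal usage sketch, assuming Job and NOT_DELETED as in the surrounding code:

    pagination = Job.query.filter_by(deleted=NOT_DELETED).paginate(
        page=2, per_page=100, error_out=False)  # error_out=False returns an empty page instead of a 404
    offset = (pagination.page - 1) * pagination.per_page + 1
    for index, job in enumerate(pagination.items, offset):
        print(index, job.id)
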
@@ -341,7 +345,7 @@ def query_jobs(self):
job.url_delete = url_for('jobs.xhr', node=self.node, action='delete', id=job.id)

def set_jobs_dict(self):
for job in self.jobs.items: # paginate obj in query_jobs()
for job in self.jobs_pagination.items: # Pagination obj in handle_jobs_with_db() > query_jobs()
key = '%s/%s/%s' % (job.project, job.spider, job.job)
value = dict((k, v) for (k, v) in job.__dict__.items() if not k.startswith('_'))
for k, v in value.items():
@@ -402,7 +406,7 @@ def set_kwargs(self):
if self.style == 'database':
self.kwargs.update(dict(
url_jobs_classic=url_for('jobs', node=self.node, style='classic'),
jobs=self.jobs
jobs=self.jobs_pagination
))
return

@@ -430,7 +434,7 @@ def __init__(self):
self.id = self.view_args['id'] # <int:id>

self.js = {}
self.Job = self.metadata[self.node] # database class Job
self.Job = jobs_table_map[self.node] # database class Job

def dispatch_request(self, **kwargs):
job = self.Job.query.get(self.id)
2 changes: 0 additions & 2 deletions scrapydweb/views/overview/tasks.py
@@ -188,7 +188,6 @@ def query_task_results(self):
with_job = all([task_result.fail_count + task_result.pass_count == 1 for task_result in task_results.items])

with db.session.no_autoflush:
# for task_result in task_results.items:
for index, task_result in enumerate(task_results.items,
(task_results.page - 1) * task_results.per_page + 1):
task_result.index = index
@@ -229,7 +228,6 @@ def query_task_job_results(self):
task_job_results = TaskJobResult.query.filter_by(task_result_id=self.task_result_id).order_by(
TaskJobResult.node.asc()).paginate(page=self.page, per_page=self.per_page, error_out=False)
with db.session.no_autoflush:
# for task_job_result in task_job_results.items:
for index, task_job_result in enumerate(task_job_results.items,
(task_job_results.page - 1) * task_job_results.per_page + 1):
task_job_result.index = index
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -23,7 +23,7 @@
SMTP_SERVER='smtp.qq.com',
SMTP_PORT=465,
SMTP_OVER_SSL=True,
SMTP_CONNECTION_TIMEOUT=10,
SMTP_CONNECTION_TIMEOUT=30,
EMAIL_USERNAME=os.environ.get('EMAIL_USERNAME', '[email protected]'),
EMAIL_PASSWORD=os.environ.get('EMAIL_PASSWORD', 'password'),
FROM_ADDR=os.environ.get('FROM_ADDR', '[email protected]'),
