
Set the default logging level to INFO
my8100 committed Aug 16, 2019
1 parent 6033e7e commit 039522b
Showing 13 changed files with 52 additions and 49 deletions.
2 changes: 1 addition & 1 deletion scrapydweb/default_settings.py
@@ -338,7 +338,7 @@
# Note that use_reloader is set to False in run.py
DEBUG = False

# The default is False, set it to True to change the logging level from WARNING to DEBUG
# The default is False, set it to True to change the logging level from INFO to DEBUG
# for getting more information about how ScrapydWeb works, especially while debugging.
VERBOSE = False
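
As context for this change, here is a minimal sketch (illustrative only, not the project's exact wiring; the real logic lives in baseview.py and poll.py below) of how a flag like VERBOSE maps onto the new default INFO level:

```python
import logging

# Illustrative sketch: DEBUG when VERBOSE is True, INFO otherwise.
VERBOSE = False  # as in default_settings.py

logging.basicConfig(format="[%(asctime)s] %(levelname)-8s in %(name)s: %(message)s")
logger = logging.getLogger('scrapydweb')
logger.setLevel(logging.DEBUG if VERBOSE else logging.INFO)

logger.debug("shown only when VERBOSE is True")
logger.info("shown by default after this commit")
```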

20 changes: 10 additions & 10 deletions scrapydweb/utils/poll.py
@@ -73,7 +73,7 @@ def __init__(self, url_scrapydweb, username, password,
if verbose:
self.logger.setLevel(logging.DEBUG)
else:
self.logger.setLevel(logging.WARNING)
self.logger.setLevel(logging.INFO)
self.exit_timeout = exit_timeout

self.init_time = time.time()
@@ -115,8 +115,8 @@ def fetch_jobs(self, node, url, auth):
running_jobs.append(job_tuple)
elif job['finish']:
finished_jobs_set.add(job_tuple)
self.logger.info("[node %s] got running_jobs: %s", node, len(running_jobs))
self.logger.info("[node %s] got finished_jobs_set: %s", node, len(finished_jobs_set))
self.logger.debug("[node %s] got running_jobs: %s", node, len(running_jobs))
self.logger.debug("[node %s] got finished_jobs_set: %s", node, len(finished_jobs_set))
return running_jobs, finished_jobs_set

def fetch_stats(self, node, job_tuple, finished_jobs):
@@ -139,7 +139,7 @@ def fetch_stats(self, node, job_tuple, finished_jobs):
self.logger.error("[node %s %s] fetch_stats failed: %s", node, self.scrapyd_servers[node-1], url)
if job_finished:
self.finished_jobs_dict[node].remove(job_tuple)
self.logger.warning("[node %s] retry in next round: %s", node, url)
self.logger.info("[node %s] retry in next round: %s", node, url)
else:
self.logger.debug("[node %s] fetch_stats got (%s) %s bytes from %s",
node, r.status_code, len(r.content), url)
@@ -156,7 +156,7 @@ def main(self):
self.logger.critical("GoodBye, exit_timeout: %s", self.exit_timeout)
break
else:
self.logger.warning("Sleeping for %ss", self.poll_round_interval)
self.logger.info("Sleeping for %ss", self.poll_round_interval)
time.sleep(self.poll_round_interval)
except KeyboardInterrupt:
self.logger.warning("Poll subprocess (pid: %s) cancelled by KeyboardInterrupt", self.poll_pid)
@@ -203,21 +203,21 @@ def run(self):

def update_finished_jobs(self, node, finished_jobs_set):
finished_jobs_set_previous = self.finished_jobs_dict.setdefault(node, set())
self.logger.info("[node %s] previous finished_jobs_set: %s", node, len(finished_jobs_set_previous))
self.logger.debug("[node %s] previous finished_jobs_set: %s", node, len(finished_jobs_set_previous))
# set([2,3]).difference(set([1,2])) => {3}
finished_jobs_set_new_added = finished_jobs_set.difference(finished_jobs_set_previous)
self.finished_jobs_dict[node] = finished_jobs_set
self.logger.info("[node %s] now finished_jobs_set: %s", node, len(self.finished_jobs_dict[node]))
self.logger.debug("[node %s] now finished_jobs_set: %s", node, len(self.finished_jobs_dict[node]))
if finished_jobs_set_new_added:
self.logger.warning("[node %s] new added finished_jobs_set: %s", node, finished_jobs_set_new_added)
else:
self.logger.info("[node %s] new added finished_jobs_set: %s", node, finished_jobs_set_new_added)
else:
self.logger.debug("[node %s] new added finished_jobs_set: %s", node, finished_jobs_set_new_added)

finished_jobs = []
ignore = self.ignore_finished_bool_list[node-1]
for job_tuple in finished_jobs_set_new_added:
if ignore:
self.logger.warning("[node %s] ignore finished job: %s", node, job_tuple)
self.logger.debug("[node %s] ignore finished job: %s", node, job_tuple)
else:
finished_jobs.append(job_tuple)
if ignore:
6 changes: 3 additions & 3 deletions scrapydweb/utils/scheduler.py
@@ -91,11 +91,11 @@ def my_listener(event):


def shutdown_scheduler():
apscheduler_logger.info("Scheduled tasks: %s", scheduler.get_jobs())
apscheduler_logger.debug("Scheduled tasks: %s", scheduler.get_jobs())
apscheduler_logger.warning("Shutting down the scheduler for timer tasks gracefully, "
"wait until all currently executing tasks are finished")
apscheduler_logger.info("The main pid is %s. Kill it manually if you don't want to wait",
handle_metadata().get('main_pid'))
apscheduler_logger.warning("The main pid is %s. Kill it manually if you don't want to wait",
handle_metadata().get('main_pid'))
scheduler.shutdown()
# apscheduler_logger.info("Waits until all currently executing jobs are finished. "
# "Press CTRL+C to force unclean shutdown")
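As a side note, the wait these messages warn about comes from APScheduler itself: shutdown() blocks until executing jobs finish unless told otherwise. A hedged sketch of the two modes (illustrative, not project code):

```python
from apscheduler.schedulers.background import BackgroundScheduler

# shutdown() is graceful by default and waits for running jobs;
# shutdown(wait=False) returns immediately instead.
scheduler = BackgroundScheduler()
scheduler.start()

scheduler.shutdown()              # graceful: waits for executing jobs
# scheduler.shutdown(wait=False)  # forced: do not wait
```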
12 changes: 7 additions & 5 deletions scrapydweb/utils/send_email.py
@@ -9,10 +9,6 @@


logger = logging.getLogger('scrapydweb.utils.send_email') # __name__
_handler = logging.StreamHandler()
_formatter = logging.Formatter(fmt="[%(asctime)s] %(levelname)-8s in %(name)s: %(message)s")
_handler.setFormatter(_formatter)
logger.addHandler(_handler)
logger.setLevel(logging.DEBUG)


@@ -74,7 +70,7 @@ def send_email(**kwargs):
kwargs.update(to_retry=False, need_debug=True)
logger.debug("Retrying...")
time.sleep(3)
send_email(**kwargs)
return send_email(**kwargs)
else:
result = True
reason = "Sent"
@@ -90,4 +86,10 @@


if __name__ == '__main__':
# To avoid logging twice when importing the send_email function to send email.
_handler = logging.StreamHandler()
_formatter = logging.Formatter(fmt="[%(asctime)s] %(levelname)-8s in %(name)s: %(message)s")
_handler.setFormatter(_formatter)
logger.addHandler(_handler)

send_email(**json.loads(sys.argv[1]))
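
The "avoid logging twice" comment above refers to the classic double-handler pitfall that moving the StreamHandler setup into the `__main__` block avoids. A minimal sketch (illustrative, not the project's code) of why a module-level handler plus the importer's own handler emits each record twice:

```python
import logging

# Two handlers attached to the same logger: one added by the imported
# module at import time, one added by the importing application.
logger = logging.getLogger('scrapydweb.utils.send_email')
logger.addHandler(logging.StreamHandler())  # added on import
logger.addHandler(logging.StreamHandler())  # added again by the importer

logger.warning("printed twice")  # emitted once per attached handler
```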
2 changes: 1 addition & 1 deletion scrapydweb/views/baseview.py
@@ -58,7 +58,7 @@ def __init__(self, *args, **kwargs):
self.SQLALCHEMY_DATABASE_URI = app.config['SQLALCHEMY_DATABASE_URI']
self.SQLALCHEMY_BINDS = app.config['SQLALCHEMY_BINDS']

_level = logging.DEBUG if self.VERBOSE else logging.WARNING
_level = logging.DEBUG if self.VERBOSE else logging.INFO
self.logger.setLevel(_level)
logging.getLogger("requests").setLevel(_level)
logging.getLogger("urllib3").setLevel(_level)
8 changes: 4 additions & 4 deletions scrapydweb/views/dashboard/jobs.py
@@ -203,7 +203,7 @@ def handle_jobs_with_db(self):
self.metadata['style'] = self.style
handle_metadata('jobs_style', self.style)
msg = "Change style to %s" % self.style
self.logger.warning(msg)
self.logger.info(msg)
# flash(msg, self.WARN)

# Note that there may be jobs with the same combination of (project, spider, job) in the fetched Jobs
@@ -256,7 +256,7 @@ def db_insert_jobs(self):
record.deleted = NOT_DELETED
record.pages = None
record.items = None
self.logger.warning("Recover deleted job: %s", record)
self.logger.info("Recover deleted job: %s", record)
flash("Recover deleted job: %s" % job, self.WARN)
else:
record = self.Job()
@@ -300,7 +300,7 @@ def db_clean_pending_jobs(self):
if (record.project, record.spider, record.job) not in current_pending_jobs:
db.session.delete(record)
db.session.commit()
self.logger.warning("Deleted pending jobs %s", record)
self.logger.info("Deleted pending jobs %s", record)

def query_jobs(self):
current_running_job_pids = [int(job['pid']) for job in self.jobs_backup if job['pid']]
@@ -461,7 +461,7 @@ def dispatch_request(self, **kwargs):
self.js['message'] = str(err)
else:
self.js['status'] = self.OK
self.logger.warning(self.js.setdefault('tip', "Deleted %s" % job))
self.logger.info(self.js.setdefault('tip', "Deleted %s" % job))
else:
self.js['status'] = self.ERROR
self.js['message'] = "job #%s not found in the database" % self.id
8 changes: 4 additions & 4 deletions scrapydweb/views/files/log.py
@@ -401,7 +401,7 @@ def monitor_alert(self):
job_data_default = ([0] * 8, [False] * 6, False, time.time())
job_data = job_data_dict.setdefault(self.job_key, job_data_default)
(self.job_stats_previous, self.triggered_list, self.has_been_stopped, self.last_send_timestamp) = job_data
self.logger.info(job_data_dict)
self.logger.debug(job_data_dict)
self.job_stats = [self.kwargs['log_categories'][k.lower() + '_logs']['count']
for k in self.ALERT_TRIGGER_KEYS]
self.job_stats.extend([self.kwargs['pages'] or 0, self.kwargs['items'] or 0]) # May be None by LogParser
@@ -527,14 +527,14 @@ def send_alert(self):
def handle_data(self):
if self.flag:
# Update job_data_dict (last_send_timestamp would be updated only when flag is non-empty)
self.logger.info("Previous job_data['%s'] %s", self.job_key, job_data_dict[self.job_key])
self.logger.debug("Previous job_data['%s'] %s", self.job_key, job_data_dict[self.job_key])
job_data_dict[self.job_key] = (self.job_stats, self.triggered_list, self.has_been_stopped, time.time())
self.logger.info("Updated job_data['%s'] %s", self.job_key, job_data_dict[self.job_key])
self.logger.debug("Updated job_data['%s'] %s", self.job_key, job_data_dict[self.job_key])

if self.job_finished:
job_data_dict.pop(self.job_key)
od = job_finished_key_dict[self.node]
od[self.job_key] = None
if len(od) > self.jobs_to_keep:
od.popitem(last=False)
self.logger.info('job_finished: %s', self.job_key)
self.logger.info('job_finished: %s', self.job_key)
6 changes: 3 additions & 3 deletions scrapydweb/views/operations/deploy.py
@@ -112,7 +112,7 @@ def get_modification_times(self):
if timestamps:
max_timestamp_index = timestamps.index(max(timestamps))
self.latest_folder = self.folders[max_timestamp_index]
self.logger.info('latest_folder: %s', self.latest_folder)
self.logger.debug('latest_folder: %s', self.latest_folder)

def get_modification_time(self, path, func_walk=os.walk, retry=True):
# https://stackoverflow.com/a/29685234/10517783
@@ -171,8 +171,8 @@ def parse_scrapy_cfg(self):
diff = set(keys_all).difference(set(keys_exist))
for key in diff:
self.logger.debug('Pop %s, project %s', key, folder_project_dict.pop(key))
self.logger.info(self.json_dumps(folder_project_dict))
self.logger.info('folder_project_dict length: %s', len(folder_project_dict))
self.logger.debug(self.json_dumps(folder_project_dict))
self.logger.debug('folder_project_dict length: %s', len(folder_project_dict))


class DeployUploadView(BaseView):
12 changes: 6 additions & 6 deletions scrapydweb/views/operations/execute_task.py
@@ -46,8 +46,8 @@ def main(self):
continue
if index == 1:
# https://apscheduler.readthedocs.io/en/latest/userguide.html#shutting-down-the-scheduler
self.logger.info("Retry task #%s (%s) on nodes %s in %s seconds",
self.task_id, self.task_name, nodes, self.sleep_seconds_before_retry)
self.logger.warning("Retry task #%s (%s) on nodes %s in %s seconds",
self.task_id, self.task_name, nodes, self.sleep_seconds_before_retry)
time.sleep(self.sleep_seconds_before_retry)
self.logger.warning("Retrying task #%s (%s) on nodes %s", self.task_id, self.task_name, nodes)
for node in nodes:
@@ -119,7 +119,7 @@ def db_insert_task_job_result(self, js):
task_job_result.result = js.get('jobid', '') or js.get('message', '') or js.get('exception', '')
db.session.add(task_job_result)
db.session.commit()
self.logger.warning("Inserted %s", task_job_result)
self.logger.info("Inserted task_job_result: %s", task_job_result)

# https://stackoverflow.com/questions/13895176/sqlalchemy-and-sqlite-database-is-locked
def db_update_task_result(self):
@@ -133,18 +133,18 @@ def db_update_task_result(self):
url_delete_task_result = re.sub(r'/\d+/\d+/$', '/%s/%s/' % (self.task_id, self.task_result_id),
self.url_delete_task_result)
js = get_response_from_view(url_delete_task_result, auth=self.auth, data=self.data, as_json=True)
apscheduler_logger.warning("Delete task_result #%s [FAIL %s, PASS %s] of task #%s: %s",
apscheduler_logger.warning("Deleted task_result #%s [FAIL %s, PASS %s] of task #%s: %s",
self.task_result_id, self.fail_count, self.pass_count, self.task_id, js)
return
if not task_result:
apscheduler_logger.error("task_result #%s of task #%s not found", self.task_result_id, self.task_id)
apscheduler_logger.warning("Fail to update task_result #%s [FAIL %s, PASS %s] of task #%s",
apscheduler_logger.warning("Failed to update task_result #%s [FAIL %s, PASS %s] of task #%s",
self.task_result_id, self.fail_count, self.pass_count, self.task_id)
return
task_result.fail_count = self.fail_count
task_result.pass_count = self.pass_count
db.session.commit()
self.logger.warning("Inserted %s", task_result)
self.logger.info("Inserted task_result: %s", task_result)


def execute_task(task_id):
14 changes: 7 additions & 7 deletions scrapydweb/views/operations/schedule.py
@@ -222,7 +222,7 @@ def __init__(self):
self.slot = slot

def dispatch_request(self, **kwargs):
self.logger.warning('request.form from %s\n%s', request.url, self.json_dumps(request.form))
self.logger.debug('request.form from %s\n%s', request.url, self.json_dumps(request.form))
self.prepare_data()
self.update_data_for_timer_task()
# self.logger.warning(self.json_dumps(self.data)) # TypeError: Object of type datetime is not JSON serializable
@@ -381,7 +381,7 @@ def handle_form(self):
self.data = pickle.loads(f.read())

def handle_action(self):
self.logger.warning(self.json_dumps(self.data))
self.logger.debug(self.json_dumps(self.data))
self.task_data = self.data.pop('__task_data', {}) # Now self.data is clean
self.logger.debug("task_data: %s", self.task_data)
if self.task_data: # For timer task
@@ -464,7 +464,7 @@ def add_update_task(self):
if self._action == 'add_fire':
# In case the task fires before db.session.commit()
if self.to_update_task:
self.logger.warning("Task #%s would be fired right after the apscheduler_job is updated", self.task_id)
self.logger.info("Task #%s would be fired right after the apscheduler_job is updated", self.task_id)
else:
self.task_data['next_run_time'] = datetime.now() # datetime.utcnow()
postfix = "Reload this page several seconds later to check out the execution result. "
@@ -501,15 +501,15 @@ def add_update_task(self):
self.logger.debug("Updated %s", self.task)
# In case the task fires before db.session.commit()
if self._action == 'add_fire':
self.logger.warning("Modify next_run_time of updated task #%s to fire it right now", self.task_id)
self.logger.info("Modifying next_run_time of updated task #%s to fire it right now", self.task_id)
job_instance.modify(next_run_time=datetime.now())
self.add_task_result = True
msg = u"{target} task #{task_id} ({task_name}) successfully, next run at {next_run_time}. ".format(
target="Update" if self.to_update_task else 'Add',
task_id=self.task_id, task_name=self.task_data['name'],
next_run_time=job_instance.next_run_time or self.NA)
self.add_task_flash = msg + postfix
apscheduler_logger.warning(msg)
apscheduler_logger.info(msg)
# TypeError: vars() argument must have __dict__ attribute
# apscheduler_logger.warning(vars(job_instance))
# pformat({k: getattr(job_instance, k) for k in job_instance.__slots__}, indent=4)
@@ -522,8 +522,8 @@ def add_update_task(self):
trigger=repr(job_instance.trigger),
next_run_time=repr(job_instance.next_run_time),
)
apscheduler_logger.warning("%s job_instance: \n%s", "Updated" if self.to_update_task else 'Added',
self.json_dumps(job_instance_dict))
apscheduler_logger.info("%s job_instance: \n%s", "Updated" if self.to_update_task else 'Added',
self.json_dumps(job_instance_dict))
finally:
if 'next_run_time' in self.task_data: # TypeError: Object of type datetime is not JSON serializable
self.task_data['next_run_time'] = str(self.task_data['next_run_time'] or self.NA)
6 changes: 3 additions & 3 deletions scrapydweb/views/utilities/send_text.py
@@ -90,7 +90,7 @@ def send_email(self):
subject=self.channel_chatid_subject, text=self.text))
else:
self.js = dict(status=self.ERROR, result=dict(reason=reason), debug=self.EMAIL_KWARGS)
self.logger.error("Fail to send text via Email:\n%s", self.js)
self.logger.error("Fail to send text via Email:\n%s", self.json_dumps(self.js))

def send_slack(self):
if not self.SLACK_TOKEN:
@@ -113,7 +113,7 @@ def send_slack(self):
if self.SLACK_TOKEN:
self.js['debug'] = dict(token=self.SLACK_TOKEN, channel=self.channel_chatid_subject,
text=self.text)
self.logger.error("Fail to send text via Slack:\n%s", self.js)
self.logger.error("Fail to send text via Slack:\n%s", self.json_dumps(self.js))

def send_telegram(self):
if not self.TELEGRAM_TOKEN:
@@ -135,4 +135,4 @@ def send_telegram(self):
if self.TELEGRAM_TOKEN:
self.js['debug'] = dict(token=self.TELEGRAM_TOKEN, chat_id=self.channel_chatid_subject,
text=self.text)
self.logger.error("Fail to send text via Telegram:\n%s", self.js)
self.logger.error("Fail to send text via Telegram:\n%s", self.json_dumps(self.js))
2 changes: 1 addition & 1 deletion tests/test_send_text.py
@@ -25,7 +25,7 @@ def check_pass(recipients=None, subject='Email from #scrapydweb', text=None):
assert js['result']['text'] == text
assert 'debug' not in js
assert js['when']
sleep(5)
sleep(10)

# 'email'
nos = ['debug', 'email_password']
3 changes: 2 additions & 1 deletion tests/test_tasks.py
@@ -97,7 +97,8 @@ def test_edit_task(app, client):
task_id = metadata['task_id']
# http://127.0.0.1:5000/1/schedule/?task_id=1
req(app, client, view='schedule', kws=dict(node=NODE, task_id=task_id),
ins=["checked />[1] %s" % app.config['SCRAPYD_SERVERS'][0], "checked />[2] %s" % app.config['SCRAPYD_SERVERS'][-1]])
ins=["checked />[1] %s" % app.config['SCRAPYD_SERVERS'][0],
"checked />[2] %s" % app.config['SCRAPYD_SERVERS'][-1]])

check_data_ = dict(check_data)
check_data_.update(task_id=task_id, hour='6')
