-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
These are the current versions of most of the scripts run by my crontab.
- Loading branch information
Showing
9 changed files
with
1,419 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
#!/bin/bash
#
# Cron-friendly watchdog for the Synda (sdt) daemon.
#
# If no "sddaemon" process is found, the daemon is started, then stopped again
# (the stop performs some cleanup which is occasionally necessary to prevent
# stalls), and finally started for real.  If the daemon is already running,
# nothing is done and nothing is logged.
#
# systemctl is used, which works for Red Hat 7 but not Red Hat 6.

source /home/painter/.bash_profile

LOGFILE=/var/log/synda/daemon/daemon_start.log

# pgrep -f matches against the full command line; its output is not wanted,
# only its exit status.
if ! pgrep -f sddaemon > /dev/null 2>&1
then
    # Everything in this group (stdout and stderr) is appended to the log.
    # $(date) is deliberately left unquoted so the logged line is formatted
    # exactly as it always has been.
    {
        echo $(date) starting daemon
        sudo /usr/bin/systemctl start synda
        sudo /usr/bin/systemctl stop synda
        sudo /usr/bin/systemctl start synda
    } >> "$LOGFILE" 2>&1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
#!/usr/bin/env python

"""Backs up the Synda database from /var/lib/synda/sdt/sdt.db to /p/css03/painter/db/.
The backup file will be named so as to reveal the date and the machine it came from.
If it is the first of the month, the backup file will be made read-only.
A second copy of the backup is always written under the fixed name sdt6.db."""

import sys, os, shutil, stat, grp
import socket, datetime, subprocess
import sqlite3
import sqlitebck
import pdb, debug

# rw-rw-r-- : owner and group may replace or delete the file.
std_file_perms = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH
# r--r--r-- : used for the archival (first of the month) backups.
ro_file_perms = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH

hostname = socket.gethostname()
if len(hostname)==8 and hostname[0:7]=='aimsdtn':
    hostname = hostname[7]   # normally 5 or 6 for aimsdtn5 or aimsdtn6

date = str(datetime.datetime.now().date())   # e.g. '2019-07-19'

tf = '_'.join(['sdt.db',hostname,date])
source = '/var/lib/synda/sdt/sdt.db'
dest = '/p/css03/painter/db/' + tf

# Copy with the SQLite backup API rather than a plain file copy.
# When a recent Python 3.7 is available, this will be the best solution:
#    srccon.backup( dstcon, pages=100, sleep=0.25 )
# which copies 100 pages at a time and sleeps 0.25 seconds between batches so
# that other processes have some access to the database.  The pages and sleep
# options of sqlitebck are documented, but they don't work for me, so for now
# the whole database is copied in one call.
#
# "with dstcon" only commits or rolls back the transaction; it does not close
# the connection.  The try/finally blocks make sure that both connections are
# closed even if opening the destination or the copy itself fails.
srccon = sqlite3.connect(source)
try:
    dstcon = sqlite3.connect(dest)
    try:
        with dstcon:
            sqlitebck.copy( srccon, dstcon )
    finally:
        dstcon.close()
finally:
    srccon.close()

groupn = grp.getgrnam('synda')[2]   # group number of 'synda', currently 20
os.chown( dest, -1, groupn )        # like "chgrp synda $dest"
if len(date)==10 and date[8:10]=='01':
    # On the first of the month, make it read-only because this is more of
    # an archival database.
    os.chmod( dest, ro_file_perms )
else:
    # For other dates, I expect to delete the backup from time to time.
    os.chmod( dest, std_file_perms )

# Make another copy so we can always use the same name for the latest backup.
# The permissions for this one should be standard (group-writable) even when
# the original version is read-only.
dest2 = '/p/css03/painter/db/sdt6.db'
shutil.copy2( dest, dest2 )
os.chmod( dest2, std_file_perms )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
-- Step 1: any file still 'waiting' or in 'error' whose dataset has been
-- superseded (another dataset with the same path_without_version has a
-- greater version) is marked 'obsolete'.
-- To preview the affected rows, replace the UPDATE ... WHERE line with:
--   SELECT filename,dataset_id,status FROM file WHERE
UPDATE file SET status='obsolete' WHERE
    status IN ('waiting','error') AND
    dataset_id IN
    ( SELECT older.dataset_id
      FROM dataset AS older
      INNER JOIN dataset AS newer
          ON older.path_without_version = newer.path_without_version
      WHERE older.version < newer.version
    )
;
-- Step 2: an 'in-progress' dataset that now owns at least one obsolete file
-- will never be completed, so it is flagged accordingly.
UPDATE dataset SET status='incomplete,obsolete'
    WHERE status='in-progress' AND
          dataset_id IN ( SELECT dataset_id FROM file WHERE status='obsolete' );
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
#!/usr/bin/env python | ||
|
||
"""Checks the error_history field of each file for whether it records file-specific errors which had | ||
been repeated over an extended period of time. Such files' statuses will be changed | ||
from 'error' to 'error-badurl' or 'error-checksum'""" | ||
|
||
import sys, pdb | ||
import argparse, logging | ||
import sqlite3 | ||
import debug | ||
from dateutil.parser import parse | ||
import datetime | ||
global conn, curs | ||
|
||
def setup( db='/var/lib/synda/sdt/sdt.db' ):
    """Opens the SQLite database at the path db and stores the connection and
    a cursor on it in the module-level globals conn and curs.
    To test on a temporary copy of the database, pass its path instead of the
    default, e.g. setup('/home/painter/db/sdt.db')."""
    global conn, curs
    database = sqlite3.connect( db )
    conn, curs = database, database.cursor()
|
||
def finish():
    """Commits any pending changes on the module-level connection conn, and
    then closes it."""
    # conn is only read here, so no "global" declaration is needed.
    database = conn
    database.commit()
    database.close()
|
||
def confirm_yesnoquit():
    """Reads a yes/no/quit answer from standard input.
    Returns True if the user types "yes" or something similar (just pressing
    "enter" counts as yes), False for "no", or None for "quit".
    Any other response is rejected: a hint is printed and the user is asked
    again, so that a typo is never mistaken for "quit".
    Raises EOFError if standard input ends before a valid answer is read."""
    # Note that raw_input/input returns the empty string for "enter"
    yes = {'yes','y', 'ye', ''}
    no = {'no','n'}
    quits = {'quit','q'}

    # raw_input exists only in Python 2; in Python 3 the same function is input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        choice = read_line().lower()
        if choice in yes:
            return True
        if choice in no:
            return False
        if choice in quits:
            return None
        sys.stdout.write("Please respond with 'yes', 'no', or 'quit'\n")
        sys.stdout.flush()
|
||
def tobe_permanent( error_history, error_in='ERROR 404', min_interval=5, min_errors=3 ):
    """If the error_history records repeated errors, that call for changing a file's status
    to a permanent error, return the new status.  Otherwise return None.
    error_history should be a list of 2-tuples (date,error), in any order.  Each date may be
    a date string or a datetime object.  The input error to check for should be supplied.
    Presently (and probably forever) it must be one of "bad checksum" or "ERROR 404".
    The minimum interval between errors, in days, may be supplied, and defaults to 5.
    The minimum number of errors may be supplied, and defaults to 3.
    Only errors equal to error_in are counted.  The earliest one always counts; a later one
    counts only if it occurred at least min_interval days after the last counted one.
    The return value is 'error-badurl' for "ERROR 404", 'error-checksum' for "bad checksum"."""
    if len(error_history)<min_errors:
        return None
    return_error = { 'ERROR 404':'error-badurl', 'bad checksum':'error-checksum' }
    assert error_in in return_error
    # Parse every matching date exactly once, and sort chronologically BEFORE choosing
    # the baseline.  The baseline has to be the earliest error, whatever the order of the
    # history; comparing datetimes rather than strings does not depend on the date format.
    dates = sorted( e[0] if isinstance(e[0],datetime.datetime) else parse(e[0])
                    for e in error_history if e[1]==error_in )
    if not dates or len(dates)<min_errors:
        return None
    date_last_error = dates[0]
    nerrors = 1
    for date in dates[1:]:
        # Get the time since the last counted error.
        # Converting to total_seconds lets us support fractional days.
        interval = (date - date_last_error).total_seconds()/3600./24
        if interval>=min_interval:
            # date is at least min_interval days after the previous counted error date.
            nerrors += 1
            date_last_error = date
    if nerrors>=min_errors:
        return return_error[error_in]
    return None
|
||
def _set_permanent_status( file_id, filename, new_status ):
    """Sets the status of the file row file_id to new_status, and logs the change.
    A filename may have multiple versions, but it's more understandable in a log than file_id."""
    curs.execute( "UPDATE file SET status=? WHERE file_id=?", ( new_status, file_id ) )
    logging.info( "changed status of %s to '%s'" % (filename, new_status) )


def mark_permanent_errors( min_interval=5, nrepeats=3, dryrun=True, confirm=True ):
    """Check the database for files with 'error' status, whose error_history represents
    repeated errors, either 'ERROR 404' or 'bad checksum'.  For each such file, change its status
    to a permanent one (not affected by "synda retry"): 'error-badurl' or 'error-checksum'.
    The minimum interval between errors, in days, may be supplied, and defaults to 5.
    The minimum number of repeated errors may be supplied, and defaults to 3.
    If dryrun is True, nothing is changed; the candidate files are just printed.
    Otherwise, if confirm is True, the user is asked about each file; answering "quit"
    stops the processing of this and all subsequent files.
    If both are False, every candidate file is changed without asking.
    Changes are made through the global cursor curs, but are not committed here.
    A file whose error_history cannot be read is logged and skipped.
    """
    # ast is needed only here, and only for literal_eval.
    import ast
    global conn, curs
    # At present, the shortest possible non-null error string is 45 characters:
    #   [('2020-06-08 15:17:13.121540', 'ERROR 404')]
    # The shortest possible one with two errors recorded is 90 characters:
    #   [('2020-06-09 10:49:35.255064', 'ERROR 404'), ('2020-06-09 13:47:21.039614', 'ERROR 404')]
    # So if we want three errors we only need look at strings with >=45*nrepeats characters.
    cmd = "SELECT file_id, filename, error_history FROM file WHERE " +\
          "status='error' AND error_history IS NOT NULL AND LENGTH(error_history)>=?"
    curs.execute( cmd, (45*nrepeats,) )
    for file_id, filename, history_text in curs.fetchall():
        # The error_history column holds the repr of a list of (date,error) string tuples.
        # NOTE: this used to be read with eval(), which would execute any code stored
        # in the database.  literal_eval accepts only Python literals.
        try:
            error_history = ast.literal_eval( history_text )
        except (ValueError, SyntaxError):
            logging.warning( "cannot read error_history of %s; skipping it" % filename )
            continue
        new_status = tobe_permanent( error_history, 'ERROR 404', min_interval, nrepeats )
        if new_status is None:
            new_status = tobe_permanent( error_history, 'bad checksum', min_interval, nrepeats )
        if new_status is None:
            continue
        if dryrun:
            # print, don't log.  This is a debugging mode.
            print( "file %s is ready for permanent error status as %s"%(filename,new_status) )
            print( " error_history=%s"%(error_history,) )
        elif confirm:
            # Change the error status, but with user confirmation file-by-file
            print( "change %s from status 'error' to '%s'?"%(filename,new_status) )
            yesnoquit = confirm_yesnoquit()
            if yesnoquit is True:
                _set_permanent_status( file_id, filename, new_status )
                print( "changed status to '%s'" % new_status )
            elif yesnoquit is False:
                print( "leaving status at 'error'" )
            else:
                print( "leaving status at 'error' for this and subsequent files" )
                break
        else:
            # Change the error status, without asking for confirmation.
            _set_permanent_status( file_id, filename, new_status )
|
||
if __name__ == '__main__':
    # Command-line entry point: configure logging, read the options, then run
    # mark_permanent_errors on the default database and commit the result.
    logfile = '/p/css03/scratch/logs/permanent_error.log'
    logging.basicConfig( filename=logfile, level=logging.INFO, format='%(asctime)s %(message)s' )

    parser = argparse.ArgumentParser(
        description="Convert repeated errors after multiple retries to permanent ones,"
        " in some cases." )
    parser.add_argument(
        "--interval", type=float, default=5,
        help="minimum interval between errors for both to be considered; in days." )
    parser.add_argument(
        "--nrepeats", type=int, default=3,
        help="number of repeated errors to change the error to a permanent one." )
    # Each flag has a positive and a negative form writing to the same
    # destination; both flags are off unless requested.
    parser.add_argument( '--dryrun', dest='dryrun', action='store_true' )
    parser.add_argument( '--no-dryrun', dest='dryrun', action='store_false' )
    parser.add_argument( '--confirm', dest='confirm', action='store_true', default=False )
    parser.add_argument( '--no-confirm', dest='confirm', action='store_false', default=False )
    parser.set_defaults( dryrun=False, confirm=False )

    # With no argument, parse_args reads sys.argv[1:].
    args = parser.parse_args()

    setup()
    logging.info( "started permanent_error_status, args=%s"%args )
    mark_permanent_errors(
        args.interval, args.nrepeats, dryrun=args.dryrun, confirm=args.confirm )
    finish()
|
Oops, something went wrong.