Skip to content

Commit

Permalink
These are the current versions of most of the scripts run by my crontab.
Browse files Browse the repository at this point in the history
  • Loading branch information
painter1 committed Mar 8, 2021
1 parent 4cd3fc3 commit 5db6fec
Show file tree
Hide file tree
Showing 9 changed files with 1,419 additions and 0 deletions.
21 changes: 21 additions & 0 deletions daemon_start
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
#
# This script starts the Synda (sdt) daemon, if it had died.
# Then it stops the daemon to get some cleanup (which is occasionally necessary to prevent stalls).
# Then it starts the daemon again.
# This script is suitable for being called by cron.
# The use of systemctl works for Red Hat 7, not Red Hat 6.

source /home/painter/.bash_profile

# All output goes to one log file; name it once instead of on every line.
LOG=/var/log/synda/daemon/daemon_start.log

if pgrep -f sddaemon > /dev/null 2>&1
then
    # Daemon is running; nothing to do.
    # (The old "ok" logging isn't needed now that this is known to work.)
    :
else
    echo "$(date) starting daemon" >> "$LOG" 2>&1
    # start/stop/start: the intermediate stop performs cleanup which is
    # occasionally necessary to prevent stalls (see header comment).
    sudo /usr/bin/systemctl start synda >> "$LOG" 2>&1
    sudo /usr/bin/systemctl stop synda >> "$LOG" 2>&1
    sudo /usr/bin/systemctl start synda >> "$LOG" 2>&1
fi
59 changes: 59 additions & 0 deletions db-backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env python

"""Backs up the Synda database from /var/lib/synda/sdt/sdt.db to /p/css03/painter/db/.
The backup file will be named so as to reveal the date and the machine it came from.
If it is the first of the month, the backup file will be made read-only."""

import sys, os, shutil, stat, grp
import socket, datetime, subprocess
import sqlite3
import sqlitebck
import pdb, debug

# rw-rw-r--: normal backup permissions; the 'synda' group may overwrite it.
std_file_perms = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH
# r--r--r--: used for the archival first-of-month backup.
ro_file_perms = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH

# Abbreviate hostnames like 'aimsdtn5' to just '5' for the backup filename.
hostname = socket.gethostname()
if len(hostname)==8 and hostname[0:7]=='aimsdtn':
    hostname = hostname[7]  # normally 5 or 6 for aimsdtn5 or aimsdtn6

date = str(datetime.datetime.now().date())  # e.g. '2019-07-19'

tf = '_'.join(['sdt.db',hostname,date])  # e.g. 'sdt.db_5_2019-07-19'
source = '/var/lib/synda/sdt/sdt.db'
dest = '/p/css03/painter/db/' + tf

# Copy via the sqlite backup mechanism (sqlitebck) rather than a plain file
# copy (shutil.copy2 / rsync were used previously), so a consistent snapshot
# is taken even while other processes use the database.
# Back up from source to dest, one page at a time. Break the source file into
# 100 pages and allow 0.25 seconds between them so that other processes have
# some access to the database.
# When a recent Python 3.7 is available, this will be the best solution:
#   srccon.backup( dstcon, pages=100, sleep=0.25 )
# But for now this is it. The pages and sleep options are documented, but they don't work for me.
# try/finally guarantees both connections are closed even if the copy fails.
srccon = sqlite3.connect(source)
try:
    dstcon = sqlite3.connect(dest)
    try:
        with dstcon:  # commit (or roll back) the whole copy as one transaction
            sqlitebck.copy( srccon, dstcon )
    finally:
        dstcon.close()
finally:
    srccon.close()

groupn = grp.getgrnam('synda')[2]  # group number of 'synda', currently 20
os.chown( dest, -1, groupn )       # like "chgrp synda $dest"
if len(date)==10 and date[8:10]=='01':
    # On the first of the month, make it read-only because this is more of
    # an archival database.
    os.chmod( dest, ro_file_perms )
else:
    # For other dates, I expect to delete the backup from time to time.
    os.chmod( dest, std_file_perms )

# Make another copy so we can always use the same name for the latest backup.
# The permissions for this one should be standard (group-writable) even when
# the original version is read-only.
dest2 = '/p/css03/painter/db/sdt6.db'
shutil.copy2( dest, dest2 )
os.chmod( dest2, std_file_perms )
14 changes: 14 additions & 0 deletions obsolete.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Mark still-pending files of superseded dataset versions as obsolete.
-- (To preview the affected rows, use this SELECT in place of the UPDATE:)
-- SELECT filename,dataset_id,status FROM file WHERE
UPDATE file SET status='obsolete' WHERE
(status='waiting' OR status='error') AND
dataset_id IN
-- Self-join dataset on path_without_version: a dataset is superseded when
-- another row shares its path but carries a higher version number.
( SELECT [1st].dataset_id FROM
( SELECT * FROM dataset ) [1st]
INNER JOIN
( SELECT * FROM dataset ) [2nd]
ON [1st].path_without_version = [2nd].path_without_version
WHERE [1st].version < [2nd].version
)
;
-- An in-progress dataset that now contains obsolete files will never
-- complete; record that fact in its status.
UPDATE dataset SET status='incomplete,obsolete' WHERE status='in-progress' AND
dataset_id IN (SELECT dataset_id FROM file WHERE status='obsolete');
158 changes: 158 additions & 0 deletions permanent_error_status.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#!/usr/bin/env python

"""Checks the error_history field of each file for whether it records file-specific errors which had
been repeated over an extended period of time. Such files' statuses will be changed
from 'error' to 'error-badurl' or 'error-checksum'"""

import sys, pdb
import argparse, logging
import sqlite3
import debug
from dateutil.parser import parse
import datetime
global conn, curs

def setup( db='/var/lib/synda/sdt/sdt.db' ):
"""Initializes the connection to the database, etc."""
# To test on a temporary copy of the database:
#db = '/home/painter/db/sdt.db'
global conn, curs
conn = sqlite3.connect(db)
curs = conn.cursor()

def finish():
    """Flush pending changes to the database and release the connection."""
    global conn, curs
    # Commit first, so that close() cannot discard uncommitted work.
    conn.commit()
    conn.close()

def confirm_yesnoquit():
    """Returns True if the user types "yes" or something similar, False for "no",
    or None for "quit".
    Unrecognized input re-prompts instead of falling through: previously the
    function implicitly returned None on bad input, which callers treat the
    same as "quit"."""
    # Note that raw_input returns the empty string for "enter"
    yes = {'yes','y', 'ye', ''}
    no = {'no','n'}
    quits = {'quit','q'}

    while True:
        choice = raw_input().lower()  # in Python 3, this is input().lower()
        if choice in yes:
            return True
        elif choice in no:
            return False
        elif choice in quits:
            return None
        else:
            sys.stdout.write("Please respond with 'yes', 'no', or 'quit'")

def _parse_date( s ):
    """Parse a timestamp string such as '2020-06-08 15:17:13.121540'.
    Uses dateutil when available (as this code originally required); otherwise
    falls back to datetime.strptime so the logic also works without dateutil."""
    try:
        from dateutil.parser import parse
        return parse( s )
    except ImportError:
        for fmt in ( '%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S' ):
            try:
                return datetime.datetime.strptime( s, fmt )
            except ValueError:
                pass
        raise ValueError( "unrecognized date string %r" % (s,) )

def tobe_permanent( error_history, error_in='ERROR 404', min_interval=5, min_errors=3 ):
    """If the error_history records repeated errors, that call for changing a file's status
    to a permanent error, return the new status.  Otherwise return None.
    error_history should be a list of 2-tuples (date,error).  The input error to check
    for should be supplied.  Presently (and probably forever) it must be one of
    "bad checksum" or "ERROR 404".
    The minimum interval between errors (in days) may be supplied, and defaults to 5.
    The minimum number of errors may be supplied, and defaults to 3."""
    if len(error_history)<min_errors:
        return None
    return_error = { 'ERROR 404':'error-badurl', 'bad checksum':'error-checksum' }
    assert error_in in return_error
    dates = [ e[0] for e in error_history if e[1]==error_in ]
    if len(dates)<min_errors:
        return None
    # Bug fix: sort the dates *before* choosing the baseline.  The original
    # code took parse(dates[0]) prior to dates.sort(), so an out-of-order
    # history produced negative intervals and an incorrectly low error count.
    dates.sort()
    date_last_error = _parse_date( dates[0] )
    nerrors = 1
    for i in range(1,len(dates)):
        # Get the time since the last counted error.
        # Converting to total_seconds lets us support fractional days.
        interval = (_parse_date(dates[i]) - date_last_error).total_seconds()/3600./24
        if interval>=min_interval:
            # dates[i] is at least min_interval days after the previous counted error.
            nerrors += 1
            date_last_error = _parse_date( dates[i] )
    if nerrors>=min_errors:
        return return_error[error_in]
    else:
        return None

def mark_permanent_errors( min_interval=5, nrepeats=3, dryrun=True, confirm=True ):
    """Check the database for files with 'error' status, whose error_history represents
    repeated errors, either 'ERROR 404' or 'bad checksum'. For each such file, change its status
    to a permanent one (not affected by "synda retry"): 'error-badurl' or 'error-checksum'.
    The minimum interval between errors may be supplied, and defaults to 5.
    The minimum number of repeated errors may be supplied, and defaults to 3.
    With dryrun=True, candidate changes are printed but the database is untouched.
    With confirm=True (and dryrun False), each change is applied only after
    interactive confirmation via confirm_yesnoquit(); answering "quit" stops
    processing of the remaining files as well.
    Requires the module globals conn/curs (see setup()); committing is left to
    the caller, e.g. finish().
    """
    global conn, curs
    # At present, the shortest possible non-null error string is 45 characters:
    # [('2020-06-08 15:17:13.121540', 'ERROR 404')]
    # The shortest possible one with two errors recorded is 90 characters:
    # [('2020-06-09 10:49:35.255064', 'ERROR 404'), ('2020-06-09 13:47:21.039614', 'ERROR 404')]
    # So if we want three errors we only need look at strings with >=45*nrepeats characters.
    cmd = "SELECT file_id, filename, error_history FROM file WHERE " +\
        "status='error' AND error_history IS NOT NULL AND LENGTH(error_history)>=?"
    curs.execute( cmd, (45*nrepeats,) )
    results = curs.fetchall()
    for result in results:
        if result is None:
            break
        file_id = result[0]
        filename = result[1]
        # NOTE(review): eval() of a database field assumes error_history is a
        # trusted, well-formed Python literal; ast.literal_eval would be safer
        # if the field could ever hold untrusted text.
        error_history = eval(result[2])
        # Try the two recognized repeatable errors; first match wins.
        new_status = tobe_permanent(error_history,'ERROR 404',min_interval,nrepeats)
        if new_status is None:
            new_status = tobe_permanent(error_history,'bad checksum',min_interval,nrepeats)
        if new_status is not None:
            if dryrun:
                # print, don't log. This is a debugging mode.
                print "file %s is ready for permanent error status as %s"%(filename,new_status)
                print " error_history=%s"%error_history
            elif confirm:
                # Change the error status, but with user confirmation file-by-file
                # A filename may have multiple versions, but it's more understandable than file_id.
                print "change %s from status 'error' to '%s'?"%(filename,new_status)
                yesnoquit = confirm_yesnoquit()
                if yesnoquit==True:
                    cmd = "UPDATE file SET status=? WHERE file_id=?"
                    cmd_vars = ( new_status, file_id )
                    curs.execute( cmd, cmd_vars )
                    print "changed status to '%s'" % new_status
                    logging.info( "changed status of %s to '%s'" % (filename, new_status) )
                elif yesnoquit==False:
                    print "leaving status at 'error'"
                else:
                    # None means "quit": leave this file alone and stop the loop.
                    print "leaving status at 'error' for this and subsequent files"
                    break
            else:
                # Change the error status, without asking for confirmation.
                # A filename may have multiple versions, but it's more understandable than file_id.
                cmd = "UPDATE file SET status=? WHERE file_id=?"
                cmd_vars = ( new_status, file_id )
                curs.execute( cmd, cmd_vars )
                logging.info( "changed status of %s to '%s'" % (filename, new_status) )

if __name__ == '__main__':
    # Set up logging and arguments, then call the appropriate 'run' function.
    logfile = '/p/css03/scratch/logs/permanent_error.log'
    logging.basicConfig( filename=logfile, level=logging.INFO, format='%(asctime)s %(message)s' )

    p = argparse.ArgumentParser(
        description="Convert repeated errors after multiple retries to permanent ones,"+
        " in some cases." )
    p.add_argument( "--interval", dest="interval", required=False, type=float, default=5, help=
                    "minimum interval between errors for both to be considered; in days." )
    p.add_argument( "--nrepeats", dest="nrepeats", required=False, type=int, default=3, help=
                    "number of repeated errors to change the error to a permanent one." )
    # The dryrun and confirm switches come in on/off pairs; their defaults are
    # set once by set_defaults() below, so no (previously conflicting)
    # default= is repeated on the individual arguments.
    p.add_argument('--dryrun', dest='dryrun', action='store_true' )
    p.add_argument('--no-dryrun', dest='dryrun', action='store_false' )
    p.add_argument('--confirm', dest='confirm', action='store_true' )
    p.add_argument('--no-confirm', dest='confirm', action='store_false' )
    p.set_defaults( dryrun=False, confirm=False )

    args = p.parse_args( sys.argv[1:] )

    setup()
    logging.info( "started permanent_error_status, args=%s"%args )
    mark_permanent_errors( args.interval, args.nrepeats, dryrun=args.dryrun,
                           confirm=args.confirm )
    finish()

Loading

0 comments on commit 5db6fec

Please sign in to comment.