#!/usr/bin/env python3
"""Move offsite-production output files from ``srcdir`` to ``destdir``.

Usage:
    <this script> RUN_NUMBER

RUN_NUMBER is spliced into ``RunPeriod*/recon/ver*/RUN0<RUN_NUMBER>/FILE*``
to select the source directories.  Only files that live in a directory whose
``job_info*.tgz`` archive contains an ``exitcode`` member equal to 0, and
that have not been modified within the last ``MIN_AGE_SECONDS``, are moved.
Every directory creation, exit code and file move is recorded in a SQLite
file whose name is derived from ``sqlite_file`` and the start time.

Exit codes:
    0  success
    1  could not create a destination directory
    2  no write permission to a destination directory
    3  destination file exists and ERROR_ON_FILE_EXISTING is set
    4  destination file exists and cannot be overwritten
    5  moving a file failed
    6  missing RUN_NUMBER argument
"""

import glob
import os
import sqlite3
import sys
import tarfile
import time

# The following are top level directories which should contain
# subdirectories named for the run periods.
srcdir = '/lustre/expphy/volatile/halld/offsite_prod/'
destdir = '/cache/halld/'
sqlite_file = '/lustre/expphy/volatile/halld/offsite_prod/move_to_tape_multi%s.sqlite'  # %s will be replaced with date/time

#TESTMODE = True
TESTMODE = False

VERBOSE = 1
ERROR_ON_DIR_PERMISSIONS = True
ERROR_ON_FILE_EXISTING = False
REPLACE_EXISTING_FILES = True
MIN_AGE_SECONDS = 3600*4

# Format used for every timestamp (SQLite file name and table rows).
TIME_FORMAT = '%Y.%m.%d-%H:%M'

# Dictionary of output file names and file location/name.
# The key will be passed to glob() to find matching files
# and the directory names will be used to determine the
# run/file number. The destination will have %06d replaced
# with the run number and %s replaced with the string:
#   RFSTR = '%06d_%03d' % (RUN, FILE)
outfiles = {
    'job_info_*.tgz': 'job_info/%06d/job_info_%s.tgz',
    'tree_fcal_hadronic_eff.root': 'tree_fcal_hadronic_eff/%06d/tree_fcal_hadronic_eff_%s.root',
    'tree_bcal_hadronic_eff.root': 'tree_bcal_hadronic_eff/%06d/tree_bcal_hadronic_eff_%s.root',
    'tree_TS_scaler.root': 'tree_TS_scaler/%06d/tree_TS_scaler_%s.root',
    'p3pi_excl_skim.root': 'p3pi_excl_skim/%06d/p3pi_excl_skim_%s.root',
    'tree_trackeff.root': 'tree_trackeff/%06d/tree_trackeff_%s.root',
    'tree_tof_eff.root': 'tree_tof_eff/%06d/tree_tof_eff_%s.root',
    'tree_sc_eff.root': 'tree_sc_eff/%06d/tree_sc_eff_%s.root',
    'tree_TPOL.root': 'tree_TPOL/%06d/tree_TPOL_%s.root',
    'tree_PSFlux.root': 'tree_PSFlux/%06d/tree_PSFlux_%s.root',
    'hd_rawdata_*.sync.evio': 'sync/%06d/sync_%s.evio',
    'hd_rawdata_*.random.evio': 'random/%06d/random_%s.evio',
    'hd_rawdata_*.omega.evio': 'omega/%06d/omega_%s.evio',
    'hd_rawdata_*.ps.evio': 'ps/%06d/ps_%s.evio',
    'hd_rawdata_*.FCAL-LED.evio': 'FCAL-LED/%06d/FCAL-LED_%s.evio',
    'hd_rawdata_*.DIRC-LED.evio': 'DIRC-LED/%06d/DIRC-LED_%s.evio',
    'hd_rawdata_*.CCAL-LED.evio': 'CCAL-LED/%06d/CCAL-LED_%s.evio',
    'hd_rawdata_*.BCAL-LED.evio': 'BCAL-LED/%06d/BCAL-LED_%s.evio',
    'dana_rest.hddm': 'REST/%06d/dana_rest_%s.hddm',
    'hd_root.root': 'hists/%06d/hd_root_%s.root',
    # This entry used to be 'converted_random/%06d', i.e. it had no file
    # name part, so every matching file of a run was moved onto the same
    # destination path and overwrote the previous one.  It now follows the
    # same <name>/<run>/<name>_<run>_<file> layout as the other entries.
    # NOTE(review): confirm this is the intended destination layout.
    'converted_random_*.hddm': 'converted_random/%06d/converted_random_%s.hddm',
}


def timestamp():
    """Return the current local time formatted with TIME_FORMAT."""
    return time.strftime(TIME_FORMAT)


def parse_run_and_file(path):
    """Return ``(run, file)`` as ints from the directory names in *path*.

    The first path component of the form ``RUN<digits>`` gives the run
    number and the first of the form ``FILE<digits>`` gives the file number.

    Raises:
        ValueError: if either component is missing from *path*.
    """
    run = None
    file_num = None
    for part in path.split('/'):
        if run is None and part.startswith('RUN') and part[3:].isdigit():
            run = int(part[3:])
        elif file_num is None and part.startswith('FILE') and part[4:].isdigit():
            file_num = int(part[4:])
    if run is None or file_num is None:
        raise ValueError('cannot determine run/file number from path: ' + path)
    return run, file_num


def build_dest_fname(dest_top, relpath, destpat, run, file_num):
    """Return the full destination file name for one source file.

    *destpat* is a value from ``outfiles``: ``%06d`` is replaced with the
    zero-padded run number, ``%03d`` with the zero-padded file number and
    ``%s`` with ``'%06d_%03d' % (run, file_num)``.  The result is
    ``dest_top + relpath + '/' + <expanded destpat>``.
    """
    dest = destpat.replace('%06d', '%06d' % run)
    dest = dest.replace('%03d', '%03d' % file_num)
    dest = dest.replace('%s', '%06d_%03d' % (run, file_num))
    return dest_top + relpath + '/' + dest


def read_exit_codes(tgz_path):
    """Return the integer contents of every ``*/exitcode`` member of a tar archive.

    Members that are not regular files are skipped.  Any error opening or
    reading the archive, or converting a member's contents to int,
    propagates to the caller.
    """
    codes = []
    with tarfile.open(tgz_path, 'r') as archive:
        for member in archive.getmembers():
            if not member.name.endswith('/exitcode'):
                continue
            fobj = archive.extractfile(member)
            if fobj is None:  # extractfile() returns None for non-file members
                continue
            with fobj:
                codes.append(int(fobj.read(10)))  # Read exitcode file contents and convert to int
    return codes


def create_tables(c):
    """Create the bookkeeping tables in the SQLite DB if they do not exist yet."""
    c.execute('''
        CREATE TABLE IF NOT EXISTS file_moves(
            run INTEGER,
            file INTEGER,
            from_fname TEXT,
            to_fname TEXT,
            time TEXT
        );''')
    c.execute('''
        CREATE TABLE IF NOT EXISTS dir_creates(
            dirname TEXT,
            time TEXT
        );''')
    c.execute('''
        CREATE TABLE IF NOT EXISTS exitcodes(
            exitcode INTEGER,
            run INTEGER,
            file INTEGER,
            fname TEXT,
            file_modification_time TEXT,
            time TEXT
        );''')


def find_dirs_with_good_exit_codes(c, primary_pattern):
    """Return the set of run/file directories (relative to srcdir) with exit code 0.

    The exit codes are stored in the job_info*.tgz files, so each archive is
    opened and its ``exitcode`` member(s) read.  Every exit code found is
    also recorded in the ``exitcodes`` table through cursor *c*.
    """
    pattern = srcdir + primary_pattern + '/job_info*.tgz'
    print('Looking for ' + pattern + ' ...')
    if VERBOSE > 1:
        print('pattern: ' + pattern)

    good_dirs = set()
    for f in sorted(glob.glob(pattern)):
        # Deliberately best-effort: an unreadable or malformed archive only
        # means its directory is not marked as good, so its files stay put.
        try:
            exitcodes = read_exit_codes(f)
            run, file_num = parse_run_and_file(f)
            mtime = time.strftime(TIME_FORMAT, time.localtime(os.path.getmtime(f)))
        except Exception as e:
            print('ERROR reading exit code from %s: %s' % (f, e))
            continue

        for exitcode in exitcodes:
            c.execute(
                'INSERT INTO exitcodes (exitcode,run,file,fname,file_modification_time,time) VALUES(?,?,?,?,?,?)',
                (exitcode, run, file_num, f, mtime, timestamp()))
            if exitcode == 0:
                # e.g. RunPeriod-2019-11/recon/ver01/RUN071526/FILE027
                good_dirs.add(os.path.dirname(f[len(srcdir):]))
            if VERBOSE > 1:
                print('%s exitcode: %d' % (f, exitcode))
    return good_dirs


def move_matching_files(conn, c, primary_pattern, dirs_with_good_exit_codes):
    """Move every eligible file matching a pattern in ``outfiles``.

    Returns ``(file_count, stats, dirs_without_good_exit_codes)`` where
    *file_count* maps each source pattern to its number of matching files,
    *stats* is a dict of counters and *dirs_without_good_exit_codes* is the
    set of relative directories that were skipped for lack of a good exit
    code.  Calls ``sys.exit`` with codes 1-5 on the error conditions listed
    in the module docstring.
    """
    file_count = {}  # key is file pattern (i.e. key from outfiles), val is number of matching files
    stats = {
        'dirs_made': 0,
        'to_move': 0,
        'moved': 0,
        'not_replaced': 0,
        'to_replace': 0,
        'too_young': 0,
    }
    dirs_without_good_exit_codes = set()
    dirs_made = set()

    # Loop over source file patterns
    for srcpat, destpat in outfiles.items():
        pattern = srcdir + primary_pattern + '/' + srcpat
        print('Looking for ' + pattern + ' ...')
        if VERBOSE > 1:
            print('pattern: ' + pattern)
        files = sorted(glob.glob(pattern))
        file_count[srcpat] = len(files)

        # Loop over source files for this pattern
        for f in files:
            # Path relative to srcdir, e.g.
            # RunPeriod-2019-11/recon/ver01/RUN071562/FILE001/hd_rawdata_071562_001.sync.evio
            fullrelpath = f[len(srcdir):]
            # e.g. RunPeriod-2019-11/recon/ver01/RUN071562/FILE001
            fullreldir = os.path.dirname(fullrelpath)

            # Check file age
            age = time.time() - os.path.getmtime(f)
            if age < MIN_AGE_SECONDS:
                if VERBOSE > 1:
                    print('skipping file that was modified too recently: ' + fullreldir)
                stats['too_young'] += 1
                continue

            # Check if file is in directory that had a good exit code for hd_root
            if fullreldir not in dirs_with_good_exit_codes:
                if VERBOSE > 1:
                    print('skipping file with no or bad exit code: ' + fullreldir)
                dirs_without_good_exit_codes.add(fullreldir)
                continue  # No good exit code was found

            # Extract important info from path
            run, file_num = parse_run_and_file(fullrelpath)
            if VERBOSE > 1:
                print('RUN: %d FILE:%d' % (run, file_num))

            # Path relative to both srcdir and destdir. e.g. RunPeriod-2019-11/recon/ver01
            relpath = fullrelpath.split('/RUN')[0]

            # Determine final filename with fullpath to destination
            dest_fname = build_dest_fname(destdir, relpath, destpat, run, file_num)

            # Check if destination directory exists and if not, make it.
            # In TESTMODE nothing is created, so dirs_made remembers which
            # directories we have already pretended to make.
            d_final = os.path.dirname(dest_fname)
            if not os.path.isdir(d_final) and d_final not in dirs_made:
                if VERBOSE > 1:
                    print('making directory: ' + d_final)
                if not TESTMODE:
                    try:
                        # exist_ok: another process may create it between
                        # the isdir() check above and this call.
                        os.makedirs(d_final, exist_ok=True)
                    except OSError as e:
                        print('Error making destination directory: ' + d_final)
                        print(e)
                        sys.exit(1)
                stats['dirs_made'] += 1
                c.execute('INSERT INTO dir_creates (dirname,time) VALUES(?,?)',
                          (d_final, timestamp()))
                dirs_made.add(d_final)

            # Check that we have write permission to the destination directory
            pretend_dir_is_ok = TESTMODE and (d_final in dirs_made)
            if (not os.access(d_final, os.W_OK)) and (not pretend_dir_is_ok):
                if ERROR_ON_DIR_PERMISSIONS:
                    print('ERROR_ON_DIR_PERMISSIONS: Don\'t have write permission to: ' + d_final)
                    sys.exit(2)
                if VERBOSE > 1:
                    print('WARNING: Don\'t have write permission to: ' + d_final)

            # Check if file already exists at destination
            if os.path.exists(dest_fname):
                if ERROR_ON_FILE_EXISTING:
                    # File exists but we're configured to consider that an error
                    print('ERROR_ON_FILE_EXISTING: File already exists: ' + dest_fname)
                    sys.exit(3)
                if not REPLACE_EXISTING_FILES:
                    # File exists and we're configured not to overwrite it
                    if VERBOSE > 1:
                        print('keeping existing file: ' + dest_fname)
                    stats['not_replaced'] += 1
                    continue
                if not os.access(dest_fname, os.W_OK):
                    # File exists and we're configured to overwrite it, but don't have permission
                    print('ERROR: Don\'t have write permission to replace: ' + dest_fname)
                    sys.exit(4)
                stats['to_replace'] += 1

            # At this point, we should have permission to move the file,
            # replacing any existing if necessary.
            if VERBOSE > 1:
                print('Moving: ' + f + ' => ' + dest_fname)
            stats['to_move'] += 1
            if not TESTMODE:
                try:
                    # NOTE: os.replace() is a rename and may fail if source
                    # and destination are on different filesystems.
                    os.replace(f, dest_fname)
                except OSError as e:
                    print('ERROR moving file: ' + f + ' => ' + dest_fname)
                    print(e)
                    sys.exit(5)
                stats['moved'] += 1

            # Record the move only once it has happened (or, in TESTMODE,
            # would have happened), stamped with the current time.
            c.execute(
                'INSERT INTO file_moves (run,file,from_fname,to_fname, time) VALUES(?,?,?,?,?)',
                (run, file_num, f, dest_fname, timestamp()))

        # Flush commands to sqlite DB
        conn.commit()

    return file_count, stats, dirs_without_good_exit_codes


def print_summary(primary_pattern, db_path, file_count, stats, n_dirs_without_good_exit_codes):
    """Print the end-of-run summary of what was found and moved."""
    print('====================================================================')
    print('Summary: ', end='')
    if TESTMODE:
        print(' *** TEST MODE ***', end='')
    print('')
    print('-------------------------------------------\n')
    print(' source directory top: ' + srcdir)
    print('destination directory top: ' + destdir)
    print(' primary match pattern: ' + primary_pattern)
    print('')
    print('Number of files by pattern:')
    for pat, N in file_count.items():
        print('%5d %s' % (N, pat))
    print('\nStats: ')
    print('-------------------------------------------')
    print(' Num. directories made: %d' % stats['dirs_made'])
    print(' Num. files targeted for move: %d' % stats['to_move'])
    print(' Num. files actually moved: %d' % stats['moved'])
    print(' Num. files already existing: %d' % (stats['not_replaced'] + stats['to_replace']))
    print(' Num. files skipped due to already existing: %d' % stats['not_replaced'])
    print('Num. directories without good exit hd_root code: %d' % n_dirs_without_good_exit_codes)
    print(' Num. files skipped due to modification time: %d' % stats['too_young'])
    print('')
    print(' SQLite file: %s' % db_path)
    print('====================================================================')


def main(argv=None):
    """Run the script; *argv* defaults to ``sys.argv``.  Returns the exit code."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'move_to_tape'
        print('Usage: %s RUN_NUMBER' % prog, file=sys.stderr)
        return 6
    run_nb_str = str(argv[1])

    # The following can be used to limit the script to only a few directories
    #primary_pattern = 'RunPeriod*/recon/ver*/RUN*/FILE*'
    #primary_pattern = 'RunPeriod*/recon/ver*/RUN071473/FILE00*/*/*'
    #primary_pattern = 'RunPeriod*/recon/ver*/RUN071474/FILE00*/*/*/'
    #primary_pattern = 'RunPeriod*/recon/ver*/RUN071475/FILE00*/*/*/'
    #primary_pattern = 'RunPeriod*/recon/ver*/RUN071477/FILE00*/*/*'
    #primary_pattern = 'RunPeriod*/recon/ver*/RUN071478/FILE00*/*/*'
    #primary_pattern = 'RunPeriod*/recon/ver*/RUN0' + run_nb_str + '/FILE*/*/*'
    primary_pattern = 'RunPeriod*/recon/ver*/RUN0' + run_nb_str + '/FILE*'
    #/lustre/expphy/volatile/halld/offsite_prod/RunPeriod-2019-11/recon/ver*/*/*/
    #/lustre/expphy/volatile/halld/offsite_prod/RunPeriod*/recon/ver*/RUN071473/FILE*/*/*/hd_rawdata_*.random.evio
    #/lustre/expphy/volatile/halld/offsite_prod/RunPeriod-2019-11/recon/ver01/RUN071475/FILE163/RUN071475/FILE163/

    # Open SQLite file to keep track of what we do
    date_time_str = timestamp()
    if TESTMODE:
        date_time_str += '-TESTMODE'
    db_path = sqlite_file.replace('%s', date_time_str)
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        create_tables(c)
        conn.commit()

        # First get exit codes of hd_root for all directories. We only
        # want to move files that exited cleanly.
        dirs_with_good_exit_codes = find_dirs_with_good_exit_codes(c, primary_pattern)

        file_count, stats, dirs_without_good_exit_codes = move_matching_files(
            conn, c, primary_pattern, dirs_with_good_exit_codes)
    finally:
        # Also runs when sys.exit() is called above, so the record of
        # everything already done is not rolled back on an error exit.
        conn.commit()
        conn.close()

    print_summary(primary_pattern, db_path, file_count, stats,
                  len(dirs_without_good_exit_codes))
    return 0


if __name__ == '__main__':
    sys.exit(main())