#!/usr/bin/env python # # 2014/11/10 Sean Dobbs (s-dobbs@northwestern.edu) # # This script should be run after offline monitoring ROOT files are generated # Runs through offline monitoring ROOT files, generates PNG images and inserts summary data to the website # # Takes three arguments: # # 1. the date string that specifies when the monitoring jobs were started # 2. the base directory where the monitoring files are stored # We assume a directory of the form /volatile/halld/RunPeriod-2014-10/offline_monitoring/$DATE/$DATATYPE/RRRRRR/ # 3. the directory to store the PNG files (and other associated processing files) # import sys,os,errno from os import listdir from os.path import isfile, join from optparse import OptionParser # monitoring libraries from datamon_db import datamon_db import make_monitoring_plots import process_monitoring_data import process_run_conditions ############################################ ### GLOBALS #PROCESSED_RUN_LIST_FILE = "processedrun.lst" ROOTFILE_DIR = "ROOT" VERSION_NUMBER = -1 MAKE_PLOTS = True MAKE_DB_SUMMARY = True MAKE_RUN_CONDITIONS = False FORCE_PROCESSING = False RUN_NUMBER = None NEWDIR_MODE = "775" MIN_RUN = -1 MAX_RUN = 1000000 ############################################ def mkdir_p(path): try: os.makedirs(path) except OSError as exc: # Python >2.5 if exc.errno == errno.EEXIST and os.path.isdir(path): pass else: raise ### START PROCESSING # read in command line args parser = OptionParser(usage = "process_new_offline_data.py input_directory output_directory") parser.add_option("-p","--disable_plots", dest="disable_plotting", action="store_true", help="Don't make PNG files for web display") parser.add_option("-d","--disable_summary", dest="disable_db_summary", action="store_true", help="Don't calculate summary information and store it in the DB") #parser.add_option("-c","--disable_conditions", dest="disable_run_conditions", action="store_true", # help="Don't process and store run conditions information") parser.add_option("-C","--process_conditions", dest="process_run_conditions", action="store_true", help="Process and store run conditions information") parser.add_option("-f","--force", dest="force", action="store_true", help="Ignore list of already processed runs") parser.add_option("-R","--run_number", dest="run_number", help="Process only this particular run number") parser.add_option("-V","--version_number", dest="version_number", help="Save summary results with this DB version ID") parser.add_option("-v","--version", dest="version_string", help="Save summary results with a particular data version, specified using the string \"RunPeriod,Revision\", e.g., \"RunPeriod-2014-10,5\"") parser.add_option("-b","--min_run", dest="min_run", help="Minimum run number to process") parser.add_option("-e","--max_run", dest="max_run", help="Maximum run number to process") (options, args) = parser.parse_args(sys.argv) if(len(args) < 3): parser.print_help() sys.exit(0) REVISION = args[1] INPUT_DIRECTORY = args[2] OUTPUT_DIRECTORY = args[3] # initialize DB db = datamon_db() if(options.disable_plotting): MAKE_PLOTS = False if(options.disable_db_summary): MAKE_DB_SUMMARY = False #if(options.disable_run_conditions): # MAKE_RUN_CONDITIONS = False if(options.process_run_conditions): MAKE_RUN_CONDITIONS = True if(options.force): FORCE_PROCESSING = True if(options.run_number): try: RUN_NUMBER = int(options.run_number) except ValueError: print "Invalid run number = " + options.run_number sys.exit(0) if RUN_NUMBER <= 0: print "Invalid run number = " + options.run_number sys.exit(0) if(options.version_string): try: revision = -1 (run_period,revision_str) = options.version_string.split(",") try: revision = int(revision_str) except ValueError: print "Invalid revision = " + revision sys.exit(0) VERSION_NUMBER = db.GetVersionIDRunPeriod(run_period, revision) if(VERSION_NUMBER<0): print "version not found in DB = " + options.version_string sys.exit(0) except: print "Invalid version specification = " + options.version_string sys.exit(0) print "Configured RunPeriod = %s Revision = %d -> VersionID = %d" % (run_period,revision,VERSION_NUMBER) if(options.version_number): try: VERSION_NUMBER = int(options.version_number) except ValueError: print "Invalid version number = " + options.version_number sys.exit(0) if VERSION_NUMBER <= 0: print "Invalid version number = " + options.version_number sys.exit(0) if options.min_run: MIN_RUN = int(options.min_run) if options.max_run: MAX_RUN = int(options.max_run) # check to see if the input directory is real if not os.path.isdir(INPUT_DIRECTORY): print "Invalid input directory specified = " + INPUT_DIRECTORY sys.exit(0) # make the output directiory if it doesn't already exist if os.path.exists(OUTPUT_DIRECTORY) and not os.path.isdir(OUTPUT_DIRECTORY): print "File already exists and is not a directory = " + OUTPUT_DIRECTORY sys.exit(0) if not os.path.exists(OUTPUT_DIRECTORY): print "Creating directory " + OUTPUT_DIRECTORY + " ... " os.system("mkdir -m"+NEWDIR_MODE+" -p " + OUTPUT_DIRECTORY) ## need error checks # allow for incremental processing ... #run_list = [] #if not FORCE_PROCESSING and os.path.exists( join(OUTPUT_DIRECTORY,PROCESSED_RUN_LIST_FILE) ): # # read in list of runs we've already processed # try: # runlist_file = open(join(OUTPUT_DIRECTORY,PROCESSED_RUN_LIST_FILE)) # for line in runlist_file: # try: # runnum = int(line.strip()) # except ValueError: # print "Unexpected value in run file = " + line.strip() + " , skipping..." # else: # #print "processed run number = " + str(runnum) # run_list.append( runnum ) # runlist_file.close() # except IOError as e: # print "I/O error({0}): {1}".format(e.errno, e.strerror) # except: # print "Unexpected error:", sys.exc_info()[0] # sys.exit(0) ### rundirs_on_disk = [] dirs_on_disk = [ d for d in listdir(join(INPUT_DIRECTORY,REVISION,ROOTFILE_DIR)) if os.path.isdir(join(INPUT_DIRECTORY,REVISION,ROOTFILE_DIR,d)) ] for dirname in sorted(dirs_on_disk): try: runnum = int(dirname) except ValueError: print "skipping directory " + dirname + " ..." else: #if runnum not in run_list : #print "run number = " + str(runnum) if RUN_NUMBER is None: rundirs_on_disk.append(dirname) else: if runnum == RUN_NUMBER: rundirs_on_disk.append(dirname) # save processed runs #try: # runlist_file = open(join(OUTPUT_DIRECTORY,PROCESSED_RUN_LIST_FILE),'a') #except IOError as e: # print "I/O error({0}): {1}".format(e.errno, e.strerror) #except: # print "Unexpected error:", sys.exc_info()[0] # sys.exit(0) # do the heavy work for each directory - one run per directory for rundir in rundirs_on_disk: runnum = int(rundir) if runnumMAX_RUN: continue print "checking run " + str(runnum) ## add blank run to DB if it doesn't exist if(db.GetRunID(runnum) < 0): db.CreateRun(runnum) rootfilespath = join(INPUT_DIRECTORY,REVISION,ROOTFILE_DIR) root_files = [ join(rootfilespath,rundir,f) for f in listdir(join(rootfilespath,rundir)) if (isfile(join(rootfilespath,rundir,f))and(f[-5:]=='.root')) ] # add directory if it doesn't exist misc_dir = join(INPUT_DIRECTORY,REVISION,"misc",rundir) if not os.path.exists(misc_dir): os.system("mkdir -p " + misc_dir) if not os.path.isdir(misc_dir): print "file %s exists and is not a directory, skipping this run ..."%misc_dir rootfilelist_fname = join(misc_dir,"rootfiles.txt") ## only run over files that haven't already been processed ## we use the "rootfiles.txt" file as a store of which files have been already processed new_files_exist = False monitoring_files = {} new_monitoring_files = {} processed_files = [] if isfile(rootfilelist_fname): rootfilelist_file = open(rootfilelist_fname) processed_files = rootfilelist_file.read().splitlines() rootfilelist_file.close() #print "procssed files = " + str(processed_files) #print "current files = " + str(root_files) for filepath in sorted(root_files): fname = filepath.split('/')[-1] filenum = -1 fname_fields = fname[:-5].split("_") # sanity checks if(len(fname_fields) < 4): print "invalid filename = " + fname + ", skipping ..." continue if( (fname_fields[0]!="hd") or (fname_fields[1]!="root") ): print "invalid filename = " + fname + ", skipping ..." continue try: file_runnum = int(fname_fields[2]) filenum = int(fname_fields[3]) except ValueError: print "invalid filename = " + fname + ", skipping ..." continue if file_runnum != runnum : print "invalid filename = " + fname + ", skipping ..." continue # save a mapping of the files to processed with their number within the run monitoring_files[filepath] = filenum # check to see if we've processed this file already if filepath not in processed_files: new_monitoring_files[filepath] = filenum new_files_exist = True ## skip further processing if it's not needed if not FORCE_PROCESSING and not new_files_exist: continue if len(monitoring_files) == 0: continue files_to_process = {} if FORCE_PROCESSING: files_to_process = monitoring_files else: files_to_process = new_monitoring_files # loop over the files and do any pre-file processing we need to do for (fname,filenum) in monitoring_files.items(): # we are good! let's get some work done print "processing run " + str(runnum) + " file " + str(filenum) + " ..." # process monitoring data for each file if MAKE_DB_SUMMARY: cmdargs = "--file_number " + str(filenum) + " " + str(runnum) + " " + str(VERSION_NUMBER) + " " + fname print " analyzing DB info..." print "process_monitoring_data " + cmdargs process_monitoring_data.main(cmdargs.split()) # sum all the files and place them in a web-viewable location summed_rootfile = join(OUTPUT_DIRECTORY,"rootfiles","hd_root_" + rundir + ".root") if isfile(summed_rootfile): os.system("rm -f " + summed_rootfile) os.system("mkdir -m"+NEWDIR_MODE+" -p " + join(OUTPUT_DIRECTORY,"rootfiles")) # note hadd -k skips corrupt or missing files - we want to do our best but not fail here os.system("hadd -v 0 " + " ".join([summed_rootfile] + monitoring_files.keys() )) # save the current list of files monitoring_file_list = open(rootfilelist_fname,"w") for fname in sorted(monitoring_files.keys()): print>>monitoring_file_list, fname monitoring_file_list.close() # make plots for the sum of all files in the run if MAKE_PLOTS: cmdargs = " --histogram_list histograms_to_monitor" cmdargs += " --macro_list macros_to_monitor " monitoring_data_dir = join(OUTPUT_DIRECTORY,("Run%06d" % runnum)) #mkdir_p(monitoring_data_dir) os.system("mkdir -m"+NEWDIR_MODE+" -p " + monitoring_data_dir) ## need error checks cmdargs += " --output_dir " + monitoring_data_dir cmdargs += " --file_list " + rootfilelist_fname print " creating plots..." make_monitoring_plots.main(cmdargs.split()) if MAKE_RUN_CONDITIONS: # update the run metadata cmdargs = str(runnum) print " saving conditions..." process_run_conditions.main(cmdargs.split()) # cleanup #runlist_file.close()