#!/usr/bin/env python
#-*- coding: ISO-8859-1 -*-
#
# Ported to GlueX by Sean Dobbs (s-dobbs@northwestern.edu), 2014
#
# Copyright 2004, 2005
# Cornell University, Ithaca, NY 14853. All rights reserved.
#
# Author: Valentin Kuznetsov, 2004
#
# This utility counts events for given run/grade/timeStamp in EventStore
"""ESEventCounter counts events for given run/grade/timeStamp in EventStore"""

import os, sys, string, re, time
import esdb_auth, os_path_util, es_init, gen_util, sql_util, key_dump
from es_init import ESInit, checkArg
import SOAPpy

# include python profiler only if python version greater than 2.3
if es_init.checkPythonVersion("2.3"):
    import hotshot        # Python profiler
    import hotshot.stats  # profiler statistics


##### FIX THIS
class MetaData_Services:
    """Client for the CLEO metadata SOAP service described by a WSDL file."""

    def __init__(self):
        """Create the SOAP proxy and set the CLEO namespace on every method."""
        CLEO_wsdl = 'http://cougar.cs.cornell.edu/CLEO/CLEO_WS.asmx?WSDL'
        CLEO_namespace = 'http://cleo.lepp.cornell.edu/CLEO'
        config = SOAPpy.SOAPConfig()
        config.debug = 0
        config.buildWithNamespacePrefix = False
        self.proxy = SOAPpy.WSDL.Proxy(CLEO_wsdl, config,
                                       namespace=CLEO_namespace, noroot=1)
        for method in self.proxy.methods.values():
            method.namespace = CLEO_namespace

    def _unpackResponse(self, response):
        """Return the value of the second attribute held by a SOAP response.

        The response attributes form a set of tuples:
        type, keyVal, ns, name, typed, cache, data, keyord, attrs
        """
        # NOTE(review): taking position 1 relies on the ordering of the
        # response's attribute dictionary -- confirm that this ordering is
        # stable for the SOAPpy version in use.
        attributes = list(response.__dict__.items())
        retType, payload = attributes[1]
        return payload

    def GetRuns(self, datasetName, energy=''):
        """Call the service's GetRuns for a dataset name and energy range name."""
        response = self.proxy.GetRuns(Dataset_Name=datasetName,
                                      Energy_Range_Name=energy)
        return self._unpackResponse(response)

    def GetDatasetNames(self):
        """Call the service's GetDatasetNames and return the unpacked result."""
        return self._unpackResponse(self.proxy.GetDatasetNames())

    def GetEnergyRangeNames(self):
        """Call the service's GetEnergyRangeNames and return the unpacked result."""
        return self._unpackResponse(self.proxy.GetEnergyRangeNames())

    def printSOAPMethods(self):
        """Print the number of WSDL methods followed by each method name."""
        print('%d methods in WSDL:' % len(self.proxy.methods) + '\n')
        for key in self.proxy.methods.keys():
            print(key)
        print("")


def countRecords(keyFileDict):
    """Print per-run and total record counts for every stream.

    keyFileDict maps a run to the list of its key files. The per-stream
    counts of each file come from key_dump.countEvents(); counts of all files
    that belong to one run are summed position by position.

    Which streams are reported is controlled by the module-level variable
    ``inStream``: "all" reports every stream, any other value reports only the
    stream with that name.

    Raises ValueError when two key files of the same run do not list the same
    streams in the same order.
    """
    totals = {}
    for run in sorted(keyFileDict):
        sList = []
        firstFile = ""
        for fileName in keyFileDict[run]:
            if not sList:
                sList = key_dump.countEvents(fileName)
                firstFile = fileName
                continue
            tmpList = key_dump.countEvents(fileName)
            for idx, rec in enumerate(sList):
                # A shorter list in a later file is a mismatch as well; it
                # must not surface as an unexplained IndexError.
                if idx >= len(tmpList) or rec[0] != tmpList[idx][0]:
                    raise ValueError(
                        "records mismatch between: %s %s %s %s"
                        % (firstFile, sList, fileName, tmpList))
                sList[idx] = (rec[0], rec[1] + tmpList[idx][1])
        msg = "Run %s :" % run
        counter = 0
        for stream, nRec in sList:
            if inStream == "all":
                # NOTE(review): both branches are identical as found;
                # presumably they once differed only in padding -- confirm.
                if not counter:
                    msg += " %s in %s's\n" % (nRec, stream)
                else:
                    msg += " %s in %s's\n" % (nRec, stream)
                counter += 1
            elif stream == inStream:
                msg += " %s in %s's" % (nRec, stream)
            totals[stream] = totals.get(stream, 0) + nRec
        print(msg)
    print("----------------------------------------------")
    counter = 0
    for stream in totals:
        if inStream == "all":
            if not counter:
                print("Total : %s in %s's" % (totals[stream], stream))
            else:
                print(" %s in %s's" % (totals[stream], stream))
            counter += 1
        elif inStream == stream:
            print("Total : %s in %s's" % (totals[stream], stream))


def _fetchKeyFileNames(run, graphid, view, trace=False):
    """Return the key file names stored for one run, graph id and view.

    Uses the module-level ``sqlIn`` object for both queries. With trace set,
    every query and its result are printed.
    """
    # NOTE(review): the values are interpolated into the SQL text unescaped;
    # switch to bound parameters if sqlIn supports them.
    query = "SELECT keyFileId FROM KeyFile WHERE graphid='%s' AND run=%s AND view='%s'" % (graphid, run, view)
    idRows = sqlIn.fetchAll(query)
    if trace:
        print(query)
        print(idRows)
    names = []
    for idRow in idRows:
        query = "SELECT fileName FROM FileID WHERE fileId=%s" % idRow[0]
        nameRow = sqlIn.fetchOne(query)
        if trace:
            print(query)
            print(nameRow)
        # skip ids without a FileID row or with an empty file name
        if nameRow and nameRow[0]:
            names.append(nameRow[0])
    return names


def getKeyFileList(runList, graphid, view):
    """Return (run, fileName) pairs for the key files of the given runs.

    Queries and results are printed when the module-level ``verbose`` flag
    is set.
    """
    # get list of key files which cover required run range
    keyFileList = []
    for run in runList:
        for fileName in _fetchKeyFileNames(run, graphid, view, verbose):
            keyFileList.append((run, fileName))
    return keyFileList


def getKeyFileDict(runList, graphid, view):
    """Return a dictionary mapping each run to the list of its key file names.

    Runs for which no key file name is found get no entry.
    """
    keyFileDict = {}
    for run in runList:
        for fileName in _fetchKeyFileNames(run, graphid, view):
            keyFileDict.setdefault(run, []).append(fileName)
    return keyFileDict


# NOTE(review): the main section that starts on the next line is kept exactly
# as found. Its line structure is lost and part of its option parsing is
# missing, so it has to be restored from version control before this script
# can run.
# # main # if __name__ == "__main__": # initialize user options localOpt=["[ -grade ] [ -time ] [ -skim ]"] localOpt+=["[ -runRange -dataVersionName |"] localOpt+=[" -dataset -energy ]"] localOpt+=["[ -instream ]"] localOpt.append("") usage=es_init.helpMsg("ESEventCounter",localOpt) usageDescription=""" Option description (required options marked with '*'): * -grade: specifies the grade, e.g. "physics", "p2-unchecked" you need to provide two of them, 'old' from which you're reading and 'new' to which you're moving * -time: specifies the timeStamp, e.g. 20090227 you may provide one or two of them, 'old' from which you're reading and 'new' to which you're moving -dataVersionName specifies a data version name associated with given grade. It can be found by using ESDump command. -runRange specifies a run range within ESVersionManager will operate -dataset specifies a data set name, e.g. data32 -energy specifies an enenrgy name, e.g. psi(2S) -skim specifies the view you want to use, e.g. "2photon" -instream specifies the stream you're interesting, e.g. -instream beginrun once dropped ESEventCounter will count everything in every found stream """ examples=""" ESEventCounter -db EventStore@lnx150.lns.cornell.edu -grade physics -time 20050520 -runRange 213038 213040 -view bhagam count events(records) in every stream for runs in 213038-213040 range for physics grade and time stamp closed to 20050520. ESEventCounter -db EventStore@lnx150.lns.cornell.edu -grade physics -time 20050520 -runRange 213038 213038 -instream event count only events (events in event stream) for run 213038, physics grade and time stamp closed to 20050520. 
ESEventCounter -db EventStore@lnx150.lns.cornell.edu -grade physics -time 20050520 -dataset data39 -instream event count only events (events in event stream) for runs from data39, physics grade and time stamp closed to 20050520. ESEventCounter -db EventStore@lnx150.lns.cornell.edu -grade physics -time 20050520 -energy "psi(2S)" count only events (events in event stream) for runs in psi(2S), physics grade and time stamp closed to 20050520. """ userCommand="ESEventCounter.py" # get EventStore option list optList,dictOpt = es_init.ESOptions(userCommand,sys.argv,usage,usageDescription,examples) dbName,dbHost,userName,userPass,dbPort,dbSocket = optList[0] historyFile,logFile,verbose,profile = optList[1] userCommand = optList[2]+" " userName = 'cleo' userPass = 'cleoc' outputLog, globalLog = es_init.ESOutputLog(logFile) dbInHost=bInName=dbOutHost=dbOutHost=oldDir=newDir="" grade = "" timeS = "" minR = 0 maxR = 0 svName = "" view = "all" inStream = "all" dataset = "" energy = "" badRunList = 0 goodRunList= 0 x = 1 fileRunList= "" while x < len(sys.argv): try: if string.lower(sys.argv[x]) == "-db" : dbInName,dbInHost,dbInPort,dbInSocket=es_init.decodeHostNameString(sys.argv[x+1]) x+=2 continue if sys.argv[x]=="-runRange": minR = int(sys.argv[x+1]) msg = "you need to use -runRange " if x+2=minR and run<=maxR: runList.append(run) if not len(runList): continue if verbose: print "minR",minR,"maxR",maxR,"runList",runList # get list of key files which cover required run range keyFileDict = getKeyFileDict(runList,graphid,view) if not len(keyFileDict.keys()): print "Runs in '%s': N/A\n"%svName else: print "Runs in '%s':"%svName countRecords(keyFileDict)