#!/usr/bin/env python
###############################################################
#
# This script will generate a tarball containing binaries
# and scripts that can be run to benchmark a system using
# GlueX simulation and reconstruction software. For this
# to work, you need to run it on a "host" system where
# the sim-recon package has already been built and is pointed
# to by the HALLD_HOME environment variable. The complete
# list of environment variables that need to be defined is:
#
#   HALLD_HOME
#   BMS_OSNAME
#   JANA_CALIB_URL
#   JANA_GEOMETRY_URL
#   ROOTSYS
#
# The following environment variables are used, but optional:
#
#   HOST
#   USER
#
# Run this script in any directory and it will produce a
# tarball in the current directory. This will take several
# minutes since it needs to run a few events through each
# program in order to gather the resource files needed
# so they can be included in the tarball.
#
#   > mk_benchmark_tarball.py
#
# To use it, unpack the tarball on the "target" system
# and read the README file it contains for further
# instructions.
#
# The primary motivation for doing this is to provide a
# mechanism that can be used to benchmark computer systems
# one is considering purchasing or with different system
# configurations in order to optimize them for GlueX use.
# In principle, the binaries should run on most Linux systems
# so long as the bitness of the kernel is not less than
# that of the binaries (e.g. binaries built on a 64bit host
# will NOT run on a 32bit target). It should be OK the other
# way around though (binaries from a 32bit host on a 64bit
# target). The kernel generation may also matter
# (e.g. 2.4 vs. 2.6). Only limited testing of this has
# been done.
# # # contact: # davidl@jlab.org x5567 # ############################################################### import subprocess import sys,os import string import stat import time Nevents = 5000 # Number of events to process all_libs = [] ld_linux_so = '' # dynamic linker #------------------------------------------------- # AddLibraries # # Run the 'ldd' command on the given binary and # add any shared libraries not already in the # all_libs global list to it. #------------------------------------------------- def AddLibraries(binname): global all_libs, ld_linux_so libs = [] result = subprocess.Popen(["ldd", binname], stdout=subprocess.PIPE).communicate()[0] lines = result.rstrip().split('\n') for line in lines: tokens = string.split(line) # Look for normal libraries if len(tokens)>2 : libname = tokens[2] if string.find(libname, "/") == 0 : if libname not in all_libs: all_libs.append(libname) # look for dynamic linker if len(tokens)==2 : if string.find(tokens[0], '/ld-linux') >=0 : ld_linux_so = tokens[0] if ld_linux_so not in all_libs: all_libs.append(ld_linux_so) pos = string.rfind(ld_linux_so, '/') if pos>0 : ld_linux_so = ld_linux_so[pos+1:] #------------------------------------------------- # WriteStringToFile # # Write the given string to the specified file. # If the make_executable flag is set to True, then # permissions will be set to make the executable # world readable. #------------------------------------------------- def WriteStringToFile(fname, str, make_executable=False): f = open(fname, "w"); f.write(str) f.close() if(make_executable): os.chmod(fname, stat.S_IRWXU + stat.S_IRWXG + stat.S_IRWXO) #------------------------------------------------------------------- # Check that JANA_CALIB_URL is set and points to SQLite file JANA_CALIB_URL = os.getenv("JANA_CALIB_URL", "not defined") if not string.find(JANA_CALIB_URL, "sqlite:///")==0 : print "JANA_CALIB_URL environment variable does not point to" print "SQLite file. An SQLite file is required. 
Set the" print "environment variable to something like:" print "" print "setenv JANA_CALIB_URL sqlite:////path/to/calib.sqlite" print "" print "(n.b. the four slashes ('/') )" print "" sys.exit(-1) sqlitefile = JANA_CALIB_URL[10:] if not os.path.isfile(sqlitefile): print 'The SQLite file "%s"' % sqlitefile print '(obtained from JANA_CALIB_URL environment variable)' print 'does not seem to exist. Double check the setting of' print 'JANA_CALIB_URL' sys.exit(-1) # Get short file name (without directory) of SQLite file sqlitefile_short = sqlitefile pos = string.rfind(sqlitefile, '/') if pos>=0 : sqlitefile_short = sqlitefile[pos+1:] # Get the directory of the HDDS geometry from JANA_GEOMETRY_URL JANA_GEOMETRY_URL = os.getenv("JANA_GEOMETRY_URL", "not defined") if not string.find(JANA_GEOMETRY_URL, "xmlfile://")==0 : print "JANA_GEOMETRY_URL environment variable does not start" print "with 'xmlfile://'. Set the" print "environment variable to something like:" print "" print "setenv JANA_GEOMETRY_URL xmlfile:///path/to/main_HDDS.xml" print "" print "(n.b. the three slashes ('/') )" print "" sys.exit(-1) main_HDDS_file = JANA_GEOMETRY_URL[10:] if not os.path.isfile(main_HDDS_file): print 'The HDDS xml file "%s"' % main_HDDS_file print '(obtained from JANA_GEOMETRY_URL environment variable)' print 'does not seem to exist. Double check the setting of' print 'JANA_GEOMETRY_URL' sys.exit(-1) # Get short file name (without directory) of main_HDDS.xml file main_HDDS_file_short = main_HDDS_file hdds_dir = '.' 
pos = string.rfind(main_HDDS_file, '/') if pos>=0 : main_HDDS_file_short = main_HDDS_file[pos+1:] hdds_dir = main_HDDS_file[:pos] # Make directory structure try: os.mkdir('gluex_benchmark') os.mkdir('gluex_benchmark/lib') os.mkdir('gluex_benchmark/bin') os.mkdir('gluex_benchmark/plugins') os.mkdir('gluex_benchmark/work') os.mkdir('gluex_benchmark/resources') os.mkdir('gluex_benchmark/calib') os.mkdir('gluex_benchmark/root') os.mkdir('gluex_benchmark/root/etc') os.mkdir('gluex_benchmark/root/etc/plugins') os.mkdir('gluex_benchmark/batch') os.mkdir('gluex_benchmark/tmp') except OSError: pass # Define binary locations hd_ana = "%s/%s/bin/hd_ana" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME")) danarest = "%s/%s/plugins/danarest.so" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME")) rawevent = "%s/%s/plugins/rawevent.so" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME")) bggen = "%s/%s/bin/bggen" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME")) hdgeant = "%s/%s/bin/hdgeant" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME")) mcsmear = "%s/%s/bin/mcsmear" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME")) # Find all dependencies for the executables and plugins print "Finding library dependencies ..." AddLibraries(hd_ana) AddLibraries(danarest) AddLibraries(rawevent) AddLibraries(bggen) AddLibraries(hdgeant) AddLibraries(mcsmear) # Look for dependencies of the dependencies iteratively # until we have found them all Nlibs = 0 while len(all_libs) > Nlibs: Nlibs = len(all_libs) tmp = all_libs for lib in tmp: AddLibraries(lib) # Copy shared libraries for lib in all_libs: print "copying %s ..." % lib res = subprocess.Popen(['cp', lib, 'gluex_benchmark/lib']).communicate()[0] # Copy binaries bins = [hd_ana, bggen, hdgeant, mcsmear] for bin in bins: print "copying %s ..." % bin res = subprocess.Popen(['cp', bin, 'gluex_benchmark/bin']).communicate()[0] # Copy plugins plugins = [danarest, rawevent] for plugin in plugins: print "copying %s ..." 
% plugin res = subprocess.Popen(['cp', plugin, 'gluex_benchmark/plugins']).communicate()[0] # Copy ROOT plugins rootplugins = ['TVirtualStreamerInfo'] for rootplugin in rootplugins: plugin = '%s/etc/plugins/%s' % (os.getenv('ROOTSYS','.'), rootplugin) print "copying %s ..." % plugin res = subprocess.Popen(['cp', '-r', plugin, 'gluex_benchmark/root/etc/plugins']).communicate()[0] # Copy SQLite file print "copying %s ..." % sqlitefile res = subprocess.Popen(['cp', sqlitefile, 'gluex_benchmark/calib']).communicate()[0] # Copy hdds directory print "copying hdds geometry ..." res = subprocess.Popen(['cp', '-r', hdds_dir, 'gluex_benchmark/hdds']).communicate()[0] res = subprocess.Popen(['rm', '-rf', 'gluex_benchmark/hdds/.svn']).communicate()[0] #================== Create test scripts ================== print "Creating test scripts ..." #------- README README =""" README generated: %s platform: %s host: %s user: %s This file and the tarball containing it were generated by the script: https://halldsvn.jlab.org/repos/trunk/scripts/mk_benchmark_tarball.py To run the benchmark, run "run_all_tests" from the directory containing this README file: > ./run_all_tests The script will cd into the work directory and run the programs from there. The output of each program is captured in an output file named with a ".out" suffix. Upon successful completion of all programs, a directory named "results" is created (parallel to the "work" directory) and all of the "*.out" files are copied there. If the /usr/bin/time program is available on the system then it is used to run the programs and the resource usage it gathers is appended to the bottom of the output files. This benchmarking is done using binaries that were compiled on one system and then run on another. This means the system you are benchmarking does not need to have any specific software installed, not even a compiler. All shared libraries and the dynamic linker are copied into the lib directory. 
This likely includes libc and possibly other critical system libraries the binaries will need to run. Calibration constants and resource files are also included in the bundle. The resources were obtained by running the full program set with a few events on the host system so in principle the target system should not need internet access. contact: David Lawrence x5567 davidl@jlab.org """ t = time.strftime("%Y-%m-%d %H:%M:%S") platform = os.getenv('BMS_OSNAME', 'unknown') host = os.getenv('HOST', 'unknown') user = os.getenv('USER','unknown') WriteStringToFile("gluex_benchmark/README", README % (t, platform, host, user)) #------- run_all_tests run_all_tests = """#!/bin/bash export NTHREADS=$1 export PATH=$PWD/bin:$PATH export LD_LIBRARY_PATH=$PWD/lib:$LD_LIBRARY_PATH export JANA_PLUGIN_PATH=$PWD/plugins export JANA_RESOURCE_DIR=$PWD/resources export JANA_CALIB_URL=sqlite:///${PWD}/calib/%s export JANA_GEOMETRY_URL=xmlfile://${PWD}/hdds/%s export ROOTSYS=$PWD/root export ld_linux_so=$PWD/lib/%s export bindir=$PWD/bin if [ -z "$NTHREADS" ]; then export NTHREADS=1 fi # If /usr/bin/time is present, then use it to gather resource usage export timer="/usr/bin/time --verbose" if [ ! 
-f /usr/bin/time ]; then export timer="" fi # run programs using the linker from the orginal system export prefix="$timer $ld_linux_so" # Run programs in work directory cd %s echo "-------- Running bggen --------" $prefix $bindir/bggen &> bggen.out wait echo "-------- Running hdgeant --------" $prefix $bindir/hdgeant &> hdgeant.out wait echo "-------- Running mcsmear --------" $prefix $bindir/mcsmear hdgeant.hddm &> mcsmear.out wait echo "-------- Running hd_ana (HDDM) --------" $prefix $bindir/hd_ana --config=jana.conf -PNTHREADS=$NTHREADS hdgeant_smeared.hddm &> hd_ana_hddm.out wait echo "-------- Running hd_ana (generate EVIO) --------" $prefix $bindir/hd_ana -PPLUGINS=rawevent hdgeant_smeared.hddm &> hd_ana_convert_to_evio.out wait echo "-------- Running hd_ana (EVIO) --------" $prefix $bindir/hd_ana --config=jana.conf -PNTHREADS=$NTHREADS rawevent_000002.evio &> hd_ana_evio.out wait # Unset LD_LIBRARY_PATH since it likely includes # a libc version incompatible with the mkdir and cp # commands below unset LD_LIBRARY_PATH # Copy all output files to "results" directory in parent mkdir -p ../results cp *.out ../results # Record some info about the target system running this test env > ../results/environment.txt cat /proc/cpuinfo > ../results/cpuinfo cat /proc/meminfo > ../results/meminfo uname -a > ../results/platform dmesg > ../results/dmesg """ WriteStringToFile("gluex_benchmark/run_all_tests", run_all_tests % (sqlitefile_short, main_HDDS_file_short, ld_linux_so, 'work'), True) #------- jana.conf jana_conf = """ THREAD_TIMEOUT_FIRST_EVENT 300 THREAD_TIMEOUT 300 NTHREADS 1 PLUGINS danarest """ WriteStringToFile("gluex_benchmark/work/jana.conf", jana_conf) #------- fort.15 fort_15 = """ LIST C C === INPUT file for BGGEN C TRIG %d number of events to simulate C We expect 395kHz of hadronic rate at high luminosity C -- writing out events C HDDM simple ntuple WROUT 1 0 0 NPRIEV 100 number of events to print EPHLIM 0.15 12. 
energy range in GeV RNDMSEQ 0 random number sequence integer values EELEC 12. electron beam energy EPEAK 9. coherent peak energy ZCOLLIM 7600. distance to the collimator in cm EPYTHMIN 3. minimal energy for PYTHIA simulation RUNNO 2 specify run number STOP """ WriteStringToFile("gluex_benchmark/work/fort.15", fort_15 % Nevents) #------- pythia.dat bggendir = "%s/src/programs/Simulation/bggen/run" % os.getenv("HALLD_HOME") res = subprocess.Popen(['cp', '%s/pythia.dat' % bggendir, 'gluex_benchmark/work']).communicate()[0] #------- pythia-geant.map res = subprocess.Popen(['cp', '%s/pythia-geant.map' % bggendir, 'gluex_benchmark/work']).communicate()[0] #------- particle.dat res = subprocess.Popen(['cp', '%s/particle.dat' % bggendir, 'gluex_benchmark/work']).communicate()[0] #------- control.in str = """ INFILE 'bggen.hddm' TRIG %d OUTFILE 'hdgeant.hddm' BEAM 12. 9. BGRATE 1.10 BGGATE -200. 800. RNDM 121 CUTS 1e-4 1e-4 1e-3 1e-3 1e-4 SWIT 0 0 0 0 0 0 0 0 0 0 GELH 1 0.2 1.0 4 0.160 CKOV 1 LABS 1 ABAN 0 DEBU 1 10 1000 SAVEHITS 0 NOSECONDARIES 0 SHOWERSINCOL 0 DRIFTCLUSTERS 0 END """ WriteStringToFile("gluex_benchmark/work/control.in", str % Nevents) #------- batch/README str = """ This directory contains files that can be used to submit a job to the JLab farm to run the benchmark. Note that this may not be terribly accurate since it will depend on the number of jobs already running on the node. If you want to ensure at least one physical core is dedicated to the job, then set the value of the "core" attribute of the CPU tag in the farm_benchmark.xml file to be (Nslots-Ncores-1) where Nslots is the number of job slots expected on the target farm node, Ncores is the number of physical cores on the node. For example, the farm14 nodes have 42 job slots and 24 physical cores. 
Thus, the relevant line in farm_benchmark.xml should look like this: You can check the number of slots on a node type here: http://scicomp.jlab.org/scicomp/#/operations/nodes You can try and guess the number of physical cores two ways: 1.) by assuming either a 3/4 or 7/8 model was used to calculate the number of slots. e.g. Ncores(1 + 3/4) = 42 --> Ncores = 42/1.75 = 24 2.) Looking at a node in ganglia (link below) and clicking on "Host Overview". In the case of farm14, the "cpu_num" value includes hyper threads so you have to divide by 2. (This is probably always the case.) The farm job works by copying the entire gluex_benchmark.tgz file to the node and unpacking it there before use. Thus any changes made need to be tarred back up into the tarball before submitting the job. Here are the steps you need to take to submit the farm job: 1. Create a working directory for the input/output of the jobs: > setenv workdir /work/halld/home/$USER/benchmark_results > mkdir -p $workdir > cd $workdir 2. Upack the gluex_benchmark.tgz file into $workdir and modify the file: gluex_benchmark/batch/farm_benchmark.xml - Change the input_dir_base and output_dir_base variables to be the value of $workdir - Change the Email address - Change the CPU tag to have the correct "core" attribute as described above - Change the OS tag to the type of node you want to test - Change any other values you think appropriate 3. Recreate the gluex_benchmark tarball: > tar czf gluex_benchmark.tgz gluex_benchmark n.b. Do NOT delete the gluex_benchmark directory after regenerating the tarball. The job needs to use the files: $workdir/gluex_benchmark/batch/farm_benchmark.sh $workdir/gluex_benchmark/batch/farm_benchmark.xml 4. Submit the farm job > jsub -xml $workdir/gluex_benchmark/batch/farm_benchmark.xml This file was generated automatically by the mk_benchmark_tarball.py script. 
contact: David Lawrence x5567 davidl@jlab.org """ WriteStringToFile("gluex_benchmark/batch/README", str) #------- batch/farm_benchmark.sh str = """ #!/bin/csh -f export NTHREADS=$1 if [ -z "$NTHREADS" ]; then export NTHREADS=1 fi echo "starting........ " date echo "Unpacking gluex_benchmark.tgz" tar xzf gluex_benchmark.tgz cd gluex_benchmark echo "working dir = "$PWD echo "starting run_all_tests script ............" date ./run_all_tests $NTHREADS echo "done ............." date tar czf ../results.tgz results echo "ending job ............." """ WriteStringToFile("gluex_benchmark/batch/farm_benchmark.sh", str, True) #------- batch/farm_benchmark.xml str = """ ./farm_benchmark.sh """ WriteStringToFile("gluex_benchmark/batch/farm_benchmark.xml", str) #------- batch/farm_benchmark_multithread.xml str = """ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 ./farm_benchmark.sh ${nthreads} """ WriteStringToFile("gluex_benchmark/batch/farm_benchmark_multithread.xml", str) # Copy all of the work directory files into the tmp directory, but # overwrite the fort.15 file to give it less events print "Copying config files into tmp directory ..." res = subprocess.Popen(['cp', '-r', 'gluex_benchmark/work', 'gluex_benchmark/tmp']).communicate()[0] WriteStringToFile("gluex_benchmark/tmp/work/fort.15", fort_15 % 10); WriteStringToFile("gluex_benchmark/run_all_tests_tmp", run_all_tests % (sqlitefile_short, main_HDDS_file_short, ld_linux_so, 'tmp/work'), True); # Run all tests in the tmp directory in order to download all resources # into the resource directory. print "Running minimal events in order to download resources" os.chdir('gluex_benchmark') res = subprocess.Popen(['./run_all_tests_tmp']).communicate()[0] os.chdir('../') print "Bundling..." 
res = subprocess.Popen(['rm', '-rf', 'gluex_benchmark/tmp']).communicate()[0] res = subprocess.Popen(['rm', '-rf', 'gluex_benchmark/run_all_tests_tmp']).communicate()[0] res = subprocess.Popen(['tar', 'czf', 'gluex_benchmark.tgz', 'gluex_benchmark']).communicate()[0] res = subprocess.Popen(['rm', '-rf', 'gluex_benchmark']).communicate()[0]