#!/usr/bin/env python
###############################################################
#
# This script will generate a tarball containing binaries
# and scripts that can be run to benchmark a system using
# GlueX simulation and reconstruction software. For this
# to work, you need to run it on a "host" system where
# the sim-recon package has already been built and is pointed
# to by the HALLD_HOME environment variable. The complete
# list of environment variables that need to be defined is:
#
#    HALLD_HOME
#    BMS_OSNAME
#    JANA_CALIB_URL
#    JANA_GEOMETRY_URL
#    ROOTSYS
#
#
# The following environment variables are used, but optional:
#
#    HOST
#    USER
#
# Run this script in any directory and it will produce a
# tarball in the current directory. This will take several
# minutes since it needs to run a few events through each
# program in order to gather the resource files needed
# so they can be included in the tarball.
#
#  > mk_benchmark_tarball.py
#
#
# To use it, unpack the tarball on the "target" system
# and read the README file it contains for further
# instructions.
#
# The primary motivation for doing this is to provide a
# mechanism that can be used to benchmark computer systems
# one is considering purchasing or with different system
# configurations in order to optimize them for GlueX use.
# In principle, the binaries should run on most Linux systems
# so long as the bitness of the kernel is not less than
# that of the binaries (e.g. binaries from a 64bit host
# will NOT run on a 32bit target). It should be OK the other
# way around though (binaries from a 32bit host on a 64bit
# target). The kernel generation may also matter (e.g. 2.4
# vs. 2.6). Only limited testing of this has been done.
#
#
# contact:
#    davidl@jlab.org  x5567
#
###############################################################

import os
import stat
import string  # no longer used below; kept so nothing relying on it breaks
import subprocess
import sys
import time

Nevents = 5000      # Number of events to process
all_libs = []       # full paths of every shared library to bundle
ld_linux_so = ''    # dynamic linker (file name only, once it is found)


#-------------------------------------------------
# RunCommand
#
# Run the given command (a list of arguments; no
# shell is involved) and wait for it to finish.
# The command's exit status is returned. A warning
# is printed when the status is non-zero so that a
# failed copy or tar does not go unnoticed, but
# the script keeps going (best effort).
#-------------------------------------------------
def RunCommand(args):
    status = subprocess.call(args)
    if status != 0:
        print("WARNING: '%s' exited with status %d" % (' '.join(args), status))
    return status


#-------------------------------------------------
# AddLibraries
#
# Run the 'ldd' command on the given binary and
# add any shared libraries not already in the
# all_libs global list to it. If the dynamic
# linker (ld-linux*) is reported, it is added to
# all_libs as well and its file name (without the
# directory) is stored in the ld_linux_so global.
#-------------------------------------------------
def AddLibraries(binname):
    global ld_linux_so

    # universal_newlines makes communicate() return text rather than
    # bytes, so the parsing below works on both Python 2 and Python 3.
    output = subprocess.Popen(["ldd", binname], stdout=subprocess.PIPE,
                              universal_newlines=True).communicate()[0]

    for line in output.splitlines():
        tokens = line.split()

        # Normal libraries look like:
        #   libfoo.so => /path/to/libfoo.so (0x...)
        # Entries with no absolute path in the third field are skipped.
        if len(tokens) > 2:
            libname = tokens[2]
            if libname.startswith("/") and libname not in all_libs:
                all_libs.append(libname)

        # The dynamic linker looks like:
        #   /lib64/ld-linux-x86-64.so.2 (0x...)
        if len(tokens) == 2 and '/ld-linux' in tokens[0]:
            linker_path = tokens[0]
            if linker_path not in all_libs:
                all_libs.append(linker_path)
            ld_linux_so = os.path.basename(linker_path)


#-------------------------------------------------
# WriteStringToFile
#
# Write the given string to the specified file,
# replacing any existing content. If the
# make_executable flag is set to True, then the
# permissions are set to read/write/execute for
# user, group and others.
#-------------------------------------------------
def WriteStringToFile(fname, str, make_executable=False):
    # 'with' guarantees the file is closed even if the write fails
    with open(fname, "w") as f:
        f.write(str)
    if make_executable:
        os.chmod(fname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)


#-------------------------------------------------
# MakeDirectories
#
# Create every directory in the given list that
# does not exist yet. Each directory is handled
# on its own so one that already exists does not
# prevent the rest from being created.
#-------------------------------------------------
def MakeDirectories(paths):
    for path in paths:
        if not os.path.isdir(path):
            os.makedirs(path)


#-------------------------------------------------
# RequireEnvironment
#
# Exit with an error message if any of the named
# environment variables is unset or empty.
#-------------------------------------------------
def RequireEnvironment(names):
    missing = [name for name in names if not os.getenv(name)]
    if missing:
        print("The following required environment variables are not set:")
        for name in missing:
            print("   " + name)
        sys.exit(-1)


#-------------------------------------------------
# GetFileFromURL
#
# Read the URL held in environment variable
# "varname", check that it starts with "prefix"
# and that the remainder names an existing file.
# Returns the file name (the URL with the prefix
# removed). On failure an explanation is printed
# ("usage_lines" for a bad prefix; "what" is the
# human readable kind of file) and the script
# exits with status -1.
#-------------------------------------------------
def GetFileFromURL(varname, prefix, what, usage_lines):
    url = os.getenv(varname, "not defined")
    if not url.startswith(prefix):
        for line in usage_lines:
            print(line)
        sys.exit(-1)

    fname = url[len(prefix):]
    if not os.path.isfile(fname):
        print('The %s "%s"' % (what, fname))
        print('(obtained from %s environment variable)' % varname)
        print('does not seem to exist. Double check the setting of')
        print(varname)
        sys.exit(-1)
    return fname


#================== File templates ==================

#------- README   (generated time, platform, host, user)
README = """
README

generated: %s
platform: %s
host: %s
user: %s

This file and the tarball containing it were generated by the script:

https://halldsvn.jlab.org/repos/trunk/scripts/mk_benchmark_tarball.py

To run the benchmark, run "run_all_tests" from the directory
containing this README file:

> ./run_all_tests

The script will cd into the work directory and run the programs
from there. The output of each program is captured in an output
file named with a ".out" suffix. Upon successful completion of
all programs, a directory named "results" is created (parallel
to the "work" directory) and all of the "*.out" files are copied
there. If the /usr/bin/time program is available on the system
then it is used to run the programs and the resource usage it
gathers is appended to the bottom of the output files.

This benchmarking is done using binaries that were compiled
on one system and then run on another. This means the system
you are benchmarking does not need to have any specific software
installed, not even a compiler. All shared libraries and the
dynamic linker are copied into the lib directory. This likely
includes libc and possibly other critical system libraries the
binaries will need to run.

Calibration constants and resource files are also included
in the bundle. The resources were obtained by running the full
program set with a few events on the host system so in principle
the target system should not need internet access.

contact:
David Lawrence x5567
davidl@jlab.org
"""

#------- run_all_tests   (sqlite file, main HDDS file, dynamic linker, work dir)
run_all_tests = """#!/bin/bash

export PATH=$PWD/bin:$PATH
export LD_LIBRARY_PATH=$PWD/lib:$LD_LIBRARY_PATH
export JANA_PLUGIN_PATH=$PWD/plugins
export JANA_RESOURCE_DIR=$PWD/resources
export JANA_CALIB_URL=sqlite:///${PWD}/calib/%s
export JANA_GEOMETRY_URL=xmlfile://${PWD}/hdds/%s
export ROOTSYS=$PWD/root
export ld_linux_so=$PWD/lib/%s
export bindir=$PWD/bin

# If /usr/bin/time is present, then use it to gather resource usage
export timer="/usr/bin/time --verbose"
if [ ! -f /usr/bin/time ]; then
export timer=""
fi

# run programs using the linker from the orginal system
export prefix="$timer $ld_linux_so"

# Run programs in work directory
cd %s

echo "-------- Running bggen --------"
$prefix $bindir/bggen &> bggen.out
wait

echo "-------- Running hdgeant --------"
$prefix $bindir/hdgeant &> hdgeant.out
wait

echo "-------- Running mcsmear --------"
$prefix $bindir/mcsmear hdgeant.hddm &> mcsmear.out
wait

echo "-------- Running hd_ana --------"
$prefix $bindir/hd_ana --config=jana.conf hdgeant_smeared.hddm &> hd_ana.out
wait

# Unset LD_LIBRARY_PATH since it likely includes
# a libc version incompatible with the mkdir and cp
# commands below
unset LD_LIBRARY_PATH

# Copy all output files to "results" directory in parent
mkdir -p ../results
cp *.out ../results

# Record some info about the target system running this test
env > ../results/environment.txt
cat /proc/cpuinfo > ../results/cpuinfo
cat /proc/meminfo > ../results/meminfo
uname -a > ../results/platform
dmesg > ../results/dmesg
"""

#------- jana.conf
jana_conf = """
THREAD_TIMEOUT_FIRST_EVENT 300
THREAD_TIMEOUT 300
NTHREADS Ncores
PLUGINS danarest
"""

#------- fort.15   (number of events)
fort_15 = """#!/bin/tcsh -f
LIST
C
C === INPUT file for BGGEN
C
TRIG %d number of events to simulate
C We expect 395kHz of hadronic rate at high luminosity
C -- writing out events
C HDDM simple ntuple
WROUT 1 0 1
NPRIEV 100 number of events to print
EPHLIM 0.15 12. energy range in GeV
RNDMSEQ 0 random number sequence integer values
EELEC 12. electron beam energy
EPEAK 9. coherent peak energy
ZCOLLIM 7600. distance to the collimator in cm
EPYTHMIN 3. minimal energy for PYTHIA simulation
RUNNO 2 specify run number
STOP
"""

#------- control.in   (number of events)
control_in = """
INFILE 'bggen.hddm'
TRIG %d
OUTFILE 'hdgeant.hddm'
BEAM 12. 9.
BGRATE 1.10
BGGATE -200. 800.
RNDM 121
CUTS 1e-4 1e-4 1e-3 1e-3 1e-4
SWIT 0 0 0 0 0 0 0 0 0 0
GELH 1 0.2 1.0 4 0.160
CKOV 1
LABS 1
ABAN 0
DEBU 1 10 1000
SAVEHITS 0
NOSECONDARIES 0
SHOWERSINCOL 0
DRIFTCLUSTERS 0
END
"""


#-------------------------------------------------
# main
#
# Build gluex_benchmark.tgz in the current
# directory: validate the environment, collect the
# binaries, plugins and every shared library they
# need, write the test scripts and config files,
# run a few events so the resource files get
# downloaded, then create the tarball and remove
# the staging directory.
#-------------------------------------------------
def main():
    # Check that JANA_CALIB_URL is set and points to SQLite file
    sqlitefile = GetFileFromURL("JANA_CALIB_URL", "sqlite:///", "SQLite file", [
        "JANA_CALIB_URL environment variable does not point to",
        "SQLite file. An SQLite file is required. Set the",
        "environment variable to something like:",
        "",
        "setenv JANA_CALIB_URL sqlite:////path/to/calib.sqlite",
        "",
        "(n.b. the four slashes ('/') )",
        "",
    ])

    # Get short file name (without directory) of SQLite file
    sqlitefile_short = os.path.basename(sqlitefile)

    # Get the main HDDS geometry file from JANA_GEOMETRY_URL
    main_HDDS_file = GetFileFromURL("JANA_GEOMETRY_URL", "xmlfile://", "HDDS xml file", [
        "JANA_GEOMETRY_URL environment variable does not start",
        "with 'xmlfile://'. Set the",
        "environment variable to something like:",
        "",
        "setenv JANA_GEOMETRY_URL xmlfile:///path/to/main_HDDS.xml",
        "",
        "(n.b. the three slashes ('/') )",
        "",
    ])

    # Short file name and containing directory of main_HDDS.xml
    # (a file name with no directory part lives in '.')
    main_HDDS_file_short = os.path.basename(main_HDDS_file)
    hdds_dir = os.path.dirname(main_HDDS_file) or '.'

    # Without these the binary paths below would silently become
    # "None/None/bin/..." so fail early with a clear message instead.
    RequireEnvironment(["HALLD_HOME", "BMS_OSNAME"])
    halld_home = os.getenv("HALLD_HOME")
    osname_dir = "%s/%s" % (halld_home, os.getenv("BMS_OSNAME"))

    # Make directory structure
    MakeDirectories([
        'gluex_benchmark',
        'gluex_benchmark/lib',
        'gluex_benchmark/bin',
        'gluex_benchmark/plugins',
        'gluex_benchmark/work',
        'gluex_benchmark/resources',
        'gluex_benchmark/calib',
        'gluex_benchmark/root',
        'gluex_benchmark/root/etc',
        'gluex_benchmark/root/etc/plugins',
        'gluex_benchmark/tmp',
    ])

    # Define binary locations
    hd_ana = "%s/bin/hd_ana" % osname_dir
    danarest = "%s/plugins/danarest.so" % osname_dir
    bggen = "%s/bin/bggen" % osname_dir
    hdgeant = "%s/bin/hdgeant" % osname_dir
    mcsmear = "%s/bin/mcsmear" % osname_dir

    # Find all dependencies for the executables and plugins
    print("Finding library dependencies ...")
    for binary in (hd_ana, danarest, bggen, hdgeant, mcsmear):
        AddLibraries(binary)

    # Look for dependencies of the dependencies iteratively
    # until we have found them all
    Nlibs = 0
    while len(all_libs) > Nlibs:
        Nlibs = len(all_libs)
        # Iterate over a copy: AddLibraries appends to all_libs
        for lib in list(all_libs):
            AddLibraries(lib)

    # Copy shared libraries
    for lib in all_libs:
        print("copying %s ..." % lib)
        RunCommand(['cp', lib, 'gluex_benchmark/lib'])

    # Copy binaries
    for binary in (hd_ana, bggen, hdgeant, mcsmear):
        print("copying %s ..." % binary)
        RunCommand(['cp', binary, 'gluex_benchmark/bin'])

    # Copy plugins
    for plugin in (danarest,):
        print("copying %s ..." % plugin)
        RunCommand(['cp', plugin, 'gluex_benchmark/plugins'])

    # Copy ROOT plugins
    for rootplugin in ('TVirtualStreamerInfo',):
        plugin = '%s/etc/plugins/%s' % (os.getenv('ROOTSYS', '.'), rootplugin)
        print("copying %s ..." % plugin)
        RunCommand(['cp', '-r', plugin, 'gluex_benchmark/root/etc/plugins'])

    # Copy SQLite file
    print("copying %s ..." % sqlitefile)
    RunCommand(['cp', sqlitefile, 'gluex_benchmark/calib'])

    # Copy hdds directory (without subversion metadata)
    print("copying hdds geometry ...")
    RunCommand(['cp', '-r', hdds_dir, 'gluex_benchmark/hdds'])
    RunCommand(['rm', '-rf', 'gluex_benchmark/hdds/.svn'])

    #================== Create test scripts ==================
    print("Creating test scripts ...")

    #------- README
    t = time.strftime("%Y-%m-%d %H:%M:%S")
    platform = os.getenv('BMS_OSNAME', 'unknown')
    host = os.getenv('HOST', 'unknown')
    user = os.getenv('USER', 'unknown')
    WriteStringToFile("gluex_benchmark/README", README % (t, platform, host, user))

    #------- run_all_tests
    WriteStringToFile(
        "gluex_benchmark/run_all_tests",
        run_all_tests % (sqlitefile_short, main_HDDS_file_short, ld_linux_so, 'work'),
        True)

    #------- jana.conf
    WriteStringToFile("gluex_benchmark/work/jana.conf", jana_conf)

    #------- fort.15
    WriteStringToFile("gluex_benchmark/work/fort.15", fort_15 % Nevents)

    #------- pythia.dat, pythia-geant.map, particle.dat
    bggendir = "%s/src/programs/Simulation/bggen/run" % halld_home
    for fname in ('pythia.dat', 'pythia-geant.map', 'particle.dat'):
        RunCommand(['cp', '%s/%s' % (bggendir, fname), 'gluex_benchmark/work'])

    #------- control.in
    WriteStringToFile("gluex_benchmark/work/control.in", control_in % Nevents)

    # Copy all of the work directory files into the tmp directory, but
    # overwrite the fort.15 file to give it less events
    print("Copying config files into tmp directory ...")
    RunCommand(['cp', '-r', 'gluex_benchmark/work', 'gluex_benchmark/tmp'])
    WriteStringToFile("gluex_benchmark/tmp/work/fort.15", fort_15 % 10)
    WriteStringToFile(
        "gluex_benchmark/run_all_tests_tmp",
        run_all_tests % (sqlitefile_short, main_HDDS_file_short, ld_linux_so, 'tmp/work'),
        True)

    # Run all tests in the tmp directory in order to download all resources
    # into the resource directory.
    print("Running minimal events in order to download resources")
    topdir = os.getcwd()
    os.chdir('gluex_benchmark')
    try:
        RunCommand(['./run_all_tests_tmp'])
    finally:
        # Always return to the starting directory so the bundling
        # commands below operate on the right paths
        os.chdir(topdir)

    print("Bundling...")
    RunCommand(['rm', '-rf', 'gluex_benchmark/tmp'])
    RunCommand(['rm', '-rf', 'gluex_benchmark/run_all_tests_tmp'])
    RunCommand(['tar', 'czf', 'gluex_benchmark.tgz', 'gluex_benchmark'])
    RunCommand(['rm', '-rf', 'gluex_benchmark'])


if __name__ == "__main__":
    main()