#!/usr/bin/env python
###############################################################
#
# This script will generate a tarball containing binaries
# and scripts that can be run to benchmark a system using
# GlueX simulation and reconstruction software. For this
# to work, you need to run it on a "host" system where
# the sim-recon package has already been built and is pointed
# to by the HALLD_HOME environment variable. The complete
# list of environment variables that need to be defined is:
#
# HALLD_HOME
# BMS_OSNAME
# JANA_CALIB_URL
# JANA_GEOMETRY_URL
# ROOTSYS
#
#
# The following environment variables are used, but optional:
#
# HOST
# USER
#
# Run this script in any directory and it will produce a
# tarball in the current directory. This will take several
# minutes since it needs to run a few events through each
# program in order to gather the resource files needed
# so they can be included in the tarball.
#
# > mk_benchmark_tarball.py
#
#
# To use it, unpack the tarball on the "target" system
# and read the README file it contains for further
# instructions.
#
# The primary motivation for doing this is to provide a
# mechanism that can be used to benchmark computer systems
# one is considering purchasing or with different system
# configurations in order to optimize them for GlueX use.
# In principle, the binaries should run on most Linux systems
# so long as the bitness of the kernel is not less than
# that of the binaries (e.g. binaries from a 64bit host
# will NOT run on a 32bit target). It should be OK the other way
# around though (binaries from 32bit host on 64bit target).
# The kernel generation may also matter (e.g. 2.4 vs. 2.6).
# Only limited testing of this has been done.
#
#
# contact:
# davidl@jlab.org x5567
#
###############################################################
import subprocess
import sys,os
import string
import stat
import time
Nevents = 5000 # Number of events to process in the full benchmark run
all_libs = [] # accumulated absolute paths of shared libraries (filled by AddLibraries)
ld_linux_so = '' # dynamic linker: set by AddLibraries to its bare file name (no directory)
#-------------------------------------------------
# AddLibraries
#
# Run the 'ldd' command on the given binary and
# add any shared libraries not already in the
# global all_libs list to it. The dynamic linker
# (ld-linux*.so) is also added to all_libs and its
# bare file name (directory stripped) is stored in
# the global ld_linux_so so the generated run
# scripts can invoke it explicitly.
#-------------------------------------------------
def AddLibraries(binname):
    global all_libs, ld_linux_so
    # ldd prints "libname => /path/to/lib (0xaddr)" for normal
    # libraries and "/lib64/ld-linux-x86-64.so.2 (0xaddr)" for
    # the dynamic linker itself.
    result = subprocess.Popen(["ldd", binname], stdout=subprocess.PIPE).communicate()[0]
    lines = result.rstrip().split('\n')
    for line in lines:
        tokens = line.split()
        # Normal library: third token is the resolved absolute path
        if len(tokens) > 2:
            libname = tokens[2]
            if libname.startswith('/'):
                if libname not in all_libs: all_libs.append(libname)
        # Dynamic linker line: first (and only non-address) token
        # is the full path to the linker
        if len(tokens) == 2:
            if '/ld-linux' in tokens[0]:
                ld_linux_so = tokens[0]
                if ld_linux_so not in all_libs: all_libs.append(ld_linux_so)
                # reduce to bare file name for use in the run scripts
                pos = ld_linux_so.rfind('/')
                if pos > 0: ld_linux_so = ld_linux_so[pos+1:]
#-------------------------------------------------
# WriteStringToFile
#
# Write the given string to the specified file.
# If the make_executable flag is set to True, then
# permissions are set to read/write/execute for
# user, group, and other.
#-------------------------------------------------
def WriteStringToFile(fname, str, make_executable=False):
    # n.b. the parameter name "str" shadows the builtin; it is
    # kept for backward compatibility with existing callers
    with open(fname, "w") as f:
        f.write(str)
    if(make_executable):
        # permission constants are bit flags, so combine with "|"
        os.chmod(fname, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
#-------------------------------------------------------------------
# Check that JANA_CALIB_URL is set and points to SQLite file.
# The tarball bundles the calibration DB as a single SQLite file,
# so the URL must have the form "sqlite:////path/to/calib.sqlite".
JANA_CALIB_URL = os.getenv("JANA_CALIB_URL", "not defined")
if not string.find(JANA_CALIB_URL, "sqlite:///")==0 :
    print "JANA_CALIB_URL environment variable does not point to"
    print "SQLite file. An SQLite file is required. Set the"
    print "environment variable to something like:"
    print ""
    print "setenv JANA_CALIB_URL sqlite:////path/to/calib.sqlite"
    print ""
    print "(n.b. the four slashes ('/') )"
    print ""
    sys.exit(-1)
# Strip the 10-character "sqlite:///" prefix, leaving the
# absolute filesystem path (which begins at the 4th slash)
sqlitefile = JANA_CALIB_URL[10:]
if not os.path.isfile(sqlitefile):
    print 'The SQLite file "%s"' % sqlitefile
    print '(obtained from JANA_CALIB_URL environment variable)'
    print 'does not seem to exist. Double check the setting of'
    print 'JANA_CALIB_URL'
    sys.exit(-1)
# Get short file name (without directory) of SQLite file
sqlitefile_short = sqlitefile
pos = string.rfind(sqlitefile, '/')
if pos>=0 : sqlitefile_short = sqlitefile[pos+1:]
# Get the directory of the HDDS geometry from JANA_GEOMETRY_URL.
# Only xmlfile: URLs are supported since the whole hdds directory
# will be copied into the tarball.
JANA_GEOMETRY_URL = os.getenv("JANA_GEOMETRY_URL", "not defined")
if not string.find(JANA_GEOMETRY_URL, "xmlfile://")==0 :
    print "JANA_GEOMETRY_URL environment variable does not start"
    print "with 'xmlfile://'. Set the"
    print "environment variable to something like:"
    print ""
    print "setenv JANA_GEOMETRY_URL xmlfile:///path/to/main_HDDS.xml"
    print ""
    print "(n.b. the three slashes ('/') )"
    print ""
    sys.exit(-1)
# Strip the 10-character "xmlfile://" prefix, leaving the absolute path
main_HDDS_file = JANA_GEOMETRY_URL[10:]
if not os.path.isfile(main_HDDS_file):
    print 'The HDDS xml file "%s"' % main_HDDS_file
    print '(obtained from JANA_GEOMETRY_URL environment variable)'
    print 'does not seem to exist. Double check the setting of'
    print 'JANA_GEOMETRY_URL'
    sys.exit(-1)
# Get short file name (without directory) of main_HDDS.xml file
# as well as the directory holding all of the HDDS xml files
main_HDDS_file_short = main_HDDS_file
hdds_dir = '.'
pos = string.rfind(main_HDDS_file, '/')
if pos>=0 :
    main_HDDS_file_short = main_HDDS_file[pos+1:]
    hdds_dir = main_HDDS_file[:pos]
# Make directory structure for the tarball contents. Each
# directory is created independently so that a partially
# existing tree (e.g. left over from a previous aborted run)
# does not prevent the remaining directories from being made.
# (Previously one try block wrapped all of the mkdir calls, so
# the first "already exists" error silently skipped the rest.)
for benchdir in ['gluex_benchmark',
                 'gluex_benchmark/lib',
                 'gluex_benchmark/bin',
                 'gluex_benchmark/plugins',
                 'gluex_benchmark/work',
                 'gluex_benchmark/resources',
                 'gluex_benchmark/calib',
                 'gluex_benchmark/root',
                 'gluex_benchmark/root/etc',
                 'gluex_benchmark/root/etc/plugins',
                 'gluex_benchmark/batch',
                 'gluex_benchmark/tmp']:
    try:
        os.mkdir(benchdir)
    except OSError:
        # directory already exists; keep going
        pass
# Define binary locations. All executables and plugins are taken
# from the sim-recon build area pointed to by HALLD_HOME for the
# platform named by BMS_OSNAME.
hd_ana = "%s/%s/bin/hd_ana" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME"))
danarest = "%s/%s/plugins/danarest.so" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME"))
rawevent = "%s/%s/plugins/rawevent.so" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME"))
bggen = "%s/%s/bin/bggen" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME"))
hdgeant = "%s/%s/bin/hdgeant" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME"))
mcsmear = "%s/%s/bin/mcsmear" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME"))
# Find the direct shared-library dependencies of each executable
# and plugin via ldd. This fills the global all_libs list.
print "Finding library dependencies ..."
AddLibraries(hd_ana)
AddLibraries(danarest)
AddLibraries(rawevent)
AddLibraries(bggen)
AddLibraries(hdgeant)
AddLibraries(mcsmear)
# Look for dependencies of the dependencies iteratively until no
# new libraries are found (i.e. the transitive closure). We must
# loop over a snapshot of all_libs because AddLibraries appends
# to the list while we are iterating over it.
Nlibs = 0
while len(all_libs) > Nlibs:
    Nlibs = len(all_libs)
    for lib in list(all_libs):
        AddLibraries(lib)
# Copy all discovered shared libraries into the tarball's lib dir
for lib in all_libs:
    print "copying %s ..." % lib
    res = subprocess.Popen(['cp', lib, 'gluex_benchmark/lib']).communicate()[0]
# Copy binaries
# (n.b. the loop variable "bin" shadows the builtin of the same name)
bins = [hd_ana, bggen, hdgeant, mcsmear]
for bin in bins:
    print "copying %s ..." % bin
    res = subprocess.Popen(['cp', bin, 'gluex_benchmark/bin']).communicate()[0]
# Copy JANA plugins
plugins = [danarest, rawevent]
for plugin in plugins:
    print "copying %s ..." % plugin
    res = subprocess.Popen(['cp', plugin, 'gluex_benchmark/plugins']).communicate()[0]
# Copy ROOT plugins (each is a directory under $ROOTSYS/etc/plugins,
# hence the recursive copy)
rootplugins = ['TVirtualStreamerInfo']
for rootplugin in rootplugins:
    plugin = '%s/etc/plugins/%s' % (os.getenv('ROOTSYS','.'), rootplugin)
    print "copying %s ..." % plugin
    res = subprocess.Popen(['cp', '-r', plugin, 'gluex_benchmark/root/etc/plugins']).communicate()[0]
# Copy SQLite calibration file
print "copying %s ..." % sqlitefile
res = subprocess.Popen(['cp', sqlitefile, 'gluex_benchmark/calib']).communicate()[0]
# Copy hdds geometry directory, removing any .svn metadata so it
# is not shipped in the tarball
print "copying hdds geometry ..."
res = subprocess.Popen(['cp', '-r', hdds_dir, 'gluex_benchmark/hdds']).communicate()[0]
res = subprocess.Popen(['rm', '-rf', 'gluex_benchmark/hdds/.svn']).communicate()[0]
#================== Create test scripts ==================
print "Creating test scripts ..."
#------- README
# Template for the top-level README. The four %s placeholders are
# filled below with the generation time, platform, host and user.
README ="""
README
generated: %s
platform: %s
host: %s
user: %s
This file and the tarball containing it were generated by
the script:
https://halldsvn.jlab.org/repos/trunk/scripts/mk_benchmark_tarball.py
To run the benchmark, run "run_all_tests" from the directory
containing this README file:
> ./run_all_tests
The script will cd into the work directory and run the
programs from there. The output of each program is captured
in an output file named with a ".out" suffix. Upon successful
completion of all programs, a directory named "results" is
created (parallel to the "work" directory) and all of the
"*.out" files are copied there.
If the /usr/bin/time program is available on the system then
it is used to run the programs and the resource usage it gathers
is appended to the bottom of the output files.
This benchmarking is done using binaries that were compiled on
one system and then run on another. This means the system you
are benchmarking does not need to have any specific software
installed, not even a compiler. All shared libraries and the
dynamic linker are copied into the lib directory. This likely
includes libc and possibly other critical system libraries
the binaries will need to run. Calibration constants and
resource files are also included in the bundle. The resources
were obtained by running the full program set with a few events
on the host system so in principle the target system should
not need internet access.
contact:
David Lawrence x5567
davidl@jlab.org
"""
t = time.strftime("%Y-%m-%d %H:%M:%S")
platform = os.getenv('BMS_OSNAME', 'unknown')
host = os.getenv('HOST', 'unknown')   # optional env. var (see header)
user = os.getenv('USER','unknown')    # optional env. var (see header)
WriteStringToFile("gluex_benchmark/README", README % (t, platform, host, user))
#------- run_all_tests
# Template for the main driver script. The four %s placeholders
# are filled (in order) with: the SQLite file name, the main HDDS
# xml file name, the dynamic linker file name, and the directory
# to run in ("work" for the real benchmark, "tmp/work" for the
# short resource-gathering run).
run_all_tests = """#!/bin/bash
export NTHREADS=$1
export PATH=$PWD/bin:$PATH
export LD_LIBRARY_PATH=$PWD/lib:$LD_LIBRARY_PATH
export JANA_PLUGIN_PATH=$PWD/plugins
export JANA_RESOURCE_DIR=$PWD/resources
export JANA_CALIB_URL=sqlite:///${PWD}/calib/%s
export JANA_GEOMETRY_URL=xmlfile://${PWD}/hdds/%s
export ROOTSYS=$PWD/root
export ld_linux_so=$PWD/lib/%s
export bindir=$PWD/bin
if [ -z "$NTHREADS" ]; then
export NTHREADS=1
fi
# If /usr/bin/time is present, then use it to gather resource usage
export timer="/usr/bin/time --verbose"
if [ ! -f /usr/bin/time ]; then
export timer=""
fi
# run programs using the linker from the orginal system
export prefix="$timer $ld_linux_so"
# Run programs in work directory
cd %s
echo "-------- Running bggen --------"
$prefix $bindir/bggen &> bggen.out
wait
echo "-------- Running hdgeant --------"
$prefix $bindir/hdgeant &> hdgeant.out
wait
echo "-------- Running mcsmear --------"
$prefix $bindir/mcsmear hdgeant.hddm &> mcsmear.out
wait
echo "-------- Running hd_ana (HDDM) --------"
$prefix $bindir/hd_ana --config=jana.conf -PNTHREADS=$NTHREADS hdgeant_smeared.hddm &> hd_ana_hddm.out
wait
echo "-------- Running hd_ana (generate EVIO) --------"
$prefix $bindir/hd_ana -PPLUGINS=rawevent hdgeant_smeared.hddm &> hd_ana_convert_to_evio.out
wait
echo "-------- Running hd_ana (EVIO) --------"
$prefix $bindir/hd_ana --config=jana.conf -PNTHREADS=$NTHREADS rawevent_000002.evio &> hd_ana_evio.out
wait
# Unset LD_LIBRARY_PATH since it likely includes
# a libc version incompatible with the mkdir and cp
# commands below
unset LD_LIBRARY_PATH
# Copy all output files to "results" directory in parent
mkdir -p ../results
cp *.out ../results
# Record some info about the target system running this test
env > ../results/environment.txt
cat /proc/cpuinfo > ../results/cpuinfo
cat /proc/meminfo > ../results/meminfo
uname -a > ../results/platform
dmesg > ../results/dmesg
"""
WriteStringToFile("gluex_benchmark/run_all_tests", run_all_tests % (sqlitefile_short, main_HDDS_file_short, ld_linux_so, 'work'), True)
#------- jana.conf
# JANA configuration used by the hd_ana steps of the benchmark.
# (NTHREADS here is a default; run_all_tests overrides it with
# -PNTHREADS on the command line.)
jana_conf = "\n".join([
    "",
    "THREAD_TIMEOUT_FIRST_EVENT 300",
    "THREAD_TIMEOUT 300",
    "NTHREADS 1",
    "PLUGINS danarest",
    "",
])
WriteStringToFile("gluex_benchmark/work/jana.conf", jana_conf)
#------- fort.15
# Template for the bggen control file. The %d placeholder is the
# number of events to generate (Nevents for the real benchmark,
# 10 for the resource-gathering trial run below).
fort_15 = """
LIST
C
C === INPUT file for BGGEN
C
TRIG %d number of events to simulate
C We expect 395kHz of hadronic rate at high luminosity
C -- writing out events
C HDDM simple ntuple
WROUT 1 0 0
NPRIEV 100 number of events to print
EPHLIM 0.15 12. energy range in GeV
RNDMSEQ 0 random number sequence integer values
EELEC 12. electron beam energy
EPEAK 9. coherent peak energy
ZCOLLIM 7600. distance to the collimator in cm
EPYTHMIN 3. minimal energy for PYTHIA simulation
RUNNO 2 specify run number
STOP
"""
WriteStringToFile("gluex_benchmark/work/fort.15", fort_15 % Nevents)
# The bggen data files below are copied unchanged from the
# sim-recon source tree.
#------- pythia.dat
bggendir = "%s/src/programs/Simulation/bggen/run" % os.getenv("HALLD_HOME")
res = subprocess.Popen(['cp', '%s/pythia.dat' % bggendir, 'gluex_benchmark/work']).communicate()[0]
#------- pythia-geant.map
res = subprocess.Popen(['cp', '%s/pythia-geant.map' % bggendir, 'gluex_benchmark/work']).communicate()[0]
#------- particle.dat
res = subprocess.Popen(['cp', '%s/particle.dat' % bggendir, 'gluex_benchmark/work']).communicate()[0]
#------- control.in
# Template for the hdgeant control file. The %d placeholder is
# the number of events to process.
# (n.b. the variable "str" shadows the builtin of the same name)
str = """
INFILE 'bggen.hddm'
TRIG %d
OUTFILE 'hdgeant.hddm'
BEAM 12. 9.
BGRATE 1.10
BGGATE -200. 800.
RNDM 121
CUTS 1e-4 1e-4 1e-3 1e-3 1e-4
SWIT 0 0 0 0 0 0 0 0 0 0
GELH 1 0.2 1.0 4 0.160
CKOV 1
LABS 1
ABAN 0
DEBU 1 10 1000
SAVEHITS 0
NOSECONDARIES 0
SHOWERSINCOL 0
DRIFTCLUSTERS 0
END
"""
WriteStringToFile("gluex_benchmark/work/control.in", str % Nevents)
#------- batch/README
# Instructions for submitting the benchmark as a JLab farm job.
str = """
This directory contains files that can be used to submit a job to the
JLab farm to run the benchmark. Note that this may not be terribly
accurate since it will depend on the number of jobs already running
on the node. If you want to ensure at least one physical core is
dedicated to the job, then set the value of the "core" attribute of
the CPU tag in the farm_benchmark.xml file to be (Nslots-Ncores-1)
where Nslots is the number of job slots expected on the target
farm node, Ncores is the number of physical cores on the node. For
example, the farm14 nodes have 42 job slots and 24 physical cores.
Thus, the relevant line in farm_benchmark.xml should look like this:
You can check the number of slots on a node type here:
http://scicomp.jlab.org/scicomp/#/operations/nodes
You can try and guess the number of physical cores two ways:
1.) by assuming either a 3/4 or 7/8 model was used to calculate the
number of slots. e.g. Ncores(1 + 3/4) = 42 --> Ncores = 42/1.75 = 24
2.) Looking at a node in ganglia (link below) and clicking on
"Host Overview". In the case of farm14, the "cpu_num" value
includes hyper threads so you have to divide by 2. (This is
probably always the case.)
The farm job works by copying the entire gluex_benchmark.tgz file to
the node and unpacking it there before use. Thus any changes made
need to be tarred back up into the tarball before submitting the job.
Here are the steps you need to take to submit the farm job:
1. Create a working directory for the input/output of the jobs:
> setenv workdir /work/halld/home/$USER/benchmark_results
> mkdir -p $workdir
> cd $workdir
2. Upack the gluex_benchmark.tgz file into $workdir and modify
the file:
gluex_benchmark/batch/farm_benchmark.xml
- Change the input_dir_base and output_dir_base variables to
be the value of $workdir
- Change the Email address
- Change the CPU tag to have the correct "core" attribute as
described above
- Change the OS tag to the type of node you want to test
- Change any other values you think appropriate
3. Recreate the gluex_benchmark tarball:
> tar czf gluex_benchmark.tgz gluex_benchmark
n.b. Do NOT delete the gluex_benchmark directory after regenerating
the tarball. The job needs to use the files:
$workdir/gluex_benchmark/batch/farm_benchmark.sh
$workdir/gluex_benchmark/batch/farm_benchmark.xml
4. Submit the farm job
> jsub -xml $workdir/gluex_benchmark/batch/farm_benchmark.xml
This file was generated automatically by the mk_benchmark_tarball.py script.
contact:
David Lawrence x5567
davidl@jlab.org
"""
WriteStringToFile("gluex_benchmark/batch/README", str)
#------- batch/farm_benchmark.sh
# Script run on the farm node. Its body uses bash syntax
# ("export", "if [ -z ... ]"), so the interpreter must be bash:
# the previous "#!/bin/csh -f" shebang was wrong and would make
# csh reject the script. The shebang is also placed on the very
# first line of the file (matching run_all_tests above) so the
# kernel honors it; previously the file began with a blank line.
str = """#!/bin/bash
export NTHREADS=$1
if [ -z "$NTHREADS" ]; then
export NTHREADS=1
fi
echo "starting........ "
date
echo "Unpacking gluex_benchmark.tgz"
tar xzf gluex_benchmark.tgz
cd gluex_benchmark
echo "working dir = "$PWD
echo "starting run_all_tests script ............"
date
./run_all_tests $NTHREADS
echo "done ............."
date
tar czf ../results.tgz results
echo "ending job ............."
"""
WriteStringToFile("gluex_benchmark/batch/farm_benchmark.sh", str, True)
#------- batch/farm_benchmark.xml
# NOTE(review): this template contains only the command line, not
# the XML tags (Project/Email/CPU/OS/input_dir_base/...) that the
# batch/README above tells the user to edit. The markup may have
# been lost at some point -- verify against the repository version.
str = """
./farm_benchmark.sh
"""
WriteStringToFile("gluex_benchmark/batch/farm_benchmark.xml", str)
#------- batch/farm_benchmark_multithread.xml
# Variant that sweeps the thread count from 1 to 50, passing each
# value to farm_benchmark.sh as ${nthreads}.
# NOTE(review): same concern as above -- the XML tags appear to be
# missing; verify against the repository version.
str = """
1 2 3 4 5 6 7 8 9 10
11 12 13 14 15 16 17 18 19 20
21 22 23 24 25 26 27 28 29 30
31 32 33 34 35 36 37 38 39 40
41 42 43 44 45 46 47 48 49 50
./farm_benchmark.sh ${nthreads}
"""
WriteStringToFile("gluex_benchmark/batch/farm_benchmark_multithread.xml", str)
# Copy all of the work directory files into the tmp directory, but
# overwrite the fort.15 file to give it less events (10). The tmp
# copy is used below for a short trial run whose only purpose is
# to make the programs download all needed resource files into
# the resources directory so the tarball is self-contained.
print "Copying config files into tmp directory ..."
res = subprocess.Popen(['cp', '-r', 'gluex_benchmark/work', 'gluex_benchmark/tmp']).communicate()[0]
WriteStringToFile("gluex_benchmark/tmp/work/fort.15", fort_15 % 10);
WriteStringToFile("gluex_benchmark/run_all_tests_tmp", run_all_tests % (sqlitefile_short, main_HDDS_file_short, ld_linux_so, 'tmp/work'), True);
# Run all tests in the tmp directory in order to download all resources
# into the resource directory.
print "Running minimal events in order to download resources"
os.chdir('gluex_benchmark')
res = subprocess.Popen(['./run_all_tests_tmp']).communicate()[0]
os.chdir('../')
# Remove the trial-run scratch area, tar up the final bundle, and
# delete the staging directory, leaving only gluex_benchmark.tgz.
print "Bundling..."
res = subprocess.Popen(['rm', '-rf', 'gluex_benchmark/tmp']).communicate()[0]
res = subprocess.Popen(['rm', '-rf', 'gluex_benchmark/run_all_tests_tmp']).communicate()[0]
res = subprocess.Popen(['tar', 'czf', 'gluex_benchmark.tgz', 'gluex_benchmark']).communicate()[0]
res = subprocess.Popen(['rm', '-rf', 'gluex_benchmark']).communicate()[0]