#!/usr/bin/env python
###############################################################
#
# This script will generate a tarball containing binaries
# and scripts that can be run to benchmark a system using
# GlueX simulation and reconstruction software. For this
# to work, you need to run it on a "host" system where
# the sim-recon package has already been built and is pointed
# to by the HALLD_HOME environment variable. The complete
# list of environment variables that need to be defined is:
#
#   HALLD_HOME
#   BMS_OSNAME
#   JANA_CALIB_URL
#   JANA_GEOMETRY_URL
#   ROOTSYS
#
# The following environment variables are used, but optional:
#
#   HOST
#   USER
#
# Run this script in any directory and it will produce a
# tarball in the current directory. This will take several
# minutes since it needs to run a few events through each
# program in order to gather the resource files needed
# so they can be included in the tarball.
#
#   > mk_benchmark_tarball.py
#
# To use it, unpack the tarball on the "target" system
# and read the README file it contains for further
# instructions.
#
# The primary motivation for doing this is to provide a
# mechanism that can be used to benchmark computer systems
# one is considering purchasing or with different system
# configurations in order to optimize them for GlueX use.
# In principle, the binaries should run on most Linux systems
# so long as the bitness of the kernel is not less than
# that of the binaries (e.g. binaries built on a 64bit host
# will NOT run on a 32bit target). It should be OK the other
# way around though (binaries from a 32bit host on a 64bit
# target). The kernel generation may also matter
# (e.g. 2.4 vs. 2.6). Only limited testing of this has
# been done.
# # # contact: # davidl@jlab.org x5567 # ############################################################### import subprocess import sys,os import string import stat import time Nevents = 5000 # Number of events to process all_libs = [] ld_linux_so = '' # dynamic linker #------------------------------------------------- # AddLibraries # # Run the 'ldd' command on the given binary and # add any shared libraries not already in the # all_libs global list to it. #------------------------------------------------- def AddLibraries(binname): global all_libs, ld_linux_so libs = [] result = subprocess.Popen(["ldd", binname], stdout=subprocess.PIPE).communicate()[0] lines = result.rstrip().split('\n') for line in lines: tokens = string.split(line) # Look for normal libraries if len(tokens)>2 : libname = tokens[2] if string.find(libname, "/") == 0 : if libname not in all_libs: all_libs.append(libname) # look for dynamic linker if len(tokens)==2 : if string.find(tokens[0], '/ld-linux') >=0 : ld_linux_so = tokens[0] if ld_linux_so not in all_libs: all_libs.append(ld_linux_so) pos = string.rfind(ld_linux_so, '/') if pos>0 : ld_linux_so = ld_linux_so[pos+1:] #------------------------------------------------- # WriteStringToFile # # Write the given string to the specified file. # If the make_executable flag is set to True, then # permissions will be set to make the executable # world readable. #------------------------------------------------- def WriteStringToFile(fname, str, make_executable=False): f = open(fname, "w"); f.write(str) f.close() if(make_executable): os.chmod(fname, stat.S_IRWXU + stat.S_IRWXG + stat.S_IRWXO) #------------------------------------------------------------------- # Check that JANA_CALIB_URL is set and points to SQLite file JANA_CALIB_URL = os.getenv("JANA_CALIB_URL", "not defined") if not string.find(JANA_CALIB_URL, "sqlite:///")==0 : print "JANA_CALIB_URL environment variable does not point to" print "SQLite file. An SQLite file is required. 
Set the" print "environment variable to something like:" print "" print "setenv JANA_CALIB_URL sqlite:////path/to/calib.sqlite" print "" print "(n.b. the four slashes ('/') )" print "" sys.exit(-1) sqlitefile = JANA_CALIB_URL[10:] if not os.path.isfile(sqlitefile): print 'The SQLite file "%s"' % sqlitefile print '(obtained from JANA_CALIB_URL environment variable)' print 'does not seem to exist. Double check the setting of' print 'JANA_CALIB_URL' sys.exit(-1) # Get short file name (without directory) of SQLite file sqlitefile_short = sqlitefile pos = string.rfind(sqlitefile, '/') if pos>=0 : sqlitefile_short = sqlitefile[pos+1:] # Get the directory of the HDDS geometry from JANA_GEOMETRY_URL JANA_GEOMETRY_URL = os.getenv("JANA_GEOMETRY_URL", "not defined") if not string.find(JANA_GEOMETRY_URL, "xmlfile://")==0 : print "JANA_GEOMETRY_URL environment variable does not start" print "with 'xmlfile://'. Set the" print "environment variable to something like:" print "" print "setenv JANA_GEOMETRY_URL xmlfile:///path/to/main_HDDS.xml" print "" print "(n.b. the three slashes ('/') )" print "" sys.exit(-1) main_HDDS_file = JANA_GEOMETRY_URL[10:] if not os.path.isfile(main_HDDS_file): print 'The HDDS xml file "%s"' % main_HDDS_file print '(obtained from JANA_GEOMETRY_URL environment variable)' print 'does not seem to exist. Double check the setting of' print 'JANA_GEOMETRY_URL' sys.exit(-1) # Get short file name (without directory) of main_HDDS.xml file main_HDDS_file_short = main_HDDS_file hdds_dir = '.' 
pos = string.rfind(main_HDDS_file, '/') if pos>=0 : main_HDDS_file_short = main_HDDS_file[pos+1:] hdds_dir = main_HDDS_file[:pos] # Make directory structure try: os.mkdir('gluex_benchmark') os.mkdir('gluex_benchmark/lib') os.mkdir('gluex_benchmark/bin') os.mkdir('gluex_benchmark/plugins') os.mkdir('gluex_benchmark/work') os.mkdir('gluex_benchmark/resources') os.mkdir('gluex_benchmark/calib') os.mkdir('gluex_benchmark/root') os.mkdir('gluex_benchmark/root/etc') os.mkdir('gluex_benchmark/root/etc/plugins') os.mkdir('gluex_benchmark/batch') os.mkdir('gluex_benchmark/tmp') except OSError: pass # Define binary locations hd_ana = "%s/%s/bin/hd_ana" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME")) danarest = "%s/%s/plugins/danarest.so" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME")) rawevent = "%s/%s/plugins/rawevent.so" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME")) bggen = "%s/%s/bin/bggen" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME")) hdgeant = "%s/%s/bin/hdgeant" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME")) mcsmear = "%s/%s/bin/mcsmear" % (os.getenv("HALLD_HOME"), os.getenv("BMS_OSNAME")) # Find all dependencies for the executables and plugins print "Finding library dependencies ..." AddLibraries(hd_ana) AddLibraries(danarest) AddLibraries(rawevent) AddLibraries(bggen) AddLibraries(hdgeant) AddLibraries(mcsmear) # Look for dependencies of the dependencies iteratively # until we have found them all Nlibs = 0 while len(all_libs) > Nlibs: Nlibs = len(all_libs) tmp = all_libs for lib in tmp: AddLibraries(lib) # Copy shared libraries for lib in all_libs: print "copying %s ..." % lib res = subprocess.Popen(['cp', lib, 'gluex_benchmark/lib']).communicate()[0] # Copy binaries bins = [hd_ana, bggen, hdgeant, mcsmear] for bin in bins: print "copying %s ..." % bin res = subprocess.Popen(['cp', bin, 'gluex_benchmark/bin']).communicate()[0] # Copy plugins plugins = [danarest, rawevent] for plugin in plugins: print "copying %s ..." 
% plugin res = subprocess.Popen(['cp', plugin, 'gluex_benchmark/plugins']).communicate()[0] # Copy ROOT plugins rootplugins = ['TVirtualStreamerInfo'] for rootplugin in rootplugins: plugin = '%s/etc/plugins/%s' % (os.getenv('ROOTSYS','.'), rootplugin) print "copying %s ..." % plugin res = subprocess.Popen(['cp', '-r', plugin, 'gluex_benchmark/root/etc/plugins']).communicate()[0] # Copy SQLite file print "copying %s ..." % sqlitefile res = subprocess.Popen(['cp', sqlitefile, 'gluex_benchmark/calib']).communicate()[0] # Copy hdds directory print "copying hdds geometry ..." res = subprocess.Popen(['cp', '-r', hdds_dir, 'gluex_benchmark/hdds']).communicate()[0] res = subprocess.Popen(['rm', '-rf', 'gluex_benchmark/hdds/.svn']).communicate()[0] #================== Create test scripts ================== print "Creating test scripts ..." #------- README README =""" README generated: %s platform: %s host: %s user: %s This file and the tarball containing it were generated by the script: https://halldsvn.jlab.org/repos/trunk/scripts/mk_benchmark_tarball.py To run the benchmark, run "run_all_tests" from the directory containing this README file: > ./run_all_tests The script will cd into the work directory and run the programs from there. The output of each program is captured in an output file named with a ".out" suffix. Upon successful completion of all programs, a directory named "results" is created (parallel to the "work" directory) and all of the "*.out" files are copied there. If the /usr/bin/time program is available on the system then it is used to run the programs and the resource usage it gathers is appended to the bottom of the output files. This benchmarking is done using binaries that were compiled on one system and then run on another. This means the system you are benchmarking does not need to have any specific software installed, not even a compiler. All shared libraries and the dynamic linker are copied into the lib directory. 
This likely includes libc and possibly other critical system libraries the binaries will need to run. Calibration constants and resource files are also included in the bundle. The resources were obtained by running the full program set with a few events on the host system so in principle the target system should not need internet access. contact: David Lawrence x5567 davidl@jlab.org """ t = time.strftime("%Y-%m-%d %H:%M:%S") platform = os.getenv('BMS_OSNAME', 'unknown') host = os.getenv('HOST', 'unknown') user = os.getenv('USER','unknown') WriteStringToFile("gluex_benchmark/README", README % (t, platform, host, user)) #------- run_all_tests run_all_tests = """#!/bin/bash export NTHREADS=$1 export PATH=$PWD/bin:$PATH export LD_LIBRARY_PATH=$PWD/lib:$LD_LIBRARY_PATH export JANA_PLUGIN_PATH=$PWD/plugins export JANA_RESOURCE_DIR=$PWD/resources export JANA_CALIB_URL=sqlite:///${PWD}/calib/%s export JANA_GEOMETRY_URL=xmlfile://${PWD}/hdds/%s export ROOTSYS=$PWD/root export ld_linux_so=$PWD/lib/%s export bindir=$PWD/bin if [ -z "$NTHREADS" ]; then export NTHREADS=1 fi # If /usr/bin/time is present, then use it to gather resource usage export timer="/usr/bin/time --verbose" if [ ! 
-f /usr/bin/time ]; then export timer="" fi # run programs using the linker from the orginal system export prefix="$timer $ld_linux_so" # Run programs in work directory cd %s echo "-------- Running bggen --------" $prefix $bindir/bggen &> bggen.out wait echo "-------- Running hdgeant --------" $prefix $bindir/hdgeant &> hdgeant.out wait echo "-------- Running mcsmear --------" $prefix $bindir/mcsmear hdgeant.hddm &> mcsmear.out wait echo "-------- Running hd_ana (HDDM) --------" $prefix $bindir/hd_ana --config=jana.conf -PNTHREADS=$NTHREADS hdgeant_smeared.hddm &> hd_ana_hddm.out wait echo "-------- Running hd_ana (generate EVIO) --------" $prefix $bindir/hd_ana -PPLUGINS=rawevent hdgeant_smeared.hddm &> hd_ana_convert_to_evio.out wait echo "-------- Running hd_ana (EVIO) --------" $prefix $bindir/hd_ana --config=jana.conf -PNTHREADS=$NTHREADS rawevent_000002.evio &> hd_ana_evio.out wait # Unset LD_LIBRARY_PATH since it likely includes # a libc version incompatible with the mkdir and cp # commands below unset LD_LIBRARY_PATH # Copy all output files to "results" directory in parent mkdir -p ../results cp *.out ../results # Record some info about the target system running this test env > ../results/environment.txt cat /proc/cpuinfo > ../results/cpuinfo cat /proc/meminfo > ../results/meminfo uname -a > ../results/platform dmesg > ../results/dmesg """ WriteStringToFile("gluex_benchmark/run_all_tests", run_all_tests % (sqlitefile_short, main_HDDS_file_short, ld_linux_so, 'work'), True) #------- jana.conf jana_conf = """ THREAD_TIMEOUT_FIRST_EVENT 300 THREAD_TIMEOUT 300 NTHREADS 1 PLUGINS danarest """ WriteStringToFile("gluex_benchmark/work/jana.conf", jana_conf) #------- fort.15 fort_15 = """ LIST C C === INPUT file for BGGEN C TRIG %d number of events to simulate C We expect 395kHz of hadronic rate at high luminosity C -- writing out events C HDDM simple ntuple WROUT 1 0 0 NPRIEV 100 number of events to print EPHLIM 0.15 12. 
energy range in GeV RNDMSEQ 0 random number sequence integer values EELEC 12. electron beam energy EPEAK 9. coherent peak energy ZCOLLIM 7600. distance to the collimator in cm EPYTHMIN 3. minimal energy for PYTHIA simulation RUNNO 2 specify run number STOP """ WriteStringToFile("gluex_benchmark/work/fort.15", fort_15 % Nevents) #------- pythia.dat bggendir = "%s/src/programs/Simulation/bggen/run" % os.getenv("HALLD_HOME") res = subprocess.Popen(['cp', '%s/pythia.dat' % bggendir, 'gluex_benchmark/work']).communicate()[0] #------- pythia-geant.map res = subprocess.Popen(['cp', '%s/pythia-geant.map' % bggendir, 'gluex_benchmark/work']).communicate()[0] #------- particle.dat res = subprocess.Popen(['cp', '%s/particle.dat' % bggendir, 'gluex_benchmark/work']).communicate()[0] #------- control.in str = """ INFILE 'bggen.hddm' TRIG %d OUTFILE 'hdgeant.hddm' BEAM 12. 9. BGRATE 1.10 BGGATE -200. 800. RNDM 121 CUTS 1e-4 1e-4 1e-3 1e-3 1e-4 SWIT 0 0 0 0 0 0 0 0 0 0 GELH 1 0.2 1.0 4 0.160 CKOV 1 LABS 1 ABAN 0 DEBU 1 10 1000 SAVEHITS 0 NOSECONDARIES 0 SHOWERSINCOL 0 DRIFTCLUSTERS 0 END """ WriteStringToFile("gluex_benchmark/work/control.in", str % Nevents) #------- batch/README str = """ This directory contains files that can be used to submit a job to the JLab farm to run the benchmark. Note that this may not be terribly accurate since it will depend on the number of jobs already running on the node. If you want to ensure at least one physical core is dedicated to the job, then set the value of the "core" attribute of the CPU tag in the farm_benchmark.xml file to be (Nslots-Ncores-1) where Nslots is the number of job slots expected on the target farm node, Ncores is the number of physical cores on the node. For example, the farm14 nodes have 42 job slots and 24 physical cores. 
Thus, the relevant line in farm_benchmark.xml should look like this: You can check the number of slots on a node type here: http://scicomp.jlab.org/scicomp/#/operations/nodes You can try and guess the number of physical cores two ways: 1.) by assuming either a 3/4 or 7/8 model was used to calculate the number of slots. e.g. Ncores(1 + 3/4) = 42 --> Ncores = 42/1.75 = 24 2.) Looking at a node in ganglia (link below) and clicking on "Host Overview". In the case of farm14, the "cpu_num" value includes hyper threads so you have to divide by 2. (This is probably always the case.) The farm job works by copying the entire gluex_benchmark.tgz file to the node and unpacking it there before use. Thus any changes made need to be tarred back up into the tarball before submitting the job. Here are the steps you need to take to submit the farm job: 1. Create a working directory for the input/output of the jobs: > setenv workdir /work/halld/home/$USER/benchmark_results > mkdir -p $workdir > cd $workdir 2. Upack the gluex_benchmark.tgz file into $workdir and modify the file: gluex_benchmark/batch/farm_benchmark.xml - Change the input_dir_base and output_dir_base variables to be the value of $workdir - Change the Email address - Change the CPU tag to have the correct "core" attribute as described above - Change the OS tag to the type of node you want to test - Change any other values you think appropriate 3. Recreate the gluex_benchmark tarball: > tar czf gluex_benchmark.tgz gluex_benchmark n.b. Do NOT delete the gluex_benchmark directory after regenerating the tarball. The job needs to use the files: $workdir/gluex_benchmark/batch/farm_benchmark.sh $workdir/gluex_benchmark/batch/farm_benchmark.xml 4. Submit the farm job > jsub -xml $workdir/gluex_benchmark/batch/farm_benchmark.xml This file was generated automatically by the mk_benchmark_tarball.py script. 
contact: David Lawrence x5567 davidl@jlab.org """ WriteStringToFile("gluex_benchmark/batch/README", str) #------- batch/farm_benchmark.sh str = """ #!/bin/csh -f export NTHREADS=$1 if [ -z "$NTHREADS" ]; then export NTHREADS=1 fi echo "starting........ " date echo "Unpacking gluex_benchmark.tgz" tar xzf gluex_benchmark.tgz cd gluex_benchmark echo "working dir = "$PWD echo "starting run_all_tests script ............" date ./run_all_tests $NTHREADS echo "done ............." date tar czf ../results.tgz results echo "ending job ............." """ WriteStringToFile("gluex_benchmark/batch/farm_benchmark.sh", str, True) #------- batch/farm_benchmark.xml str = """ ./farm_benchmark.sh """ WriteStringToFile("gluex_benchmark/batch/farm_benchmark.xml", str) #------- batch/farm_benchmark_multithread.xml str = """ 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 ./farm_benchmark.sh ${nthreads} """ WriteStringToFile("gluex_benchmark/batch/farm_benchmark_multithread.xml", str) # Copy all of the work directory files into the tmp directory, but # overwrite the fort.15 file to give it less events print "Copying config files into tmp directory ..." res = subprocess.Popen(['cp', '-r', 'gluex_benchmark/work', 'gluex_benchmark/tmp']).communicate()[0] WriteStringToFile("gluex_benchmark/tmp/work/fort.15", fort_15 % 10); WriteStringToFile("gluex_benchmark/run_all_tests_tmp", run_all_tests % (sqlitefile_short, main_HDDS_file_short, ld_linux_so, 'tmp/work'), True); # Run all tests in the tmp directory in order to download all resources # into the resource directory. print "Running minimal events in order to download resources" os.chdir('gluex_benchmark') res = subprocess.Popen(['./run_all_tests_tmp']).communicate()[0] os.chdir('../') print "Bundling..." 
res = subprocess.Popen(['rm', '-rf', 'gluex_benchmark/tmp']).communicate()[0] res = subprocess.Popen(['rm', '-rf', 'gluex_benchmark/run_all_tests_tmp']).communicate()[0] res = subprocess.Popen(['tar', 'czf', 'gluex_benchmark.tgz', 'gluex_benchmark']).communicate()[0] res = subprocess.Popen(['rm', '-rf', 'gluex_benchmark']).communicate()[0]