#!/usr/bin/env python
#
# $Id:$
#
# hd_raidmonitor.py
#
# This script will check the remaining disk space on the
# RAID disk on the gluon cluster and set the EPICS variable
# HD:coda:daq:availableRAID to the number of TB remaining. The
# EPICS alarm system is then used to raise an alarm if the
# value drops below a certain level. This script will loop
# infinitely in order to keep this value updated by checking
# every 2 seconds. The heartbeat variable HD:coda:daq:heartbeat will
# also be updated so the EPICS system can know that this program
# is still reporting.
#
# This file is kept under version control at the following location:
#
# https://halldsvn.jlab.org/repos/trunk/online/daq/tools/raidutils/hd_raidmonitor.py
#
#
# It should be run on gluonraid1 regardless of where the ER is
# run. This allows us to know where to look for it and to ensure
# only one of these is running. It will observe other disks via
# NFS.
#
# This can be started using the procServ program so that it will
# be automatically restarted if the process should die.
# It is currently not set up to start automatically on reboot so one
# should start it "by hand" from the hdops account on gluonraid1
# like this:
#
#  procServ -n "DAQ RAID Monitor" 26001 /gluex/builds/devel/Linux_RHEL6-x86_64-gcc4.9.2/bin/hd_raidmonitor.py
#

import os
import sys
import time
import getopt

from epics import caget, caput

MONITOR_DIR = '/gluex/data/rawdata/curr'  # directory to use for checking the remaining disk space
VERBOSE = False
PERIOD = 2  # seconds between checks (and heartbeats)
EPICS_VAR = 'HD:coda:daq:availableRAID'
EPICS_HEARTBEAT = 'HD:coda:daq:heartbeat'

# Number of bytes in one TB as reported by this script (1024^4).
BYTES_PER_TB = 1024.0 ** 4

# NOTE: every print below uses the single-argument call form so that the
# script behaves the same under Python 2 (where print is a statement) and
# also parses under Python 3.


def Usage(err):
    """Print the command line synopsis and exit with status ``err``."""
    print('Usage:')
    print(' hd_raidmonitor.py [-v] [-p period_secs] [-d directory]')
    print(' ')
    sys.exit(err)


# Parse command line arguments
try:
    opts, args = getopt.getopt(sys.argv[1:], 'p:d:vh',
                               ['pause=', 'dir=', 'verbose', 'help'])
except getopt.GetoptError:
    Usage(2)

for opt, arg in opts:
    if opt in ("-h", "--help"):
        Usage(0)
    if opt in ("-p", "--pause"):
        # Reject bad values here with a usage message instead of a raw
        # traceback (non-numeric) or a late failure in time.sleep (negative).
        try:
            PERIOD = int(arg)
        except ValueError:
            print('invalid period "%s": expected a whole number of seconds' % arg)
            Usage(2)
        if PERIOD < 0:
            print('invalid period "%s": must not be negative' % arg)
            Usage(2)
    if opt in ("-d", "--dir"):
        MONITOR_DIR = arg
    if opt in ("-v", "--verbose"):
        VERBOSE = True

# Loop forever
first_iteration = True
heartbeat = 0
while True:

    # Make sure MONITOR_DIR exists
    if not os.path.exists(MONITOR_DIR):
        print('-------------------------------------')
        print('path "%s" does not exist!' % MONITOR_DIR)
        print('hd_raidmonitor.py exiting ..')
        print('-------------------------------------')
        break

    # Get remaining disk space. The directory can still vanish between the
    # existence check above and this call, so a failure here is reported
    # and ends the loop the same way a missing directory does.
    try:
        s = os.statvfs(MONITOR_DIR)
    except OSError as err:
        print('-------------------------------------')
        print('unable to read disk usage of "%s": %s' % (MONITOR_DIR, err))
        print('hd_raidmonitor.py exiting ..')
        print('-------------------------------------')
        break
    Nbytes = float(s.f_bavail * s.f_frsize)
    NTB = Nbytes / BYTES_PER_TB

    # Print info only on first iteration
    if first_iteration:
        print('-------------------------------------')
        print('hd_raidmonitor.py starting')
        print('')
        print(' VERBOSE: %s' % VERBOSE)
        print(' PERIOD: %d' % PERIOD)
        print(' MONITOR_DIR: %s' % MONITOR_DIR)
        print(' EPICS PV names: %s' % EPICS_VAR)
        print(' %s' % EPICS_HEARTBEAT)
        print('available space: %6.3f TB' % NTB)
        print('-------------------------------------')
        first_iteration = False

    # Write remaining space to EPICS
    caput(EPICS_VAR, NTB)

    # Update heartbeat. The current value is read back from EPICS; if the
    # read yields None the locally remembered value is used instead, so the
    # heartbeat keeps alternating between 0 and 1 either way.
    myheartbeat = caget(EPICS_HEARTBEAT)
    if myheartbeat is None:
        print('Failed to read hearbeat value from EPICS variable: %s setting to %d' % (EPICS_HEARTBEAT, heartbeat))
    else:
        heartbeat = myheartbeat
    heartbeat += 1
    if heartbeat > 1:
        heartbeat = 0
    caput(EPICS_HEARTBEAT, heartbeat)

    if VERBOSE:
        print('%s : %6.3f TB' % (EPICS_VAR, NTB))

    # Sleep
    time.sleep(PERIOD)