#!/usr/bin/env python
#
#
# This script will walk the active, staging, and volatile directory trees
# on all of the RAID servers/partitions in order to summarize the current
# disk usage. It will report the effective space available by adding the
# available space reported by the file system to the space used by the
# volatile directories which may be cleared if needed. Space in the staging
# directories is subtracted from the volatile space since it is assumed those
# files are hard links pointing to both places.
#
# It's best to run this on one of the raid servers themselves so at least some
# partitions can be accessed locally. gluonraid4 or gluonraid2 are good choices.
#
# > ./raid_summary.py
#
# NOTE: every print() call below is given exactly one string argument so the
# script produces the same output under both Python 2 and Python 3.

import os

# Directory trees that are summarized under <partition>/rawdata/.
DIR_TYPES = ['active', 'staging', 'volatile']

# (server name, partition roots) in report order. gluonraid1 and gluonraid2
# have a single partition; gluonraid3 and gluonraid4 have data1..data4.
SERVER_PARTITIONS = [
    ('gluonraid1', ['/gluonraid1']),
    ('gluonraid2', ['/gluonraid2']),
    ('gluonraid3', ['/gluonraid3/data%d' % ipart for ipart in range(1, 5)]),
    ('gluonraid4', ['/gluonraid4/data%d' % ipart for ipart in range(1, 5)]),
]


def GetDirSizeBytes(path):
    """Return the total size in bytes of all files found below *path*.

    The tree is walked with os.walk(). Every file is counted once per
    (device, inode) pair, so several hard links to the same file inside the
    tree contribute only once. Files that cannot be stat'ed (e.g. removed
    while the walk is running, or dangling symlinks) are skipped. A path that
    does not exist yields 0 because os.walk() produces no entries for it.

    A simpler sum over os.path.getsize() is deliberately not used: it raises
    on files that cannot be stat'ed and counts hard-linked files repeatedly.

    A progress line naming *path* is printed before the walk starts.
    """
    total_size = 0
    seen = set()
    print('Calculating size of ' + path)
    for dirpath, dirnames, filenames in os.walk(path):
        for fname in filenames:
            fullpath = os.path.join(dirpath, fname)
            try:
                info = os.stat(fullpath)
            except OSError:
                continue
            # Inode numbers are only unique within one file system, so the
            # device id has to be part of the identity of a file.
            file_id = (info.st_dev, info.st_ino)
            if file_id in seen:
                continue
            seen.add(file_id)
            total_size += info.st_size
    return total_size  # size in bytes


def _collect_dir_sizes():
    """Return {server: {dtype: [bytes per partition]}} for all DIR_TYPES."""
    dir_sizes = {}
    for server, roots in SERVER_PARTITIONS:
        dir_sizes[server] = {dtype: [] for dtype in DIR_TYPES}
    for dtype in DIR_TYPES:
        for server, roots in SERVER_PARTITIONS:
            for root in roots:
                dir_sizes[server][dtype].append(
                    GetDirSizeBytes(root + '/rawdata/' + dtype))
    return dir_sizes


def _collect_partition_sizes():
    """Return {server: {'size': [...], 'avail': [...]}} in bytes per partition.

    A partition whose statvfs() call fails is reported with a warning and left
    out of the lists instead of aborting the whole summary.
    """
    partition_sizes = {}
    for server, roots in SERVER_PARTITIONS:
        partition_sizes[server] = {'size': [], 'avail': []}
        for root in roots:
            try:
                statvfs = os.statvfs(root)
            except OSError as err:
                print('WARNING: unable to statvfs %s (%s); partition skipped'
                      % (root, err))
                continue
            partition_sizes[server]['size'].append(
                statvfs.f_frsize * statvfs.f_blocks)
            partition_sizes[server]['avail'].append(
                statvfs.f_frsize * statvfs.f_bavail)
    return partition_sizes


def _summary_line(server, sizes_by_type, partition_info):
    """Return the 'Total available space' report line for one server.

    The available space is the free space summed over all partitions of the
    server plus anything in volatile that is not in staging. It's assumed
    everything in staging is also in volatile.
    """
    size = sum(partition_info['size'])
    avail = sum(partition_info['avail'])
    deletable = sum(sizes_by_type['volatile']) - sum(sizes_by_type['staging'])
    tot_avail = avail + deletable
    if size > 0:
        percent = '%3.1f%%' % (100.0 * tot_avail / size)
    else:
        # No partition size could be obtained; avoid dividing by zero.
        percent = 'n/a'
    return 'Total available space %s: %4.1fTB (%s)' % (
        server, tot_avail / 1E12, percent)


def main():
    """Scan all servers and print the per-directory and summary reports."""
    dir_sizes = _collect_dir_sizes()

    for server, roots in SERVER_PARTITIONS:
        for dtype in DIR_TYPES:
            tot_size = sum(dir_sizes[server][dtype])
            print(server + (':%8s' % dtype) + ' - %4.1fTB' % (tot_size / 1E12))

    partition_sizes = _collect_partition_sizes()

    print('')
    print('Summary (n.b. "available" includes volatile)')
    print('------------------------------------------------------------------')
    for server, roots in SERVER_PARTITIONS:
        print(_summary_line(server, dir_sizes[server], partition_sizes[server]))


if __name__ == '__main__':
    main()