# pulser_rates.py

import ROOT
import os
import json
import numpy as np
import pandas as pd
import argparse
from matplotlib import pyplot as plt
from ROOT import gROOT, gSystem, gInterpreter, gStyle, gPad
from plot_utils import prepare_canvas, prepare_figure, my_style, get_hist_contents, nthreads

# enable ROOT implicit multi-threading with the thread count from plot_utils
ROOT.ROOT.EnableImplicitMT(nthreads)

# make sure the relative path for root loading is correct:
# remember the caller's directory, then switch to the parent of the script directory
owd = os.getcwd()
script_dir = os.path.dirname(os.path.realpath(__file__))
os.chdir(os.path.join(script_dir, '..'))
print('working directory is ' + os.getcwd())

# batch mode
gROOT.SetBatch(True)
# rootlogon macro (looked up relative to the working directory set above)
gROOT.Macro('rootlogon.C')

# the functions that process data are defined in scripts/root/dataframe_analysis.C
# read the source inside a `with` block so the file handle is closed immediately
with open(os.path.join(script_dir, 'root', 'dataframe_analysis.C')) as src_file:
    func_code = src_file.read()
gInterpreter.Declare(func_code)

# global style
my_style.cd()
bprops = dict(boxstyle='round', facecolor='wheat', alpha=0.2)

# command-line interface
parser = argparse.ArgumentParser('ECal peak counts')
parser.add_argument('root_file', help='root file output from analyzer')
parser.add_argument(
    '--layout',
    default=os.path.join(script_dir, '..', 'database/channels_layout.json'),
    help='json data for channels layout',
)
parser.add_argument(
    '--groups',
    default='Calorimeter',
    help='channel groups, separated by ","',
)
parser.add_argument('--start', default=-1, type=int, help='start sample number')
parser.add_argument('--end', default=-1, type=int, help='end sample number')
parser.add_argument(
    '--thres',
    default='100',
    help='threshold for peaks, separated by ","',
)
parser.add_argument('--output', default='rates.csv', help='output path')
args = parser.parse_args()

# the working directory was changed above, so re-anchor the input paths to the
# directory the script was launched from (absolute paths pass through unchanged)
# NOTE(review): args.output is not re-anchored, so a relative output path is
# resolved against the changed working directory - confirm this is intended
args.root_file = os.path.join(owd, args.root_file)
args.layout = os.path.join(owd, args.layout)

# dataframe over the 'EvTree' tree of the input file
rdf = ROOT.RDataFrame('EvTree', args.root_file)

# sample window: non-positive --start/--end fall back to samples 0 and 63
start = max(args.start, 0)
end = 63 if args.end <= 0 else args.end

# total time covered: (number of events) x (samples in [start, end]) x 4e-9
# NOTE(review): presumably 4 ns per sample, giving seconds - confirm
n_events = rdf.Count().GetValue()
n_samples = end - start + 1
time = n_events*n_samples*4*1e-9

# load the channel layout (json) given by --layout
with open(args.layout, 'r') as layout_file:
    channels_layout = json.load(layout_file)

# build the channel list: for each requested group (in the order given),
# flatten its nested entries and append them in sorted order
channels = []
for group_name in args.groups.split(','):
    members = []
    for row in channels_layout[group_name.strip()]:
        members.extend(row)
    members.sort()
    channels.extend(members)

# Book every Sum action first and only read the results afterwards.
# RDataFrame actions are lazy: calling GetValue() right after booking (as was
# done per channel before) forces a separate pass over the data for each
# channel and threshold, while results booked up front are filled together.
booked = []
for thres in args.thres.split(','):
    th = float(thres.strip())
    # each threshold starts again from the bare dataframe, so the per-channel
    # '<ch>_npeaks' columns of different thresholds live on separate branches
    tdf = rdf
    for ch in channels:
        column = '{}_npeaks'.format(ch)
        tdf = tdf.Define(column, 'count_peaks({}.peaks, {}, {}, {})'.format(ch, start, end, th))
        booked.append((ch, th, tdf.Sum(column)))

# one row per (threshold, channel), in booking order:
# channel, rate, sqrt(counts)-based error on the rate, threshold
# NOTE(review): the 1e3 divisor presumably converts Hz to kHz - confirm units of `time`
data = []
for ch, th, total in booked:
    counts = total.GetValue()
    data.append((ch, counts/time/1e3, np.sqrt(counts)/time/1e3, th))

pd.DataFrame(columns=['ch', 'rate', 'err', 'thres'], data=data).to_csv(args.output, index=False)