Commit e0bb61b1 authored by Chao Peng

add analysis script for ecal benchmarks

parent 5fae2c48
1 merge request: !122 add analysis script for ecal benchmarks
@@ -21,98 +21,77 @@ def load_root_macros(arg_macros):
print('Root macro \'{}\' does not exist, skipping it.'.format(path))
def thrown_particles_figure(df, save, mcbranch="mcparticles"):
# read from RDataFrame and flatten a given collection, return pandas dataframe
def flatten_collection(rdf, collection, cols=None):
if not cols:
cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
else:
cols = ['{}.{}'.format(collection, c) for c in cols]
data = rdf.AsNumpy(cols)
# flatten the data, add an event id to identify clusters from different events
evns = []
for i, vec in enumerate(data[cols[0]]):
evns += [i]*vec.size()
for n, vals in data.items():
data[n] = np.asarray([v for vec in vals for v in vec])
# build data frame
dfp = pd.DataFrame(columns=cols, data=np.vstack(list(data.values())).T)
dfp.loc[:, 'event'] = evns
return dfp
def thrown_particles_figure(rdf, save, mcbranch="mcparticles"):
# define truth particle info
df = df.Define('isThrown', '{}.genStatus == 1'.format(mcbranch))\
.Define('thrownParticles', '{}[isThrown]'.format(mcbranch))\
.Define('thrownP', 'fourvec(thrownParticles)')\
.Define('thrownPID', 'pid(thrownParticles)')\
.Define('thrownEta', 'eta(thrownParticles)')\
.Define('thrownTheta', 'theta(thrownP)')\
.Define('thrownMomentum', 'momentum(thrownP)')
df = flatten_collection(rdf, mcbranch, ['genStatus', 'pdgID', 'psx', 'psy', 'psz', 'mass'])
df.rename(columns={c: c.replace(mcbranch + '.', '') for c in df.columns}, inplace=True)
# select thrown particles
df = df[df['genStatus'] == 1]
# figure
fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120)
# pid info needs some special treatment
pids = df.AsNumpy(['thrownPID'])['thrownPID']
# unpack vectors
pids = np.asarray([v for vec in pids for v in vec], dtype=int)
# build a dictionary for particle id and particle name
# convert pid to particle names
pdgbase = ROOT.TDatabasePDG()
pdict = {pid: pdgbase.GetParticle(int(pid)).GetName() for pid in np.unique(pids)}
pmap = {pid: i*2 for i, pid in enumerate(list(pdict))}
# convert pid to index in dictionary
pmaps = [pmap[i] for i in pids]
get_pname = np.vectorize(lambda pid: pdgbase.GetParticle(int(pid)).GetName())
# enumerate particle names
df.loc[:, 'pname'] = get_pname(df['pdgID'].values)
penum = {pname: i for i, pname in enumerate(df['pname'].unique())}
df.loc[:, 'pname_id'] = df['pname'].map(penum)
ax = axs.flat[0]
ax.hist(pmaps, bins=np.arange(-4, len(pmap)*2 + 4), align='left', ec='k')
ax.hist(df['pname_id'].values, bins=np.arange(-4, len(penum)*2 + 4), align='left', ec='k')
ax.set_ylabel('Particle Counts', fontsize=24)
ax.tick_params(labelsize=22)
ax.set_axisbelow(True)
ax.grid(linestyle=':', which='both', axis='y')
ax.xaxis.set_major_locator(ticker.FixedLocator(list(pmap.values())))
ax.set_xticklabels(list(pdict.values()))
# kinematics
data = df.AsNumpy(['thrownMomentum', 'thrownTheta', 'thrownEta'])
labels = {
'thrownMomentum': (r'$p$ (GeV)', 'Counts'),
'thrownTheta': (r'$\theta$ (degree)', 'Counts'),
'thrownEta': (r'$\eta$', 'Counts'),
}
for ax, (name, vals) in zip(axs.flat[1:], data.items()):
hvals = [v for vec in vals for v in vec]
ax.hist(hvals, bins=50, ec='k')
ax.tick_params(labelsize=22, direction='in', which='both')
ax.grid(linestyle=':', which='both')
label = labels[name]
ax.set_axisbelow(True)
ax.set_xlabel(label[0], fontsize=24)
ax.set_ylabel(label[1], fontsize=24)
fig.text(0.5, 0.95, 'Thrown Particles', ha='center', fontsize=24)
fig.savefig(save)
plt.close(fig)
def general_clusters_figure(df, collection, save, min_nhits=3):
data = df.AsNumpy([
'{}.nhits'.format(collection),
'{}.energy'.format(collection),
'{}.polar.theta'.format(collection),
'{}.polar.phi'.format(collection),
])
# build a dataframe
evns = []
for i, vec in enumerate(data['{}.nhits'.format(collection)]):
evns += [i]*vec.size()
for n, vals in data.items():
data[n] = np.asarray([v for vec in vals for v in vec])
dfp = pd.DataFrame(columns=['nhits', 'edep', 'theta', 'phi'],
data=np.vstack(list(data.values())).T)
dfp.loc[:, 'evn'] = evns
# select the max. energy cluster for each event
dfc = dfp.loc[dfp.groupby('evn')['edep'].idxmax()]
dfc = dfc.loc[dfc['nhits'] >= min_nhits]
# figure
fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120)
labels = [
('Number of Hits', 'Counts'),
(r'$E$ (GeV)', 'Counts'),
(r'$\theta$ (rad)', 'Counts'),
(r'$\phi$ (rad)', 'Counts'),
ax.xaxis.set_major_locator(ticker.FixedLocator(list(penum.values())))
ax.set_xticklabels(list(penum), rotation=30)
# calculate kinematics
get_4vecs = np.vectorize(lambda x, y, z, m: ROOT.Math.PxPyPzMVector(x, y, z, m))
fourvecs = get_4vecs(*df[['psx', 'psy', 'psz', 'mass']].values.T)
df.loc[:, 'p'] = [v.P() for v in fourvecs]
df.loc[:, 'eta'] = [v.Eta() for v in fourvecs]
df.loc[:, 'pt'] = [v.Pt() for v in fourvecs]
# column name, x label, y label, bins
plots = [
('p', r'$P$ (GeV)', 'Counts', 50),
('eta', r'$\eta$', 'Counts', 50),
('pt', r'$P_T$ (GeV)', 'Counts', 50),
]
for ax, label, vals in zip(axs.flat, labels, dfc[['nhits', 'edep', 'theta', 'phi']].values.T):
ax.hist(vals, bins=50, ec='k')
for ax, (col, xlabel, ylabel, bins) in zip(axs.flat[1:], plots):
ax.hist(df[col].values, bins=bins, align='left', ec='k')
ax.tick_params(labelsize=22, direction='in', which='both')
ax.grid(linestyle=':', which='both')
ax.set_axisbelow(True)
ax.set_xlabel(label[0], fontsize=24)
ax.set_ylabel(label[1], fontsize=24)
ax.set_xlabel(xlabel, fontsize=24)
ax.set_ylabel(ylabel, fontsize=24)
fig.text(0.5, 0.95, collection, ha='center', fontsize=24)
fig.text(0.5, 0.95, 'Thrown Particles', ha='center', fontsize=24)
fig.savefig(save)
plt.close(fig)
return dfp
if __name__ == '__main__':
@@ -132,30 +111,67 @@ if __name__ == '__main__':
print('CPU available {}, using {}'.format(os.cpu_count(), nthreads))
# declare some functions from c++ script, needed for data frame processing
script_dir = os.path.dirname(os.path.realpath(__file__))
fcode = open(os.path.join(script_dir, 'barrel_clusters.cxx')).read()
ROOT.gInterpreter.Declare(fcode)
# script_dir = os.path.dirname(os.path.realpath(__file__))
# fcode = open(os.path.join(script_dir, 'barrel_clusters.cxx')).read()
# ROOT.gInterpreter.Declare(fcode)
os.makedirs(args.outdir, exist_ok=True)
# load macros (add libraries/headers here that ROOT cannot automatically find)
load_root_macros(args.macros)
rdf_sim = ROOT.RDataFrame('events', args.sim_file)
rdf_rec = ROOT.RDataFrame('events', args.rec_file)
thrown_particles_figure(rdf_sim, save=os.path.join(args.outdir, 'thrown_particles.png'), mcbranch=args.mc)
general_clusters_figure(rdf_rec, collection='EcalEndcapNClusters', min_nhits=10,
save=os.path.join(args.outdir, 'ecal_electron_endcap_clusters.png'))
general_clusters_figure(rdf_rec, collection='EcalEndcapPClusters', min_nhits=5,
save=os.path.join(args.outdir, 'ecal_hadron_endcap_clusters.png'))
general_clusters_figure(rdf_rec, collection='EcalBarrelClusters',
save=os.path.join(args.outdir, 'ecal_barrel_clusters.png'))
general_clusters_figure(rdf_rec, collection='HcalElectronEndcapClusters', min_nhits=5,
save=os.path.join(args.outdir, 'hcal_electron_endcap_clusters.png'))
general_clusters_figure(rdf_rec, collection='HcalHadronEndcapClusters', min_nhits=10,
save=os.path.join(args.outdir, 'hcal_hadron_endcap_clusters.png'))
general_clusters_figure(rdf_rec, collection='HcalBarrelClusters',
save=os.path.join(args.outdir, 'hcal_barrel_clusters.png'))
rdf_rec = ROOT.RDataFrame('events', args.rec_file)
# cluster collections
colls = [
'EcalEndcapNClusters',
'EcalEndcapPClusters',
'EcalBarrelClusters',
'HcalElectronEndcapClusters',
'HcalHadronEndcapClusters',
'HcalBarrelClusters',
]
# a function to extract eta
ROOT.gInterpreter.Declare('''
template<typename T>
ROOT::VecOps::RVec<T> eta(const ROOT::VecOps::RVec<T> &theta) {
ROOT::VecOps::RVec<double> result;
for (size_t i = 0; i < theta.size(); ++ i) { result.push_back(-std::log(std::tan(theta[i]/2.))); }
return result;
}
''')
# branch name, x label, x bins
plots = [
('nhits', 'Number of Hits', 50),
('energy', 'Energy (GeV)', 50),
('eta', r'$\eta$', 50),
]
for coll in colls:
df = flatten_collection(rdf_rec, coll)
if not df.shape[0]:
print('{} does not have valid entries, skipping it'.format(coll))
continue
df.rename(columns={c: c.replace(coll + '.', '') for c in df.columns}, inplace=True)
# calculate eta
if 'eta' not in df.columns:
df.loc[:, 'eta'] = -np.log(np.tan(df['polar.theta'].values/2.))
fig, axs = plt.subplots(2, 2, figsize=(12, 8), dpi=160)
ncl = df.groupby('event')['clusterID'].nunique().values
axs[0][0].hist(ncl, weights=np.repeat(1./float(ncl.shape[0]), ncl.shape[0]),
bins=np.arange(0, 10), align='mid', ec='k')
axs[0][0].set_xlabel('Number of Clusters', fontsize=16)
for ax, (branch, xlabel, bins) in zip(axs.flat[1:], plots):
ax.hist(df[branch].values, bins=bins, align='mid', ec='k')
ax.set_xlabel(xlabel, fontsize=16)
# universal axis settings
for ax in axs.flat:
ax.tick_params(labelsize=16, direction='in')
ax.set_axisbelow(True)
ax.grid(linestyle=':')
fig.text(0.05, 0.5, 'Normalized Counts', rotation=90, va='center', fontsize=18)
fig.savefig(os.path.join(args.outdir, '{}.png'.format(coll)))
@@ -11,12 +11,24 @@ import subprocess
import argparse
# type: option files, analysis files
# type: option files, analysis files, cluster collections
default_type = {
'endcap_e': [['endcap_e.py'], []],
'endcap_i': [['endcap_i.py'], []],
'barrel': [['barrel.py'], []],
# 'all': [['all_ecal.py'], []],
'endcap_e': [
['endcap_e.py'],
['draw_cluters.py'],
['EcalEndcapNClusters']],
'endcap_i': [
['endcap_i.py'],
['draw_cluters.py'],
['EcalEndcapPClusters']],
'barrel': [
['barrel.py'],
['draw_cluters.py'],
['EcalBarrelClusters']],
# 'all': [
# ['all_ecal.py'],
# ['draw_cluters.py'],
# ['EcalEndcapNClusters', 'EcalEndcapPClusters', 'EcalBarrelClusters']],
}
default_compact = os.path.join(os.environ.get('JUGGLER_DETECTOR_PATH', os.environ.get('DETECTOR_PATH', '')),
@@ -43,7 +55,7 @@ kwargs = vars(args)
if args.run_type not in default_type:
print('ERROR unsupported run type {}, must be in {}'.format(args.run_type, list(default_type.keys())))
exit(1)
opt_scripts, ana_scripts = default_type[args.run_type]
opt_scripts, ana_scripts, collections = default_type[args.run_type]
gen_file = 'gen_{nametag}_{pmin}_{pmax}.hepmc'.format(**kwargs)
sim_file = 'sim_{nametag}_{pmin}_{pmax}.root'.format(**kwargs)
@@ -72,7 +84,6 @@ if 'sim' in procs:
'-v', 'WARNING']
if args.seed > 0:
sim_cmd += ['--random.seed', args.seed]
print(sim_cmd)
return_code = subprocess.run(sim_cmd).returncode
if return_code is not None and return_code < 0:
print("ERROR running simulation!")
@@ -101,7 +112,9 @@ if 'rec' in procs:
if 'ana' in procs:
os.makedirs('results', exist_ok=True)
for ana in ana_scripts:
ana_cmd = ['python', os.path.join(sdir, 'scripts', ana), rec_file, '-o', 'results']
ana_cmd = ['python', os.path.join(sdir, 'scripts', ana), rec_file,
'--collections', ','.join(collections),
'-o', 'results']
return_code = subprocess.run(ana_cmd).returncode
if return_code is not None and return_code < 0:
print('ERROR running analysis ({})!'.format(ana))
......
'''
A simple analysis script to extract some basic info of clusters
Chao Peng (ANL), 07/08/2021
'''
import os
import ROOT
import pandas as pd
import numpy as np
import argparse
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
# load root macros, input is an argument string
def load_root_macros(arg_macros):
for path in arg_macros.split(','):
path = path.strip()
if os.path.exists(path):
ROOT.gROOT.Macro(path)
print('Root macro \'{}\' loaded.'.format(path))
else:
print('Root macro \'{}\' does not exist, skipping it.'.format(path))
# read from RDataFrame and flatten a given collection, return pandas dataframe
def flatten_collection(rdf, collection, cols=None):
if not cols:
cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
else:
cols = ['{}.{}'.format(collection, c) for c in cols]
data = rdf.AsNumpy(cols)
# flatten the data, add an event id to identify clusters from different events
evns = []
for i, vec in enumerate(data[cols[0]]):
evns += [i]*vec.size()
for n, vals in data.items():
data[n] = np.asarray([v for vec in vals for v in vec])
# build data frame
dfp = pd.DataFrame(columns=cols, data=np.vstack(list(data.values())).T)
dfp.loc[:, 'event'] = evns
return dfp
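# Illustrative usage sketch (not executed here; 'rec.root' is a placeholder
# reconstruction file containing an 'EcalBarrelClusters' branch):
#   rdf = ROOT.RDataFrame('events', 'rec.root')
#   dfp = flatten_collection(rdf, 'EcalBarrelClusters', ['nhits', 'energy'])
#   # dfp has one row per cluster, columns 'EcalBarrelClusters.nhits' and
#   # 'EcalBarrelClusters.energy', plus an 'event' column identifying the event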
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('rec_file', help='Path to reconstruction output file.')
parser.add_argument('-o', dest='outdir', default='.', help='Output directory.')
parser.add_argument('-m', '--macros', dest='macros', default='rootlogon.C',
help='Macro files to be loaded by root, separated by \",\".')
parser.add_argument('--collections', dest='coll', default='',
help='Collection names for clusters, separated by \",\"')
args = parser.parse_args()
# multi-threading for RDataFrame
nthreads = os.cpu_count()//2
ROOT.ROOT.EnableImplicitMT(nthreads)
print('CPU available {}, using {}'.format(os.cpu_count(), nthreads))
os.makedirs(args.outdir, exist_ok=True)
# load macros (add libraries/headers here that ROOT cannot automatically find)
load_root_macros(args.macros)
rdf_rec = ROOT.RDataFrame('events', args.rec_file)
if not args.coll:
print('No collection names provided, no plots generated.')
exit(-1)
# a function to extract eta
ROOT.gInterpreter.Declare('''
template<typename T>
ROOT::VecOps::RVec<T> eta(const ROOT::VecOps::RVec<T> &theta) {
ROOT::VecOps::RVec<double> result;
for (size_t i = 0; i < theta.size(); ++ i) { result.push_back(-std::log(std::tan(theta[i]/2.))); }
return result;
}
''')
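# The helper above computes pseudorapidity, eta = -log(tan(theta/2)).
# It could also be used in a JIT-ed Define (illustrative; assumes the collection
# exposes a 'polar.theta' column), e.g.
#   rdf_rec.Define('cl_eta', 'eta(EcalBarrelClusters.polar.theta)')
# The loop below instead evaluates the same formula with numpy on the
# flattened dataframe.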
# branch name, x label, x bins
plots = [
('nhits', 'Number of Hits', 50),
('energy', 'Energy (GeV)', 50),
('eta', r'$\eta$', 50),
]
for coll in [c.strip() for c in args.coll.split(',')]:
df = flatten_collection(rdf_rec, coll)
# calculate eta
df.loc[:, '{}.eta'.format(coll)] = -np.log(np.tan(df['{}.polar.theta'.format(coll)].values/2.))
fig, axs = plt.subplots(2, 2, figsize=(12, 8), dpi=160)
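# cluster multiplicity: count distinct cluster IDs per event and weight each
# entry by 1/N_events so the histogram shows the per-event fraction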
ncl = df.groupby('event')['{}.clusterID'.format(coll)].nunique().values
axs[0][0].hist(ncl, weights=np.repeat(1./float(ncl.shape[0]), ncl.shape[0]),
bins=np.arange(0, 10), align='mid', ec='k')
axs[0][0].set_xlabel('Number of Clusters', fontsize=16)
for ax, (branch, xlabel, bins) in zip(axs.flat[1:], plots):
ax.hist(df['{}.{}'.format(coll, branch)].values, bins=bins, align='mid', ec='k')
ax.set_xlabel(xlabel, fontsize=16)
# universal axis settings
for ax in axs.flat:
ax.tick_params(labelsize=16, direction='in')
ax.set_axisbelow(True)
ax.grid(linestyle=':')
fig.text(0.05, 0.5, 'Normalized Counts', rotation=90, va='center', fontsize=18)
fig.savefig(os.path.join(args.outdir, '{}.png'.format(coll)))
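# Example invocation (illustrative; the input file name is a placeholder),
# matching how the benchmark runner above calls this script:
#   python draw_cluters.py rec_file.root --collections EcalBarrelClusters -o results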