diff --git a/benchmarks/clustering/scripts/cluster_plots.py b/benchmarks/clustering/scripts/cluster_plots.py index 6f70fa7ff88cfc803324e50e258a8418fa21198a..8802a94f1156860e792cea9a6850f195453b1d15 100644 --- a/benchmarks/clustering/scripts/cluster_plots.py +++ b/benchmarks/clustering/scripts/cluster_plots.py @@ -21,98 +21,77 @@ def load_root_macros(arg_macros): print('Loading root macros: \'{}\' does not exist, skip loading it.'.format(path)) -def thrown_particles_figure(df, save, mcbranch="mcparticles"): +# read from RDataFrame and flatten a given collection, return pandas dataframe +def flatten_collection(rdf, collection, cols=None): + if not cols: + cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))] + else: + cols = ['{}.{}'.format(collection, c) for c in cols] + data = rdf.AsNumpy(cols) + # flatten the data, add an event id to identify clusters from different events + evns = [] + for i, vec in enumerate(data[cols[0]]): + evns += [i]*vec.size() + for n, vals in data.items(): + data[n] = np.asarray([v for vec in vals for v in vec]) + # build data frame + dfp = pd.DataFrame(columns=cols, data=np.vstack(list(data.values())).T) + dfp.loc[:, 'event'] = evns + return dfp + + +def thrown_particles_figure(rdf, save, mcbranch="mcparticles"): # define truth particle info - df = df.Define('isThrown', '{}.genStatus == 1'.format(mcbranch))\ - .Define('thrownParticles', '{}[isThrown]'.format(mcbranch))\ - .Define('thrownP', 'fourvec(thrownParticles)')\ - .Define('thrownPID', 'pid(thrownParticles)')\ - .Define('thrownEta', 'eta(thrownParticles)')\ - .Define('thrownTheta', 'theta(thrownP)')\ - .Define('thrownMomentum', 'momentum(thrownP)') + df = flatten_collection(rdf, mcbranch, ['genStatus', 'pdgID', 'psx', 'psy', 'psz', 'mass']) + df.rename(columns={c: c.replace(mcbranch + '.', '') for c in df.columns}, inplace=True) + # select thrown particles + df = df[df['genStatus'] == 1] + # figure fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120) - - # pid info need some special treatment - pids = df.AsNumpy(['thrownPID'])['thrownPID'] - # unpack vectors - pids = np.asarray([v for vec in pids for v in vec], dtype=int) - # build a dictionary for particle id and particle name + # convert pid to particle names pdgbase = ROOT.TDatabasePDG() - pdict = {pid: pdgbase.GetParticle(int(pid)).GetName() for pid in np.unique(pids)} - pmap = {pid: i*2 for i, pid in enumerate(list(pdict))} - # convert pid to index in dictionary - pmaps = [pmap[i] for i in pids] + get_pname = np.vectorize(lambda pid: pdgbase.GetParticle(int(pid)).GetName()) + + # enumerate particle names + df.loc[:, 'pname'] = get_pname(df['pdgID'].values) + penum = {pname: i for i, pname in enumerate(df['pname'].unique())} + df.loc[:, 'pname_id'] = df['pname'].map(penum) + ax = axs.flat[0] - ax.hist(pmaps, bins=np.arange(-4, len(pmap)*2 + 4), align='left', ec='k') + ax.hist(df['pname_id'].values, bins=np.arange(-4, len(penum)*2 + 4), align='left', ec='k') ax.set_ylabel('Particle Counts', fontsize=24) ax.tick_params(labelsize=22) ax.set_axisbelow(True) ax.grid(linestyle=':', which='both', axis='y') - ax.xaxis.set_major_locator(ticker.FixedLocator(list(pmap.values()))) - ax.set_xticklabels(list(pdict.values())) - - # kinematics - data = df.AsNumpy(['thrownMomentum', 'thrownTheta', 'thrownEta']) - labels = { - 'thrownMomentum': (r'$p$ (GeV)', 'Counts'), - 'thrownTheta': (r'$\theta$ (degree)', 'Counts'), - 'thrownEta': (r'$\eta$', 'Counts'), - } - for ax, (name, vals) in zip(axs.flat[1:], data.items()): - hvals = [v for vec in vals for v in vec] - ax.hist(hvals, bins=50, ec='k') - ax.tick_params(labelsize=22, direction='in', which='both') - ax.grid(linestyle=':', which='both') - label = labels[name] - ax.set_axisbelow(True) - ax.set_xlabel(label[0], fontsize=24) - ax.set_ylabel(label[1], fontsize=24) - - fig.text(0.5, 0.95, 'Thrown Particles', ha='center', fontsize=24) - fig.savefig(save) - plt.close(fig) - - -def general_clusters_figure(df, collection, save, min_nhits=3): - data = df.AsNumpy([ - '{}.nhits'.format(collection), - '{}.energy'.format(collection), - '{}.polar.theta'.format(collection), - '{}.polar.phi'.format(collection), - ]) - # build a dataframe - evns = [] - for i, vec in enumerate(data['{}.nhits'.format(collection)]): - evns += [i]*vec.size() - for n, vals in data.items(): - data[n] = np.asarray([v for vec in vals for v in vec]) - dfp = pd.DataFrame(columns=['nhits', 'edep', 'theta', 'phi'], - data=np.vstack(list(data.values())).T) - dfp.loc[:, 'evn'] = evns - # select the max. energy cluster for each event - dfc = dfp.loc[dfp.groupby('evn')['edep'].idxmax()] - dfc = dfc.loc[dfc['nhits'] >= min_nhits] - # figure - fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120) - labels = [ - ('Number of Hits', 'Counts'), - (r'$E$ (GeV)', 'Counts'), - (r'$\theta$ (rad)', 'Counts'), - (r'$\phi$ (rad)', 'Counts'), + ax.xaxis.set_major_locator(ticker.FixedLocator(list(penum.values()))) + ax.set_xticklabels(list(penum), rotation=30) + + # calculate kinematics + get_4vecs = np.vectorize(lambda x, y, z, m: ROOT.Math.PxPyPzMVector(x, y, z, m)) + fourvecs = get_4vecs(*df[['psx', 'psy', 'psz', 'mass']].values.T) + + df.loc[:, 'p'] = [v.P() for v in fourvecs] + df.loc[:, 'eta'] = [v.Eta() for v in fourvecs] + df.loc[:, 'pt'] = [v.Pt() for v in fourvecs] + + # column name, x label, y label, bins + plots = [ + ('p', r'$P$ (GeV)', 'Counts', 50), + ('eta', r'$\eta$', 'Counts', 50), + ('pt', r'$P_T$ (GeV)', 'Counts', 50), ] - for ax, label, vals in zip(axs.flat, labels, dfc[['nhits', 'edep', 'theta', 'phi']].values.T): - ax.hist(vals, bins=50, ec='k') + for ax, (col, xlabel, ylabel, bins) in zip(axs.flat[1:], plots): + ax.hist(df[col].values, bins=bins, align='left', ec='k') ax.tick_params(labelsize=22, direction='in', which='both') ax.grid(linestyle=':', which='both') ax.set_axisbelow(True) - ax.set_xlabel(label[0], fontsize=24) - ax.set_ylabel(label[1], fontsize=24) + ax.set_xlabel(xlabel, fontsize=24) + ax.set_ylabel(ylabel, fontsize=24) - fig.text(0.5, 0.95, collection, ha='center', fontsize=24) + fig.text(0.5, 0.95, 'Thrown Particles', ha='center', fontsize=24) fig.savefig(save) plt.close(fig) - return dfp if __name__ == '__main__': @@ -132,30 +111,67 @@ if __name__ == '__main__': print('CPU available {}, using {}'.format(os.cpu_count(), nthreads)) # declare some functions from c++ script, needed for data frame processing - script_dir = os.path.dirname(os.path.realpath(__file__)) - fcode = open(os.path.join(script_dir, 'barrel_clusters.cxx')).read() - ROOT.gInterpreter.Declare(fcode) + # script_dir = os.path.dirname(os.path.realpath(__file__)) + # fcode = open(os.path.join(script_dir, 'barrel_clusters.cxx')).read() + # ROOT.gInterpreter.Declare(fcode) os.makedirs(args.outdir, exist_ok=True) # load macros (add libraries/headers root cannot automatically found here) load_root_macros(args.macros) rdf_sim = ROOT.RDataFrame('events', args.sim_file) - rdf_rec = ROOT.RDataFrame('events', args.rec_file) - thrown_particles_figure(rdf_sim, save=os.path.join(args.outdir, 'thrown_particles.png'), mcbranch=args.mc) - general_clusters_figure(rdf_rec, collection='EcalEndcapNClusters', min_nhits=10, - save=os.path.join(args.outdir, 'ecal_electron_endcap_clusters.png')) - general_clusters_figure(rdf_rec, collection='EcalEndcapPClusters', min_nhits=5, - save=os.path.join(args.outdir, 'ecal_hadron_endcap_clusters.png')) - general_clusters_figure(rdf_rec, collection='EcalBarrelClusters', - save=os.path.join(args.outdir, 'ecal_barrel_clusters.png')) - - general_clusters_figure(rdf_rec, collection='HcalElectronEndcapClusters', min_nhits=5, - save=os.path.join(args.outdir, 'hcal_electron_endcap_clusters.png')) - general_clusters_figure(rdf_rec, collection='HcalHadronEndcapClusters', min_nhits=10, - save=os.path.join(args.outdir, 'hcal_hadron_endcap_clusters.png')) - general_clusters_figure(rdf_rec, collection='HcalBarrelClusters', - save=os.path.join(args.outdir, 'hcal_barrel_clusters.png')) + rdf_rec = ROOT.RDataFrame('events', args.rec_file) + # cluster collections + colls = [ + 'EcalEndcapNClusters', + 'EcalEndcapPClusters', + 'EcalBarrelClusters', + 'HcalElectronEndcapClusters', + 'HcalHadronEndcapClusters', + 'HcalBarrelClusters', + ] + # a function to extract eta + ROOT.gInterpreter.Declare(''' + template<typename T> + ROOT::VecOps::RVec<T> eta(const ROOT::VecOps::RVec<T> &theta) { + ROOT::VecOps::RVec<double> result; + for (size_t i = 0; i < theta.size(); ++ i) { result.push_back(-std::log(std::tan(theta[i]/2.))); } + return result; + } + ''') + + # branch name, x label, x bins + plots = [ + ('nhits', 'Number of Hits', 50), + ('energy', 'Energy (GeV)', 50), + ('eta', '$\eta$', 50), + ] + for coll in colls: + df = flatten_collection(rdf_rec, coll) + if not df.shape[0]: + print('{} do not have valid entries, skip it'.format(coll)) + continue + df.rename(columns={c: c.replace(coll + '.', '') for c in df.columns}, inplace=True) + # calculate eta + if 'eta' not in df.columns: + df.loc[:, 'eta'] = -np.log(np.tan(df['polar.theta'].values/2.)) + fig, axs = plt.subplots(2, 2, figsize=(12, 8), dpi=160) + ncl = df.groupby('event')['clusterID'].nunique().values + axs[0][0].hist(ncl, weights=np.repeat(1./float(ncl.shape[0]), ncl.shape[0]), + bins=np.arange(0, 10), align='mid', ec='k') + axs[0][0].set_xlabel('Number of Clusters', fontsize=16) + + for ax, (branch, xlabel, bins) in zip(axs.flat[1:], plots): + ax.hist(df[branch].values, bins=bins, align='mid', ec='k') + ax.set_xlabel(xlabel, fontsize=16) + + # universal axis settings + for ax in axs.flat: + ax.tick_params(labelsize=16, direction='in') + ax.set_axisbelow(True) + ax.grid(linestyle=':') + fig.text(0.05, 0.5, 'Normalized Counts', rotation=90, va='center', fontsize=18) + fig.savefig(os.path.join(args.outdir, '{}.png'.format(coll))) diff --git a/benchmarks/clustering/scripts/barrel_clusters.cxx b/benchmarks/clustering/scripts/deprecated/barrel_clusters.cxx similarity index 100% rename from benchmarks/clustering/scripts/barrel_clusters.cxx rename to benchmarks/clustering/scripts/deprecated/barrel_clusters.cxx diff --git a/benchmarks/ecal/run_emcal_benchmarks.py b/benchmarks/ecal/run_emcal_benchmarks.py index c895735e6fb0917594803a1f3f8fc34dd7b205bd..3526ca58d719a7a0a3d1267d266216b64ee18cbb 100755 --- a/benchmarks/ecal/run_emcal_benchmarks.py +++ b/benchmarks/ecal/run_emcal_benchmarks.py @@ -11,12 +11,24 @@ import subprocess import argparse -# type: option files, analysis files +# type: option files, analysis files, cluster collections default_type = { - 'endcap_e': [['endcap_e.py'], []], - 'endcap_i': [['endcap_i.py'], []], - 'barrel': [['barrel.py'], []], -# 'all': [['all_ecal.py'], []], + 'endcap_e': [ + ['endcap_e.py'], + ['draw_cluters.py'], + ['EcalEndcapNClusters']], + 'endcap_i': [ + ['endcap_i.py'], + ['draw_cluters.py'], + ['EcalEndcapPClusters']], + 'barrel': [ + ['barrel.py'], + ['draw_cluters.py'], + ['EcalBarrelClusters']], +# 'all': [ +# ['all_ecal.py'], +# ['draw_cluters.py'], +# ['EcalEndcapNClusters', 'EcalEndcapPClusters', 'EcalBarrelClusters']], } default_compact = os.path.join(os.environ.get('JUGGLER_DETECTOR_PATH', os.environ.get('DETECTOR_PATH', '')), @@ -43,7 +55,7 @@ kwargs = vars(args) if args.run_type not in default_type: print('ERROR unsupported run type {}, must in {}'.format(args.run_type, list(default_type.keys()))) exit(1) -opt_scripts, ana_scripts = default_type[args.run_type] +opt_scripts, ana_scripts, collections = default_type[args.run_type] gen_file = 'gen_{nametag}_{pmin}_{pmax}.hepmc'.format(**kwargs) sim_file = 'sim_{nametag}_{pmin}_{pmax}.root'.format(**kwargs) @@ -72,7 +84,6 @@ if 'sim' in procs: '-v', 'WARNING'] if args.seed > 0: sim_cmd += ['--random.seed', args.seed] - print(sim_cmd) return_code = subprocess.run(sim_cmd).returncode if return_code is not None and return_code < 0: print("ERROR running simulation!") @@ -101,7 +112,9 @@ if 'rec' in procs: if 'ana' in procs: os.makedirs('results', exist_ok=True) for ana in ana_scripts: - ana_cmd = ['python', os.path.join(sdir, 'scripts', ana), rec_file, '-o', 'results'] + ana_cmd = ['python', os.path.join(sdir, 'scripts', ana), rec_file, + '--collections', ','.join(collections), + '-o', 'results'] return_code = subprocess.run(ana_cmd).returncode if return_code is not None and return_code < 0: print('ERROR running analysis ({})!'.format(ana)) diff --git a/benchmarks/ecal/scripts/draw_cluters.py b/benchmarks/ecal/scripts/draw_cluters.py new file mode 100644 index 0000000000000000000000000000000000000000..d46e1a2d5b187a8efee99bc769e619a2dbab2959 --- /dev/null +++ b/benchmarks/ecal/scripts/draw_cluters.py @@ -0,0 +1,107 @@ +''' + A simple analysis script to extract some basic info of clusters + Chao Peng (ANL), 07/08/2021 +''' +import os +import ROOT +import pandas as pd +import numpy as np +import argparse +from matplotlib import pyplot as plt +import matplotlib.ticker as ticker + + +# load root macros, input is an argument string +def load_root_macros(arg_macros): + for path in arg_macros.split(','): + path = path.strip() + if os.path.exists(path): + gROOT.Macro(path) + print('Loading root macro: \'{}\' loaded.'.format(path)) + else: + print('Loading root macros: \'{}\' does not exist, skip loading it.'.format(path)) + + +# read from RDataFrame and flatten a given collection, return pandas dataframe +def flatten_collection(rdf, collection, cols=None): + if not cols: + cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))] + else: + cols = ['{}.{}'.format(collection, c) for c in cols] + data = rdf.AsNumpy(cols) + # flatten the data, add an event id to identify clusters from different events + evns = [] + for i, vec in enumerate(data[cols[0]]): + evns += [i]*vec.size() + for n, vals in data.items(): + data[n] = np.asarray([v for vec in vals for v in vec]) + # build data frame + dfp = pd.DataFrame(columns=cols, data=np.vstack(list(data.values())).T) + dfp.loc[:, 'event'] = evns + return dfp + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('rec_file', help='Path to reconstruction output file.') + parser.add_argument('-o', dest='outdir', default='.', help='Output directory.') + parser.add_argument('-m', '--macros', dest='macros', default='rootlogon.C', + help='Macro files to be loaded by root, separated by \",\".') + parser.add_argument('--collections', dest='coll', default='', + help='Collection names for clusters, separated by \",\"') + args = parser.parse_args() + + # multi-threading for RDataFrame + nthreads = os.cpu_count()//2 + ROOT.ROOT.EnableImplicitMT(nthreads) + print('CPU available {}, using {}'.format(os.cpu_count(), nthreads)) + + os.makedirs(args.outdir, exist_ok=True) + # load macros (add libraries/headers root cannot automatically found here) + load_root_macros(args.macros) + + rdf_rec = ROOT.RDataFrame('events', args.rec_file) + + if not args.coll: + print('No collection names provided, no plots generated.') + exit(-1) + + # a function to extract eta + ROOT.gInterpreter.Declare(''' + template<typename T> + ROOT::VecOps::RVec<T> eta(const ROOT::VecOps::RVec<T> &theta) { + ROOT::VecOps::RVec<double> result; + for (size_t i = 0; i < theta.size(); ++ i) { result.push_back(-std::log(std::tan(theta[i]/2.))); } + return result; + } + ''') + + # branch name, x label, x bins + plots = [ + ('nhits', 'Number of Hits', 50), + ('energy', 'Energy (GeV)', 50), + ('eta', '$\eta$', 50), + ] + for coll in [c.strip() for c in args.coll.split(',')]: + df = flatten_collection(rdf_rec, coll) + # calculate eta + df.loc[:, '{}.eta'.format(coll)] = -np.log(np.tan(df['{}.polar.theta'.format(coll)].values/2.)) + fig, axs = plt.subplots(2, 2, figsize=(12, 8), dpi=160) + ncl = df.groupby('event')['{}.clusterID'.format(coll)].nunique().values + axs[0][0].hist(ncl, weights=np.repeat(1./float(ncl.shape[0]), ncl.shape[0]), + bins=np.arange(0, 10), align='mid', ec='k') + axs[0][0].set_xlabel('Number of Clusters', fontsize=16) + + for ax, (branch, xlabel, bins) in zip(axs.flat[1:], plots): + ax.hist(df['{}.{}'.format(coll, branch)].values, bins=bins, align='mid', ec='k') + ax.set_xlabel(xlabel, fontsize=16) + + # universal axis settings + for ax in axs.flat: + ax.tick_params(labelsize=16, direction='in') + ax.set_axisbelow(True) + ax.grid(linestyle=':') + fig.text(0.05, 0.5, 'Normalized Counts', rotation=90, va='center', fontsize=18) + fig.savefig(os.path.join(args.outdir, '{}.png'.format(coll))) + +