From 0803515d29888b27cbcbd13a55feb5edd628b9dd Mon Sep 17 00:00:00 2001 From: Chao Peng <cpeng@anl.gov> Date: Tue, 15 Jun 2021 15:35:01 -0500 Subject: [PATCH] add an analysis script --- benchmarks/clustering/full_cal_clusters.sh | 3 +- .../clustering/scripts/barrel_clusters.cxx | 9 +- .../clustering/scripts/cluster_plots.py | 102 ++++++++++++++++++ .../clustering/scripts/gen_particles.py | 9 +- 4 files changed, 118 insertions(+), 5 deletions(-) create mode 100644 benchmarks/clustering/scripts/cluster_plots.py diff --git a/benchmarks/clustering/full_cal_clusters.sh b/benchmarks/clustering/full_cal_clusters.sh index ac324763..c9d09a85 100644 --- a/benchmarks/clustering/full_cal_clusters.sh +++ b/benchmarks/clustering/full_cal_clusters.sh @@ -124,7 +124,8 @@ if [[ "$?" -ne "0" ]] ; then exit 1 fi -# TODO analysis scripts to have PR plots! +# run analysis scripts +python ${FULL_CAL_SCRIPT_DIR}/cluster_plots.py ${FULL_CAL_REC_FILE} -o results root_filesize=$(stat --format=%s "${FULL_CAL_REC_FILE}") if [[ "${FULL_CAL_NUMEV}" -lt "500" ]] ; then diff --git a/benchmarks/clustering/scripts/barrel_clusters.cxx b/benchmarks/clustering/scripts/barrel_clusters.cxx index c86c5eef..8968d48d 100644 --- a/benchmarks/clustering/scripts/barrel_clusters.cxx +++ b/benchmarks/clustering/scripts/barrel_clusters.cxx @@ -39,6 +39,13 @@ auto pt = [](std::vector<dd4pod::Geant4ParticleData> const& in) { } return result; }; +auto pid = [](ROOT::VecOps::RVec<dd4pod::Geant4ParticleData> const& in) { + std::vector<int> result; + for (size_t i = 0; i < in.size(); ++i) { + result.push_back(in[i].pdgID); + } + return result; +}; auto eta = [](ROOT::VecOps::RVec<dd4pod::Geant4ParticleData> const& in) { std::vector<float> result; ROOT::Math::PxPyPzMVector lv; @@ -79,7 +86,7 @@ auto delta_E = [](std::vector<ROOT::Math::PxPyPzMVector> const& thrown, const st }; -void barrel_clusters(const char* in_fname = "topside/rec_barrel_clusters.root") +int barrel_clusters(const char* in_fname = "topside/rec_barrel_clusters.root") { ROOT::EnableImplicitMT(); ROOT::RDataFrame df("events", in_fname); diff --git a/benchmarks/clustering/scripts/cluster_plots.py b/benchmarks/clustering/scripts/cluster_plots.py new file mode 100644 index 00000000..bc28d255 --- /dev/null +++ b/benchmarks/clustering/scripts/cluster_plots.py @@ -0,0 +1,102 @@ +''' + A simple analysis script to extract some basic info of Monte-Carlo particles and clusters +''' +import os +import ROOT +import pandas as pd +import numpy as np +import argparse +from matplotlib import pyplot as plt +import matplotlib.ticker as ticker + + +# load root macros, input is an argument string +def load_root_macros(arg_macros): + for path in arg_macros.split(','): + path = path.strip() + if os.path.exists(path): + gROOT.Macro(path) + print('Loading root macro: \'{}\' loaded.'.format(path)) + else: + print('Loading root macros: \'{}\' does not exist, skip loading it.'.format(path)) + + +def thrown_particles_figure(df): + # define truth particle info + df = df.Define('isThrown', 'mcparticles2.genStatus == 1')\ + .Define('thrownParticles', 'mcparticles2[isThrown]')\ + .Define('thrownP', 'fourvec(thrownParticles)')\ + .Define('thrownPID', 'pid(thrownParticles)')\ + .Define('thrownEta', 'eta(thrownParticles)')\ + .Define('thrownTheta', 'theta(thrownP)')\ + .Define('thrownMomentum', 'momentum(thrownP)') + # figure + fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120) + + # pid info need some special treatment + pids = df.AsNumpy(['thrownPID'])['thrownPID'] + # unpack vectors + pids = np.asarray([v for vec in pids for v in vec], dtype=int) + # build a dictionary for particle id and particle name + pdgbase = ROOT.TDatabasePDG() + pdict = {pid: pdgbase.GetParticle(int(pid)).GetName() for pid in np.unique(pids)} + pmap = {pid: i*2 for i, pid in enumerate(list(pdict))} + # convert pid to index in dictionary + pmaps = [pmap[i] for i in pids] + ax = axs.flat[0] + ax.hist(pmaps, bins=np.arange(-4, len(pmap)*2 + 4), align='left', ec='k') + ax.set_ylabel('Particle Counts', fontsize=24) + ax.tick_params(labelsize=22) + ax.set_axisbelow(True) + ax.grid(linestyle=':', which='both', axis='y') + ax.xaxis.set_major_locator(ticker.FixedLocator(list(pmap.values()))) + ax.set_xticklabels(list(pdict.values())) + + # kinematics + data = df.AsNumpy(['thrownMomentum', 'thrownTheta', 'thrownEta']) + labels = { + 'thrownMomentum': (r'$p$ (GeV)', 'Counts'), + 'thrownTheta': (r'$\theta$ (degree)', 'Counts'), + 'thrownEta': (r'$\eta$', 'Counts'), + } + for ax, (name, vals) in zip(axs.flat[1:], data.items()): + hvals = [v for vec in vals for v in vec] + ax.hist(hvals, bins=50, ec='k') + ax.tick_params(labelsize=22, direction='in', which='both') + ax.grid(linestyle=':', which='both') + label = labels[name] + ax.set_axisbelow(True) + ax.set_xlabel(label[0], fontsize=24) + ax.set_ylabel(label[1], fontsize=24) + + fig.text(0.5, 0.95, 'Thrown Particles', ha='center', fontsize=24) + return fig + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('file', help='Path to reconstruction file.') + parser.add_argument('-o', dest='outdir', default='.', help='Output directory.') + parser.add_argument('-m', '--macros', dest='macros', default='rootlogon.C', + help='Macro files to be loaded by root, separated by \",\".') + args = parser.parse_args() + + # multi-threading for RDataFrame + nthreads = os.cpu_count()//2 + ROOT.ROOT.EnableImplicitMT(nthreads) + print('CPU available {}, using {}'.format(os.cpu_count(), nthreads)) + + # declare some functions from c++ script, needed for data frame processing + script_dir = os.path.dirname(os.path.realpath(__file__)) + fcode = open(os.path.join(script_dir, 'barrel_clusters.cxx')).read() + ROOT.gInterpreter.Declare(fcode) + + os.makedirs(args.outdir, exist_ok=True) + # load macros (add libraries/headers root cannot automatically found here) + load_root_macros(args.macros) + + rdf = ROOT.RDataFrame('events', args.file) + + fig = thrown_particles_figure(rdf) + fig.savefig(os.path.join(args.outdir, 'thrown_particles.png')) + diff --git a/benchmarks/clustering/scripts/gen_particles.py b/benchmarks/clustering/scripts/gen_particles.py index 036d4d50..2d0b81c8 100644 --- a/benchmarks/clustering/scripts/gen_particles.py +++ b/benchmarks/clustering/scripts/gen_particles.py @@ -49,21 +49,24 @@ if __name__ == "__main__": parser.add_argument('output', help='path to the output file') parser.add_argument('-n', type=int, default=1000, dest='nev', help='number of events to generate') - parser.add_argument('--parray', type=str, default="", dest='parray', help='an array of momentum in GeV') + parser.add_argument('-s', type=int, default=-1, dest='seed', help='seed for random generator') + parser.add_argument('--parray', type=str, default="", dest='parray', + help='an array of momenta in GeV, separated by \",\"') parser.add_argument('--pmin', type=float, default=8.0, dest='pmin', help='minimum momentum in GeV') parser.add_argument('--pmax', type=float, default=100.0, dest='pmax', help='maximum momentum in GeV') parser.add_argument('--angmin', type=float, default=0.0, dest='angmin', help='minimum angle in degree') parser.add_argument('--angmax', type=float, default=20.0, dest='angmax', help='maximum angle in degree') parser.add_argument('--phmin', type=float, default=0.0, dest='phmin', help='minimum angle in degree') parser.add_argument('--phmax', type=float, default=360.0, dest='phmax', help='maximum angle in degree') - parser.add_argument('--particles', type=str, default='electron', dest='particles', help='particle names') - parser.add_argument('-s', type=int, default=-1, dest='seed', help='seed for random generator') + parser.add_argument('--particles', type=str, default='electron', dest='particles', + help='particle names, support {}'.format(list(PARTICLES.keys()))) args = parser.parse_args() # random seed (< 0 will get it from enviroment variable 'SEED', or a system random number) if args.seed < 0: args.seed = os.environ.get('SEED', int.from_bytes(os.urandom(4), byteorder='big', signed=False)) + print("Random seed is {}".format(args.seed)) np.random.seed(args.seed) output = hm.WriterAscii(args.output); -- GitLab