From 0803515d29888b27cbcbd13a55feb5edd628b9dd Mon Sep 17 00:00:00 2001
From: Chao Peng <cpeng@anl.gov>
Date: Tue, 15 Jun 2021 15:35:01 -0500
Subject: [PATCH] add an analysis script

---
 benchmarks/clustering/full_cal_clusters.sh    |   3 +-
 .../clustering/scripts/barrel_clusters.cxx    |   9 +-
 .../clustering/scripts/cluster_plots.py       | 102 ++++++++++++++++++
 .../clustering/scripts/gen_particles.py       |   9 +-
 4 files changed, 118 insertions(+), 5 deletions(-)
 create mode 100644 benchmarks/clustering/scripts/cluster_plots.py

diff --git a/benchmarks/clustering/full_cal_clusters.sh b/benchmarks/clustering/full_cal_clusters.sh
index ac324763..c9d09a85 100644
--- a/benchmarks/clustering/full_cal_clusters.sh
+++ b/benchmarks/clustering/full_cal_clusters.sh
@@ -124,7 +124,8 @@ if [[ "$?" -ne "0" ]] ; then
   exit 1
 fi
 
-# TODO analysis scripts to have PR plots!
+# run analysis scripts
+python ${FULL_CAL_SCRIPT_DIR}/cluster_plots.py ${FULL_CAL_REC_FILE} -o results
 
 root_filesize=$(stat --format=%s "${FULL_CAL_REC_FILE}")
 if [[ "${FULL_CAL_NUMEV}" -lt "500" ]] ; then
diff --git a/benchmarks/clustering/scripts/barrel_clusters.cxx b/benchmarks/clustering/scripts/barrel_clusters.cxx
index c86c5eef..8968d48d 100644
--- a/benchmarks/clustering/scripts/barrel_clusters.cxx
+++ b/benchmarks/clustering/scripts/barrel_clusters.cxx
@@ -39,6 +39,13 @@ auto pt = [](std::vector<dd4pod::Geant4ParticleData> const& in) {
   }
   return result;
 };
+auto pid = [](ROOT::VecOps::RVec<dd4pod::Geant4ParticleData> const& in) {
+  std::vector<int> result;
+  for (size_t i = 0; i < in.size(); ++i) {
+    result.push_back(in[i].pdgID);
+  }
+  return result;
+};
 auto eta = [](ROOT::VecOps::RVec<dd4pod::Geant4ParticleData> const& in) {
   std::vector<float> result;
   ROOT::Math::PxPyPzMVector lv;
@@ -79,7 +86,7 @@ auto delta_E = [](std::vector<ROOT::Math::PxPyPzMVector> const& thrown, const st
 };
 
 
-void barrel_clusters(const char* in_fname = "topside/rec_barrel_clusters.root")
+int barrel_clusters(const char* in_fname = "topside/rec_barrel_clusters.root")
 {
   ROOT::EnableImplicitMT();
   ROOT::RDataFrame df("events", in_fname);
diff --git a/benchmarks/clustering/scripts/cluster_plots.py b/benchmarks/clustering/scripts/cluster_plots.py
new file mode 100644
index 00000000..bc28d255
--- /dev/null
+++ b/benchmarks/clustering/scripts/cluster_plots.py
@@ -0,0 +1,102 @@
+'''
+    A simple analysis script to extract some basic info of Monte-Carlo particles and clusters
+'''
+import os
+import ROOT
+import pandas as pd
+import numpy as np
+import argparse
+from matplotlib import pyplot as plt
+import matplotlib.ticker as ticker
+
+
+# load root macros, input is an argument string
+def load_root_macros(arg_macros):
+    for path in arg_macros.split(','):
+        path = path.strip()
+        if os.path.exists(path):
+            gROOT.Macro(path)
+            print('Loading root macro: \'{}\' loaded.'.format(path))
+        else:
+            print('Loading root macros: \'{}\' does not exist, skip loading it.'.format(path))
+
+
+def thrown_particles_figure(df):
+    # define truth particle info
+    df = df.Define('isThrown', 'mcparticles2.genStatus == 1')\
+           .Define('thrownParticles', 'mcparticles2[isThrown]')\
+           .Define('thrownP', 'fourvec(thrownParticles)')\
+           .Define('thrownPID', 'pid(thrownParticles)')\
+           .Define('thrownEta', 'eta(thrownParticles)')\
+           .Define('thrownTheta', 'theta(thrownP)')\
+           .Define('thrownMomentum', 'momentum(thrownP)')
+    # figure
+    fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120)
+
+    # pid info need some special treatment
+    pids = df.AsNumpy(['thrownPID'])['thrownPID']
+    # unpack vectors
+    pids = np.asarray([v for vec in pids for v in vec], dtype=int)
+    # build a dictionary for particle id and particle name
+    pdgbase = ROOT.TDatabasePDG()
+    pdict = {pid: pdgbase.GetParticle(int(pid)).GetName() for pid in np.unique(pids)}
+    pmap = {pid: i*2 for i, pid in enumerate(list(pdict))}
+    # convert pid to index in dictionary
+    pmaps = [pmap[i] for i in pids]
+    ax = axs.flat[0]
+    ax.hist(pmaps, bins=np.arange(-4, len(pmap)*2 + 4), align='left', ec='k')
+    ax.set_ylabel('Particle Counts', fontsize=24)
+    ax.tick_params(labelsize=22)
+    ax.set_axisbelow(True)
+    ax.grid(linestyle=':', which='both', axis='y')
+    ax.xaxis.set_major_locator(ticker.FixedLocator(list(pmap.values())))
+    ax.set_xticklabels(list(pdict.values()))
+
+    # kinematics
+    data = df.AsNumpy(['thrownMomentum', 'thrownTheta', 'thrownEta'])
+    labels = {
+        'thrownMomentum': (r'$p$ (GeV)', 'Counts'),
+        'thrownTheta': (r'$\theta$ (degree)', 'Counts'),
+        'thrownEta': (r'$\eta$', 'Counts'),
+    }
+    for ax, (name, vals) in zip(axs.flat[1:], data.items()):
+        hvals = [v for vec in vals for v in vec]
+        ax.hist(hvals, bins=50, ec='k')
+        ax.tick_params(labelsize=22, direction='in', which='both')
+        ax.grid(linestyle=':', which='both')
+        label = labels[name]
+        ax.set_axisbelow(True)
+        ax.set_xlabel(label[0], fontsize=24)
+        ax.set_ylabel(label[1], fontsize=24)
+
+    fig.text(0.5, 0.95, 'Thrown Particles', ha='center', fontsize=24)
+    return fig
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('file', help='Path to reconstruction file.')
+    parser.add_argument('-o', dest='outdir', default='.', help='Output directory.')
+    parser.add_argument('-m', '--macros', dest='macros', default='rootlogon.C',
+            help='Macro files to be loaded by root, separated by \",\".')
+    args = parser.parse_args()
+
+    # multi-threading for RDataFrame
+    nthreads = os.cpu_count()//2
+    ROOT.ROOT.EnableImplicitMT(nthreads)
+    print('CPU available {}, using {}'.format(os.cpu_count(), nthreads))
+
+    # declare some functions from c++ script, needed for data frame processing
+    script_dir = os.path.dirname(os.path.realpath(__file__))
+    fcode = open(os.path.join(script_dir, 'barrel_clusters.cxx')).read()
+    ROOT.gInterpreter.Declare(fcode)
+
+    os.makedirs(args.outdir, exist_ok=True)
+    # load macros (add libraries/headers root cannot automatically found here)
+    load_root_macros(args.macros)
+
+    rdf = ROOT.RDataFrame('events', args.file)
+
+    fig = thrown_particles_figure(rdf)
+    fig.savefig(os.path.join(args.outdir, 'thrown_particles.png'))
+
diff --git a/benchmarks/clustering/scripts/gen_particles.py b/benchmarks/clustering/scripts/gen_particles.py
index 036d4d50..2d0b81c8 100644
--- a/benchmarks/clustering/scripts/gen_particles.py
+++ b/benchmarks/clustering/scripts/gen_particles.py
@@ -49,21 +49,24 @@ if __name__ == "__main__":
 
     parser.add_argument('output', help='path to the output file')
     parser.add_argument('-n', type=int, default=1000, dest='nev', help='number of events to generate')
-    parser.add_argument('--parray', type=str, default="", dest='parray', help='an array of momentum in GeV')
+    parser.add_argument('-s', type=int, default=-1, dest='seed', help='seed for random generator')
+    parser.add_argument('--parray', type=str, default="", dest='parray',
+                        help='an array of momenta in GeV, separated by \",\"')
     parser.add_argument('--pmin', type=float, default=8.0, dest='pmin', help='minimum momentum in GeV')
     parser.add_argument('--pmax', type=float, default=100.0, dest='pmax', help='maximum momentum in GeV')
     parser.add_argument('--angmin', type=float, default=0.0, dest='angmin', help='minimum angle in degree')
     parser.add_argument('--angmax', type=float, default=20.0, dest='angmax', help='maximum angle in degree')
     parser.add_argument('--phmin', type=float, default=0.0, dest='phmin', help='minimum angle in degree')
     parser.add_argument('--phmax', type=float, default=360.0, dest='phmax', help='maximum angle in degree')
-    parser.add_argument('--particles', type=str, default='electron', dest='particles', help='particle names')
-    parser.add_argument('-s', type=int, default=-1, dest='seed', help='seed for random generator')
+    parser.add_argument('--particles', type=str, default='electron', dest='particles',
+                        help='particle names, support {}'.format(list(PARTICLES.keys())))
 
     args = parser.parse_args()
 
     # random seed (< 0 will get it from enviroment variable 'SEED', or a system random number)
     if args.seed < 0:
         args.seed = os.environ.get('SEED', int.from_bytes(os.urandom(4), byteorder='big', signed=False))
+    print("Random seed is {}".format(args.seed))
     np.random.seed(args.seed)
 
     output = hm.WriterAscii(args.output);
-- 
GitLab