From 4d787b836cac7505835936dea032a51629a55735 Mon Sep 17 00:00:00 2001
From: Chao Peng <cpeng@anl.gov>
Date: Mon, 26 Jul 2021 17:04:15 +0000
Subject: [PATCH] add tracking benchmark

---
 .../clustering/scripts/cluster_plots.py       |  26 +--
 benchmarks/tracking/config.yml                |  10 +-
 .../tracking/options/truth_seeded_tracking.py | 134 +++++++++++
 .../tracking/run_tracking_benchmarks.py       | 114 +++++++++
 benchmarks/tracking/scripts/gen_particles.py  | 104 +++++++++
 .../tracking/scripts/tracking_performance.py  | 216 ++++++++++++++++++
 6 files changed, 590 insertions(+), 14 deletions(-)
 create mode 100644 benchmarks/tracking/options/truth_seeded_tracking.py
 create mode 100755 benchmarks/tracking/run_tracking_benchmarks.py
 create mode 100644 benchmarks/tracking/scripts/gen_particles.py
 create mode 100644 benchmarks/tracking/scripts/tracking_performance.py

diff --git a/benchmarks/clustering/scripts/cluster_plots.py b/benchmarks/clustering/scripts/cluster_plots.py
index e2a53cfd..336c35d4 100644
--- a/benchmarks/clustering/scripts/cluster_plots.py
+++ b/benchmarks/clustering/scripts/cluster_plots.py
@@ -50,10 +50,10 @@ def flatten_collection(rdf, collection, cols=None):
 
 def thrown_particles_figure(rdf, save, mcbranch="mcparticles"):
     # define truth particle info
-    df = flatten_collection(rdf, mcbranch, ['genStatus', 'pdgID', 'psx', 'psy', 'psz', 'mass'])
-    df.rename(columns={c: c.replace(mcbranch + '.', '') for c in df.columns}, inplace=True)
+    dft = flatten_collection(rdf, mcbranch, ['genStatus', 'pdgID', 'psx', 'psy', 'psz', 'mass'])
+    dft.rename(columns={c: c.replace(mcbranch + '.', '') for c in dft.columns}, inplace=True)
     # select thrown particles
-    df = df[df['genStatus'] == 1]
+    dft = dft[dft['genStatus'] == 1]
 
     # figure
     fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120)
@@ -62,12 +62,12 @@ def thrown_particles_figure(rdf, save, mcbranch="mcparticles"):
     get_pname = np.vectorize(lambda pid: pdgbase.GetParticle(int(pid)).GetName())
 
     # enumerate particle names
-    df.loc[:, 'pname'] = get_pname(df['pdgID'].values)
-    penum = {pname: i for i, pname in enumerate(df['pname'].unique())}
-    df.loc[:, 'pname_id'] = df['pname'].map(penum)
+    dft.loc[:, 'pname'] = get_pname(dft['pdgID'].values)
+    penum = {pname: i for i, pname in enumerate(dft['pname'].unique())}
+    dft.loc[:, 'pname_id'] = dft['pname'].map(penum)
 
     ax = axs.flat[0]
-    ax.hist(df['pname_id'].values, bins=np.arange(-4, len(penum)*2 + 4), align='left', ec='k')
+    ax.hist(dft['pname_id'].values, bins=np.arange(-4, len(penum)*2 + 4), align='left', ec='k')
     ax.set_ylabel('Particle Counts', fontsize=24)
     ax.tick_params(labelsize=22)
     ax.set_axisbelow(True)
@@ -77,11 +77,11 @@ def thrown_particles_figure(rdf, save, mcbranch="mcparticles"):
 
     # calculate kinematics
     get_4vecs = np.vectorize(lambda x, y, z, m: ROOT.Math.PxPyPzMVector(x, y, z, m))
-    fourvecs = get_4vecs(*df[['psx', 'psy', 'psz', 'mass']].values.T)
+    fourvecs = get_4vecs(*dft[['psx', 'psy', 'psz', 'mass']].values.T)
 
-    df.loc[:, 'p'] = [v.P() for v in fourvecs]
-    df.loc[:, 'eta'] = [v.Eta() for v in fourvecs]
-    df.loc[:, 'pt'] = [v.Pt() for v in fourvecs]
+    dft.loc[:, 'p'] = [v.P() for v in fourvecs]
+    dft.loc[:, 'eta'] = [v.Eta() for v in fourvecs]
+    dft.loc[:, 'pt'] = [v.Pt() for v in fourvecs]
 
     # column name, x label, y label, bins
     plots = [
@@ -90,7 +90,7 @@ def thrown_particles_figure(rdf, save, mcbranch="mcparticles"):
         ('pt', r'$P_T$ (GeV)', 'Counts', 50),
     ]
     for ax, (col, xlabel, ylabel, bins) in zip(axs.flat[1:], plots):
-        ax.hist(df[col].values, bins=bins, align='left', ec='k')
+        ax.hist(dft[col].values, bins=bins, align='left', ec='k')
         ax.tick_params(labelsize=22, direction='in', which='both')
         ax.grid(linestyle=':', which='both')
         ax.set_axisbelow(True)
@@ -100,7 +100,7 @@ def thrown_particles_figure(rdf, save, mcbranch="mcparticles"):
     fig.text(0.5, 0.95, 'Thrown Particles', ha='center', fontsize=24)
     fig.savefig(save)
     plt.close(fig)
-
+    return dft
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
diff --git a/benchmarks/tracking/config.yml b/benchmarks/tracking/config.yml
index 6e5c5d2f..b707a2eb 100644
--- a/benchmarks/tracking/config.yml
+++ b/benchmarks/tracking/config.yml
@@ -4,4 +4,12 @@ tracking_central_electrons:
   timeout: 24 hours
   script:
     - bash benchmarks/tracking/central_electrons.sh
-      #allow_failure: true
\ No newline at end of file
+      #allow_failure: true
+
+tracking_truth_init_electrons:
+  extends: .rec_benchmark
+  stage: run
+  timeout: 24 hours
+  script:
+    - python benchmarks/tracking/run_tracking_benchmarks.py --etamin=-3 --etamax=3 -n 100
+
diff --git a/benchmarks/tracking/options/truth_seeded_tracking.py b/benchmarks/tracking/options/truth_seeded_tracking.py
new file mode 100644
index 00000000..00ea29ed
--- /dev/null
+++ b/benchmarks/tracking/options/truth_seeded_tracking.py
@@ -0,0 +1,134 @@
+from Gaudi.Configuration import *
+
+from GaudiKernel.DataObjectHandleBase import DataObjectHandleBase
+from Configurables import ApplicationMgr, EICDataSvc, PodioOutput, GeoSvc
+from GaudiKernel.SystemOfUnits import MeV, GeV, mm, cm, mrad
+
+detector_name = str(os.environ.get("JUGGLER_DETECTOR", "athena"))
+detector_path = str(os.environ.get("JUGGLER_DETECTOR_PATH", "."))
+compact_path = str(os.environ.get("JUGGLER_COMPACT_PATH", "{}.xml".format(os.path.join(detector_path, detector_name))))
+
+# todo add checks
+input_sims = [f.strip() for f in str.split(os.environ["JUGGLER_SIM_FILE"], ",") if f.strip()]
+output_rec = str(os.environ["JUGGLER_REC_FILE"])
+n_events = int(os.environ["JUGGLER_N_EVENTS"])
+
+geo_service = GeoSvc("GeoSvc", detectors=[compact_path], OutputLevel=WARNING)
+podioevent = EICDataSvc("EventDataSvc", inputs=input_sims, OutputLevel=WARNING)
+
+from Configurables import PodioInput
+from Configurables import Jug__Digi__UFSDTrackerDigi as TrackerDigi
+from Configurables import Jug__Reco__TrackerHitReconstruction as TrackerReco
+
+from Configurables import Jug__Reco__TrackerSourceLinker as TrackerSourceLinker
+from Configurables import Jug__Reco__TrackerSourcesLinker as TrackerSourcesLinker
+#from Configurables import Jug__Reco__TrackingHitsSourceLinker as TrackingHitsSourceLinker
+from Configurables import Jug__Reco__TrackParamTruthInit as TrackParamTruthInit
+
+from Configurables import Jug__Reco__TrackFindingAlgorithm as TrackFindingAlgorithm
+from Configurables import Jug__Reco__ParticlesFromTrackFit as ParticlesFromTrackFit
+from Configurables import Jug__Base__InputCopier_dd4pod__Geant4ParticleCollection_dd4pod__Geant4ParticleCollection_ as MCCopier
+
+
+sim_colls = [
+    "mcparticles",
+    "TrackerEndcapHits",
+    "TrackerBarrelHits",
+    "VertexBarrelHits",
+    "VertexEndcapHits",
+    "EcalBarrelHits"
+]
+podin = PodioInput("PodioReader", collections=sim_colls)
+podout = PodioOutput("out", filename=output_rec)
+
+## copiers to get around input --> output copy bug. Note the "2" appended to the output collection.
+mccopier = MCCopier("MCCopier",
+        inputCollection="mcparticles",
+        outputCollection="mcparticles2")
+
+trk_b_digi = TrackerDigi("trk_b_digi",
+        inputHitCollection="TrackerBarrelHits",
+        outputHitCollection="TrackerBarrelRawHits",
+        timeResolution=8)
+trk_ec_digi = TrackerDigi("trk_ec_digi",
+        inputHitCollection="TrackerEndcapHits",
+        outputHitCollection="TrackerEndcapRawHits",
+        timeResolution=8)
+
+vtx_b_digi = TrackerDigi("vtx_b_digi",
+        inputHitCollection="VertexBarrelHits",
+        outputHitCollection="VertexBarrelRawHits",
+        timeResolution=8)
+
+vtx_ec_digi = TrackerDigi("vtx_ec_digi",
+        inputHitCollection="VertexEndcapHits",
+        outputHitCollection="VertexEndcapRawHits",
+        timeResolution=8)
+
+# Tracker and vertex reconstruction
+trk_b_reco = TrackerReco("trk_b_reco",
+        inputHitCollection = trk_b_digi.outputHitCollection,
+        outputHitCollection="TrackerBarrelRecHits")
+
+trk_ec_reco = TrackerReco("trk_ec_reco",
+        inputHitCollection = trk_ec_digi.outputHitCollection,
+        outputHitCollection="TrackerEndcapRecHits")
+
+vtx_b_reco = TrackerReco("vtx_b_reco",
+        inputHitCollection = vtx_b_digi.outputHitCollection,
+        outputHitCollection="VertexBarrelRecHits")
+
+vtx_ec_reco = TrackerReco("vtx_ec_reco",
+        inputHitCollection = vtx_ec_digi.outputHitCollection,
+        outputHitCollection="VertexEndcapRecHits")
+
+sourcelinker = TrackerSourcesLinker("trk_srcslnkr",
+        inputHitCollections=["VertexBarrelRecHits", "TrackerBarrelRecHits"],
+        outputSourceLinks="TrackerSourceLinks",
+        outputMeasurements="TrackerMeasurements",
+        OutputLevel=DEBUG)
+
+## Track param init
+truth_trk_init = TrackParamTruthInit("truth_trk_init",
+        inputMCParticles="mcparticles",
+        outputInitialTrackParameters="InitTrackParams",
+        OutputLevel=DEBUG)
+
+# Tracking algorithms
+trk_find_alg = TrackFindingAlgorithm("trk_find_alg",
+        inputSourceLinks = sourcelinker.outputSourceLinks,
+        inputMeasurements = sourcelinker.outputMeasurements,
+        inputInitialTrackParameters= truth_trk_init.outputInitialTrackParameters,
+        outputTrajectories="trajectories",
+        OutputLevel=DEBUG)
+
+parts_from_fit = ParticlesFromTrackFit("parts_from_fit",
+        inputTrajectories=trk_find_alg.outputTrajectories,
+        outputParticles="ReconstructedParticles",
+        outputTrackParameters="outputTrackParameters",
+        OutputLevel=DEBUG)
+
+podout.outputCommands = [
+    "keep *",
+    "drop InitTrackParams",
+    "drop trajectories",
+    "drop outputSourceLinks",
+    "drop outputInitialTrackParameters",
+    "drop mcparticles",
+]
+
+ApplicationMgr(
+    TopAlg = [podin, mccopier,
+              trk_b_digi, trk_ec_digi, vtx_b_digi, vtx_ec_digi,
+              trk_b_reco, trk_ec_reco, vtx_b_reco, vtx_ec_reco,
+              sourcelinker,
+              truth_trk_init,
+              trk_find_alg, parts_from_fit,
+              podout
+              ],
+    EvtSel = 'NONE',
+    EvtMax   = n_events,
+    ExtSvc = [podioevent, geo_service],
+    OutputLevel=WARNING
+ )
+
diff --git a/benchmarks/tracking/run_tracking_benchmarks.py b/benchmarks/tracking/run_tracking_benchmarks.py
new file mode 100755
index 00000000..97078593
--- /dev/null
+++ b/benchmarks/tracking/run_tracking_benchmarks.py
@@ -0,0 +1,114 @@
+#! /usr/local/bin/python3
+'''
+    A python script to help run tracking benchmarks
+    simulation -> reconstruction -> analysis
+    Author: Chao Peng (ANL)
+    Date: 07/20/2021
+'''
+import os
+import sys
+import subprocess
+import argparse
+
+
+default_compact = os.path.join(
+        os.environ.get('JUGGLER_DETECTOR_PATH',  os.environ.get('DETECTOR_PATH', '')),
+        '{}.xml'.format(os.environ.get('JUGGLER_DETECTOR', '')))
+
+script_dir = os.path.dirname(os.path.realpath(__file__))
+gen_script = os.path.join(script_dir, 'scripts', 'gen_particles.py')
+option_script = os.path.join(script_dir, 'options', 'truth_seeded_tracking.py')
+analysis_script = os.path.join(script_dir, 'scripts', 'tracking_performance.py')
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-n', '--numberOfEvents', dest='nev', type=int, default=100, help='Number of events to process.')
+parser.add_argument('-t', '--nametag', type=str, default='TRACKING', help='Name tag for output files.')
+parser.add_argument('--seed', type=int, default=-1, help='Random seed to scripts.')
+parser.add_argument('--process', type=str, default='sim, rec, ana', help='Processes to be executed (sim, rec, ana).')
+parser.add_argument('--numberOfLayers', dest='nlayer', type=int, default=20, help='number of layers in ML data.')
+parser.add_argument('--numberOfHits', dest='nhit', type=int, default=20, help='number of hits in ML data.')
+parser.add_argument('--particles', type=str, default='electron', help='Partcile names, separated by \",\".')
+parser.add_argument('--etamin', type=float, default=-4, help='Minimum pseudorapidity.')
+parser.add_argument('--etamax', type=float, default=4, help='Maximum pseudorapidity.')
+parser.add_argument('--pmin', type=float, default=5.0, help='Minimum momentum of particles (GeV).')
+parser.add_argument('--pmax', type=float, default=5.0, help='Maximum momentum of particles (GeV).')
+parser.add_argument('--compact', type=str, default=default_compact, help='Path to detector compact file.')
+parser.add_argument('--uploadSizeLimit', type=float, default=10, help='Upper limit of file size (Mb) to be uploaded.')
+
+args = parser.parse_args()
+kwargs = vars(args)
+
+gen_file = 'gen_{nametag}_{pmin}_{pmax}.hepmc'.format(**kwargs)
+sim_file = 'sim_{nametag}_{pmin}_{pmax}.root'.format(**kwargs)
+rec_file = 'rec_{nametag}_{pmin}_{pmax}.root'.format(**kwargs)
+
+procs = [p.strip() for p in args.process.split(',')]
+sdir = os.path.dirname(os.path.realpath(__file__))
+
+if 'sim' in procs:
+    # generate particles
+    gen_cmd = ['python', gen_script, gen_file,
+            '-n', '{}'.format(args.nev),
+            '-s', '{}'.format(args.seed),
+            '--etamin', '{}'.format(args.etamin), '--etamax', '{}'.format(args.etamax),
+            '--pmin', '{}'.format(args.pmin), '--pmax', '{}'.format(args.pmax),
+            '--particles', args.particles]
+    subprocess.run(gen_cmd)
+    # simulation
+    sim_cmd = ['npsim',
+            '--part.minimalKineticEnergy', '1*TeV',
+            '--numberOfEvents', '{}'.format(args.nev),
+            '--runType', 'batch',
+            '--inputFiles', gen_file,
+            '--outputFile', sim_file,
+            '--compact', args.compact,
+            '-v', 'WARNING']
+    if args.seed > 0:
+        sim_cmd += ['--random.seed', args.seed]
+    return_code = subprocess.run(sim_cmd).returncode
+    if return_code is not None and return_code < 0:
+        print("ERROR running simulation!")
+        exit(1)
+    subprocess.run(['rootls', '-t', sim_file])
+
+
+if 'rec' in procs:
+    # export to environment variables (used to pass arguments to the option file)
+    os.environ['JUGGLER_SIM_FILE'] = sim_file
+    os.environ['JUGGLER_REC_FILE'] = rec_file
+    os.environ['JUGGLER_COMPACT_PATH'] = args.compact
+    os.environ['JUGGLER_N_EVENTS'] = '{}'.format(args.nev)
+
+    juggler_xenv = os.path.join(os.environ.get('JUGGLER_INSTALL_PREFIX', '../local'), 'Juggler.xenv')
+
+    rec_cmd = ['xenv', '-x', juggler_xenv, 'gaudirun.py', os.path.join(sdir, 'options', option_script)]
+    return_code = subprocess.run(rec_cmd).returncode
+    if return_code is not None and return_code < 0:
+        print('ERROR running juggler ({})!'.format(opt))
+        exit(1)
+    process = subprocess.run(['rootls', '-t', rec_file])
+
+
+if 'ana' in procs:
+    os.makedirs('results', exist_ok=True)
+    ana_cmd = ['python', analysis_script, rec_file,
+               '--mc-collection', 'mcparticles2',
+               '--tracking-collection', 'outputTrackParameters',
+               '-o', 'results']
+    return_code = subprocess.run(ana_cmd).returncode
+    if return_code is not None and return_code < 0:
+        print('ERROR running analysis ({})!'.format(ana))
+        exit(1)
+
+
+# upload generated data files if it was small enough (< 10 Mb)
+for upload in [rec_file]:
+    process = subprocess.run(['stat', '--format=%s', upload], stdout=subprocess.PIPE)
+    size = int(process.stdout)/1024./1024.
+    if size > args.uploadSizeLimit:
+        print('Skip uploading file \"{}\" because its size ({:.2f} Mb) > {:.2f} Mb'\
+              .format(upload, size, args.uploadSizeLimit))
+    else:
+        os.system('cp {} results/.'.format(upload))
+        print('Upload file \"{}\", size = {:.2f} Mb'.format(upload, size))
+
diff --git a/benchmarks/tracking/scripts/gen_particles.py b/benchmarks/tracking/scripts/gen_particles.py
new file mode 100644
index 00000000..4b8c4e96
--- /dev/null
+++ b/benchmarks/tracking/scripts/gen_particles.py
@@ -0,0 +1,104 @@
+import os
+from pyHepMC3 import HepMC3 as hm
+import numpy as np
+import argparse
+
+
+PARTICLES = {
+    "pion0": (111, 0.1349766),       # pi0
+    "pion+": (211, 0.13957018),      # pi+
+    "pion-": (-211, 0.13957018),     # pi-
+    "kaon0": (311, 0.497648),        # K0
+    "kaon+": (321, 0.493677),        # K+
+    "kaon-": (-321, 0.493677),       # K-
+    "proton": (2212, 0.938272),      # proton
+    "neutron": (2112, 0.939565),     # neutron
+    "electron": (11, 0.51099895e-3), # electron
+    "positron": (-11, 0.51099895e-3),# positron
+    "photon": (22, 0),               # photon
+}
+
+
+def gen_event(p, theta, phi, pid, mass):
+    evt = hm.GenEvent(momentum_unit=hm.Units.MomentumUnit.GEV, length_unit=hm.Units.LengthUnit.MM)
+    # final state
+    state = 1
+    e0 = np.sqrt(p*p + mass*mass)
+    px = np.cos(phi)*np.sin(theta)
+    py = np.sin(phi)*np.sin(theta)
+    pz = np.cos(theta)
+
+    # beam
+    pbeam = hm.GenParticle(hm.FourVector(0, 0, 0, 0.938272), 2212, 4)
+    ebeam = hm.GenParticle(hm.FourVector(0, 0, e0, np.sqrt(e0*e0 + 0.511e-3*0.511e-3)), 11, 4)
+
+    # out particle
+    hout = hm.GenParticle(hm.FourVector(px*p, py*p, pz*p, e0), pid, state)
+
+    # vertex
+    vert = hm.GenVertex()
+    vert.add_particle_in(ebeam)
+    vert.add_particle_in(pbeam)
+    vert.add_particle_out(hout)
+    evt.add_vertex(vert)
+    return evt
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('output', help='path to the output file')
+    parser.add_argument('-n', type=int, default=1000, dest='nev', help='number of events to generate')
+    parser.add_argument('-s', type=int, default=-1, dest='seed', help='seed for random generator')
+    parser.add_argument('--parray', type=str, default="", dest='parray',
+                        help='an array of momenta in GeV, separated by \",\"')
+    parser.add_argument('--pmin', type=float, default=8.0, dest='pmin', help='minimum momentum in GeV')
+    parser.add_argument('--pmax', type=float, default=100.0, dest='pmax', help='maximum momentum in GeV')
+    parser.add_argument('--etamin', type=float, default=-4, dest='etamin', help='minimum pseudorapidity')
+    parser.add_argument('--etamax', type=float, default=4, dest='etamax', help='maximum pseudorapidity')
+    parser.add_argument('--phmin', type=float, default=0.0, dest='phmin', help='minimum angle in degree')
+    parser.add_argument('--phmax', type=float, default=360.0, dest='phmax', help='maximum angle in degree')
+    parser.add_argument('--particles', type=str, default='electron', dest='particles',
+                        help='particle names, support {}'.format(list(PARTICLES.keys())))
+
+    args = parser.parse_args()
+
+    # random seed (< 0 will get it from enviroment variable 'SEED', or a system random number)
+    if args.seed < 0:
+        args.seed = os.environ.get('SEED', int.from_bytes(os.urandom(4), byteorder='big', signed=False))
+    print("Random seed is {}".format(args.seed))
+    np.random.seed(args.seed)
+
+    output = hm.WriterAscii(args.output);
+    if output.failed():
+        print("Cannot open file \"{}\"".format(args.output))
+        sys.exit(2)
+
+    # build particle info
+    parts = []
+    for pid in args.particles.split(','):
+        pid = pid.strip()
+        if pid not in PARTICLES.keys():
+            print('pid {:d} not found in dictionary, ignored.'.format(pid))
+            continue
+        parts.append(PARTICLES[pid])
+
+    # p values
+    pvals = np.random.uniform(args.pmin, args.pmax, args.nev) if not args.parray else \
+            np.random.choice([float(p.strip()) for p in args.parray.split(',')], args.nev)
+    thvals = np.arctan(np.exp(np.random.uniform(args.etamin, args.etamax, args.nev)*-1.))*2.
+    phivals = np.random.uniform(args.phmin, args.phmax, args.nev)/180.*np.pi
+    partvals = [parts[i] for i in np.random.choice(len(parts), args.nev)]
+
+    count = 0
+    for p, theta, phi, (pid, mass) in zip(pvals, thvals, phivals, partvals):
+        if (count % 1000 == 0):
+            print("Generated {} events".format(count), end='\r')
+        evt = gen_event(p, theta, phi, pid, mass)
+        output.write_event(evt)
+        evt.clear()
+        count += 1
+
+    print("Generated {} events".format(args.nev))
+    output.close()
+
diff --git a/benchmarks/tracking/scripts/tracking_performance.py b/benchmarks/tracking/scripts/tracking_performance.py
new file mode 100644
index 00000000..5f7c414c
--- /dev/null
+++ b/benchmarks/tracking/scripts/tracking_performance.py
@@ -0,0 +1,216 @@
+'''
+    A simple analysis script to extract some basic info of Monte-Carlo particles and clusters
+'''
+import os
+import ROOT
+import pandas as pd
+import numpy as np
+import argparse
+from matplotlib import pyplot as plt
+import matplotlib.ticker as ticker
+
+
+# load root macros, input is an argument string
+def load_root_macros(arg_macros):
+    for path in arg_macros.split(','):
+        path = path.strip()
+        if os.path.exists(path):
+            gROOT.Macro(path)
+            print('Loading root macro: \'{}\' loaded.'.format(path))
+        else:
+            print('Loading root macros: \'{}\' does not exist, skip loading it.'.format(path))
+
+
+# read from RDataFrame and flatten a given collection, return pandas dataframe
+def flatten_collection(rdf, collection, cols=None):
+    if not cols:
+        cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
+    else:
+        cols = ['{}.{}'.format(collection, c) for c in cols]
+    if not cols:
+        print('cannot find any branch under collection {}'.format(collection))
+        return pd.DataFrame()
+
+    data = rdf.AsNumpy(cols)
+    # flatten the data, add an event id to identify clusters from different events
+    evns = []
+    for i, vec in enumerate(data[cols[0]]):
+        evns += [i]*vec.size()
+    for n, vals in data.items():
+        # make sure ints are not converted to floats
+        typename = vals[0].__class__.__name__.lower()
+        dtype = np.int64 if 'int' in typename or 'long' in typename else np.float64
+        # type safe creation
+        data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
+    # build data frame
+    dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
+    dfp.loc[:, 'event'] = evns
+    return dfp
+
+
+def thrown_particles_figure(rdf, save, mcbranch="mcparticles2"):
+    # define truth particle info
+    dft = flatten_collection(rdf, mcbranch, ['genStatus', 'pdgID', 'psx', 'psy', 'psz', 'mass'])
+    dft.rename(columns={c: c.replace(mcbranch + '.', '') for c in dft.columns}, inplace=True)
+    # select thrown particles
+    dft = dft[dft['genStatus'] == 1]
+
+    # figure
+    fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120)
+    # convert pid to particle names
+    pdgbase = ROOT.TDatabasePDG()
+    get_pname = np.vectorize(lambda pid: pdgbase.GetParticle(int(pid)).GetName())
+    get_pcharge = np.vectorize(lambda pid: pdgbase.GetParticle(int(pid)).Charge()/3.)
+
+    # enumerate particle names
+    dft.loc[:, 'pname'] = get_pname(dft['pdgID'].values)
+    dft.loc[:, 'charge'] = get_pcharge(dft['pdgID'].values)
+    penum = {pname: i for i, pname in enumerate(dft['pname'].unique())}
+    dft.loc[:, 'pname_id'] = dft['pname'].map(penum)
+
+    ax = axs.flat[0]
+    ax.hist(dft['pname_id'].values, bins=np.arange(-4, len(penum)*2 + 4), align='left', ec='k')
+    ax.set_ylabel('Particle Counts', fontsize=24)
+    ax.tick_params(labelsize=22)
+    ax.set_axisbelow(True)
+    ax.grid(linestyle=':', which='both', axis='y')
+    ax.xaxis.set_major_locator(ticker.FixedLocator(list(penum.values())))
+    ax.set_xticklabels(list(penum), rotation=30)
+
+    # calculate kinematics
+    get_4vecs = np.vectorize(lambda x, y, z, m: ROOT.Math.PxPyPzMVector(x, y, z, m))
+    fourvecs = get_4vecs(*dft[['psx', 'psy', 'psz', 'mass']].values.T)
+
+    dft.loc[:, 'p'] = [v.P() for v in fourvecs]
+    dft.loc[:, 'theta'] = [v.Theta() for v in fourvecs]
+    dft.loc[:, 'phi'] = [v.Phi() for v in fourvecs]
+    dft.loc[:, 'eta'] = [v.Eta() for v in fourvecs]
+    dft.loc[:, 'pt'] = [v.Pt() for v in fourvecs]
+
+    # column name, x label, y label, bins
+    plots = [
+        ('p', r'$P$ (GeV)', 'Counts', 50),
+        ('eta', r'$\eta$', 'Counts', 50),
+        ('pt', r'$P_T$ (GeV)', 'Counts', 50),
+    ]
+    for ax, (col, xlabel, ylabel, bins) in zip(axs.flat[1:], plots):
+        ax.hist(dft[col].values, bins=bins, align='left', ec='k')
+        ax.tick_params(labelsize=22, direction='in', which='both')
+        ax.grid(linestyle=':', which='both')
+        ax.set_axisbelow(True)
+        ax.set_xlabel(xlabel, fontsize=24)
+        ax.set_ylabel(ylabel, fontsize=24)
+
+    fig.text(0.5, 0.95, 'Thrown Particles', ha='center', fontsize=24)
+    fig.savefig(save)
+    plt.close(fig)
+    return dft
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('rec_file', help='Path to reconstruction output file.')
+    parser.add_argument('-o', dest='outdir', default='.', help='Output directory.')
+    parser.add_argument('-m', '--macros', dest='macros', default='rootlogon.C',
+            help='Macro files to be loaded by root, separated by \",\".')
+    parser.add_argument('--mc-collection', dest='mc', default='mcparticles',
+            help='Collection name of MC particles truth info.')
+    parser.add_argument('--tracking-collection', dest='coll', default='outputTrackParameters',
+            help='Collection name of clusters to plot')
+    args = parser.parse_args()
+
+    # multi-threading for RDataFrame
+    nthreads = os.cpu_count()//2
+    ROOT.ROOT.EnableImplicitMT(nthreads)
+    print('CPU available {}, using {}'.format(os.cpu_count(), nthreads))
+
+    # declare some functions from c++ script, needed for data frame processing
+    # script_dir = os.path.dirname(os.path.realpath(__file__))
+    # fcode = open(os.path.join(script_dir, 'barrel_clusters.cxx')).read()
+    # ROOT.gInterpreter.Declare(fcode)
+
+    os.makedirs(args.outdir, exist_ok=True)
+    # load macros (add libraries/headers root cannot automatically found here)
+    load_root_macros(args.macros)
+
+    rdf_rec = ROOT.RDataFrame('events', args.rec_file)
+    dfm = thrown_particles_figure(rdf_rec, save=os.path.join(args.outdir, 'thrown_particles.png'), mcbranch=args.mc)
+    df = flatten_collection(rdf_rec, args.coll)
+    df.rename(columns={c: c.replace(args.coll + '.', '') for c in df.columns}, inplace=True)
+
+    fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120)
+    for ax in axs.flat:
+        ax.tick_params(direction='in', which='both', labelsize=20)
+        ax.grid(linestyle=':')
+        ax.set_axisbelow(True)
+    ax = axs.flat[0]
+    # tracking efficiency
+    eta_bins = np.linspace(-4, 4, 21)
+    sim_eta, _ = np.histogram(dfm['eta'].values, bins=eta_bins)
+    rec_eta, _ = np.histogram(-np.log(np.tan(df['theta'].values/2.)), bins=eta_bins)
+    eta_centers = (eta_bins[1:] + eta_bins[:-1])/2.
+    eta_binsize = np.mean(np.diff(eta_centers))
+    # cut off not simulated bins
+    mask = sim_eta > 0
+    sim_eta = sim_eta[mask]
+    rec_eta = rec_eta[mask]
+    eta_centers = eta_centers[mask]
+    track_eff = rec_eta/sim_eta
+    # binary distribution, pq*sqrt(N)
+    # TODO check the errors
+    eff = np.mean(track_eff)
+    track_err = eff*(1. - eff)*np.reciprocal(np.sqrt(sim_eta))
+    # rec_err = eff*(1. - eff)*np.sqrt(rec_eta)
+    # track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), (rec_eta - rec_err)/sim_eta)
+    # track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), (rec_eta + rec_err)/sim_eta) - track_eff
+    track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), track_eff - track_err)
+    track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), track_eff + track_err) - track_eff
+
+
+    ax.errorbar(eta_centers, track_eff, xerr=eta_binsize/2., yerr=[track_eff_lower, track_eff_upper],
+                fmt='o', capsize=3)
+    ax.set_ylim(0., 1.)
+    ax.set_xlim(-4.5, 4.5)
+    ax.set_ylabel('Tracking Efficiency', fontsize=20)
+    ax.set_xlabel('$\eta$', fontsize=20)
+
+    # momentum resolution
+    ax = axs.flat[1]
+    dfc = dfm.set_index('event').loc[df['event'].values]
+    rec_p = dfc['charge'].values/df['qOverP'].values
+    sim_p = dfc['p'].values
+    # print(dfc['charge'].values, df['qOverP'].values)
+    dp_p = (rec_p - sim_p)/sim_p
+    hval, hbins, _ = ax.hist(dp_p*100., bins=np.linspace(-20, 20, 101),
+                             weights=np.repeat(1./float(rec_p.shape[0]), rec_p.shape[0]), ec='k')
+    nbins = hbins.shape[0] - 1
+    ax.set_ylabel('Normalized Counts / {:d} Bins'.format(nbins), fontsize=20)
+    ax.set_xlabel('$\delta p$ (%)', fontsize=20)
+
+    # theta resolution
+    ax = axs.flat[2]
+    rec_th = df.groupby('event')['theta'].first().values
+    sim_th = dfc.groupby('event')['theta'].first().values
+    dth_th = (rec_th - sim_th)
+    # print(np.vstack([rec_th, sim_th]).T)
+    hval, hbins, _ = ax.hist(dth_th, bins=np.linspace(-0.01, 0.01, 101),
+                             weights=np.repeat(1./float(rec_th.shape[0]), rec_th.shape[0]), ec='k')
+    nbins = hbins.shape[0] - 1
+    ax.set_ylabel('Normalized Counts / {:d} Bins'.format(nbins), fontsize=20)
+    ax.set_xlabel(r'$d\theta$ (rad)', fontsize=20)
+    fig.savefig('test.png')
+
+    # phi resolution
+    ax = axs.flat[3]
+    rec_th = df.groupby('event')['phi'].first().values
+    sim_th = dfc.groupby('event')['phi'].first().values
+    dth_th = (rec_th - sim_th)
+    # print(np.vstack([rec_th, sim_th]).T)
+    hval, hbins, _ = ax.hist(dth_th, bins=np.linspace(-0.01, 0.01, 101),
+                             weights=np.repeat(1./float(rec_th.shape[0]), rec_th.shape[0]), ec='k')
+    nbins = hbins.shape[0] - 1
+    ax.set_ylabel('Normalized Counts / {:d} Bins'.format(nbins), fontsize=20)
+    ax.set_xlabel(r'$d\phi$ (rad)', fontsize=20)
+
+    fig.text(0.5, 0.95, 'Barrel Tracker Benchmark (Truth Init.)', fontsize=22, ha='center')
+    fig.savefig(os.path.join(args.outdir, 'tracking_performance.png'))
+
-- 
GitLab