From 4d787b836cac7505835936dea032a51629a55735 Mon Sep 17 00:00:00 2001 From: Chao Peng <cpeng@anl.gov> Date: Mon, 26 Jul 2021 17:04:15 +0000 Subject: [PATCH] add tracking benchmark --- .../clustering/scripts/cluster_plots.py | 26 +-- benchmarks/tracking/config.yml | 10 +- .../tracking/options/truth_seeded_tracking.py | 134 +++++++++++ .../tracking/run_tracking_benchmarks.py | 114 +++++++++ benchmarks/tracking/scripts/gen_particles.py | 104 +++++++++ .../tracking/scripts/tracking_performance.py | 216 ++++++++++++++++++ 6 files changed, 590 insertions(+), 14 deletions(-) create mode 100644 benchmarks/tracking/options/truth_seeded_tracking.py create mode 100755 benchmarks/tracking/run_tracking_benchmarks.py create mode 100644 benchmarks/tracking/scripts/gen_particles.py create mode 100644 benchmarks/tracking/scripts/tracking_performance.py diff --git a/benchmarks/clustering/scripts/cluster_plots.py b/benchmarks/clustering/scripts/cluster_plots.py index e2a53cfd..336c35d4 100644 --- a/benchmarks/clustering/scripts/cluster_plots.py +++ b/benchmarks/clustering/scripts/cluster_plots.py @@ -50,10 +50,10 @@ def flatten_collection(rdf, collection, cols=None): def thrown_particles_figure(rdf, save, mcbranch="mcparticles"): # define truth particle info - df = flatten_collection(rdf, mcbranch, ['genStatus', 'pdgID', 'psx', 'psy', 'psz', 'mass']) - df.rename(columns={c: c.replace(mcbranch + '.', '') for c in df.columns}, inplace=True) + dft = flatten_collection(rdf, mcbranch, ['genStatus', 'pdgID', 'psx', 'psy', 'psz', 'mass']) + dft.rename(columns={c: c.replace(mcbranch + '.', '') for c in dft.columns}, inplace=True) # select thrown particles - df = df[df['genStatus'] == 1] + dft = dft[dft['genStatus'] == 1] # figure fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120) @@ -62,12 +62,12 @@ def thrown_particles_figure(rdf, save, mcbranch="mcparticles"): get_pname = np.vectorize(lambda pid: pdgbase.GetParticle(int(pid)).GetName()) # enumerate particle names - df.loc[:, 'pname'] = get_pname(df['pdgID'].values) - penum = {pname: i for i, pname in enumerate(df['pname'].unique())} - df.loc[:, 'pname_id'] = df['pname'].map(penum) + dft.loc[:, 'pname'] = get_pname(dft['pdgID'].values) + penum = {pname: i for i, pname in enumerate(dft['pname'].unique())} + dft.loc[:, 'pname_id'] = dft['pname'].map(penum) ax = axs.flat[0] - ax.hist(df['pname_id'].values, bins=np.arange(-4, len(penum)*2 + 4), align='left', ec='k') + ax.hist(dft['pname_id'].values, bins=np.arange(-4, len(penum)*2 + 4), align='left', ec='k') ax.set_ylabel('Particle Counts', fontsize=24) ax.tick_params(labelsize=22) ax.set_axisbelow(True) @@ -77,11 +77,11 @@ def thrown_particles_figure(rdf, save, mcbranch="mcparticles"): # calculate kinematics get_4vecs = np.vectorize(lambda x, y, z, m: ROOT.Math.PxPyPzMVector(x, y, z, m)) - fourvecs = get_4vecs(*df[['psx', 'psy', 'psz', 'mass']].values.T) + fourvecs = get_4vecs(*dft[['psx', 'psy', 'psz', 'mass']].values.T) - df.loc[:, 'p'] = [v.P() for v in fourvecs] - df.loc[:, 'eta'] = [v.Eta() for v in fourvecs] - df.loc[:, 'pt'] = [v.Pt() for v in fourvecs] + dft.loc[:, 'p'] = [v.P() for v in fourvecs] + dft.loc[:, 'eta'] = [v.Eta() for v in fourvecs] + dft.loc[:, 'pt'] = [v.Pt() for v in fourvecs] # column name, x label, y label, bins plots = [ @@ -90,7 +90,7 @@ def thrown_particles_figure(rdf, save, mcbranch="mcparticles"): ('pt', r'$P_T$ (GeV)', 'Counts', 50), ] for ax, (col, xlabel, ylabel, bins) in zip(axs.flat[1:], plots): - ax.hist(df[col].values, bins=bins, align='left', ec='k') + ax.hist(dft[col].values, bins=bins, align='left', ec='k') ax.tick_params(labelsize=22, direction='in', which='both') ax.grid(linestyle=':', which='both') ax.set_axisbelow(True) @@ -100,7 +100,7 @@ def thrown_particles_figure(rdf, save, mcbranch="mcparticles"): fig.text(0.5, 0.95, 'Thrown Particles', ha='center', fontsize=24) fig.savefig(save) plt.close(fig) - + return dft if __name__ == '__main__': parser = argparse.ArgumentParser() diff --git a/benchmarks/tracking/config.yml b/benchmarks/tracking/config.yml index 6e5c5d2f..b707a2eb 100644 --- a/benchmarks/tracking/config.yml +++ b/benchmarks/tracking/config.yml @@ -4,4 +4,12 @@ tracking_central_electrons: timeout: 24 hours script: - bash benchmarks/tracking/central_electrons.sh - #allow_failure: true \ No newline at end of file + #allow_failure: true + +tracking_truth_init_electrons: + extends: .rec_benchmark + stage: run + timeout: 24 hours + script: + - python benchmarks/tracking/run_tracking_benchmarks.py --etamin=-3 --etamax=3 -n 100 + diff --git a/benchmarks/tracking/options/truth_seeded_tracking.py b/benchmarks/tracking/options/truth_seeded_tracking.py new file mode 100644 index 00000000..00ea29ed --- /dev/null +++ b/benchmarks/tracking/options/truth_seeded_tracking.py @@ -0,0 +1,134 @@ +from Gaudi.Configuration import * + +from GaudiKernel.DataObjectHandleBase import DataObjectHandleBase +from Configurables import ApplicationMgr, EICDataSvc, PodioOutput, GeoSvc +from GaudiKernel.SystemOfUnits import MeV, GeV, mm, cm, mrad + +detector_name = str(os.environ.get("JUGGLER_DETECTOR", "athena")) +detector_path = str(os.environ.get("JUGGLER_DETECTOR_PATH", ".")) +compact_path = str(os.environ.get("JUGGLER_COMPACT_PATH", "{}.xml".format(os.path.join(detector_path, detector_name)))) + +# todo add checks +input_sims = [f.strip() for f in str.split(os.environ["JUGGLER_SIM_FILE"], ",") if f.strip()] +output_rec = str(os.environ["JUGGLER_REC_FILE"]) +n_events = int(os.environ["JUGGLER_N_EVENTS"]) + +geo_service = GeoSvc("GeoSvc", detectors=[compact_path], OutputLevel=WARNING) +podioevent = EICDataSvc("EventDataSvc", inputs=input_sims, OutputLevel=WARNING) + +from Configurables import PodioInput +from Configurables import Jug__Digi__UFSDTrackerDigi as TrackerDigi +from Configurables import Jug__Reco__TrackerHitReconstruction as TrackerReco + +from Configurables import Jug__Reco__TrackerSourceLinker as TrackerSourceLinker +from Configurables import Jug__Reco__TrackerSourcesLinker as TrackerSourcesLinker +#from Configurables import Jug__Reco__TrackingHitsSourceLinker as TrackingHitsSourceLinker +from Configurables import Jug__Reco__TrackParamTruthInit as TrackParamTruthInit + +from Configurables import Jug__Reco__TrackFindingAlgorithm as TrackFindingAlgorithm +from Configurables import Jug__Reco__ParticlesFromTrackFit as ParticlesFromTrackFit +from Configurables import Jug__Base__InputCopier_dd4pod__Geant4ParticleCollection_dd4pod__Geant4ParticleCollection_ as MCCopier + + +sim_colls = [ + "mcparticles", + "TrackerEndcapHits", + "TrackerBarrelHits", + "VertexBarrelHits", + "VertexEndcapHits", + "EcalBarrelHits" +] +podin = PodioInput("PodioReader", collections=sim_colls) +podout = PodioOutput("out", filename=output_rec) + +## copiers to get around input --> output copy bug. Note the "2" appended to the output collection. +mccopier = MCCopier("MCCopier", + inputCollection="mcparticles", + outputCollection="mcparticles2") + +trk_b_digi = TrackerDigi("trk_b_digi", + inputHitCollection="TrackerBarrelHits", + outputHitCollection="TrackerBarrelRawHits", + timeResolution=8) +trk_ec_digi = TrackerDigi("trk_ec_digi", + inputHitCollection="TrackerEndcapHits", + outputHitCollection="TrackerEndcapRawHits", + timeResolution=8) + +vtx_b_digi = TrackerDigi("vtx_b_digi", + inputHitCollection="VertexBarrelHits", + outputHitCollection="VertexBarrelRawHits", + timeResolution=8) + +vtx_ec_digi = TrackerDigi("vtx_ec_digi", + inputHitCollection="VertexEndcapHits", + outputHitCollection="VertexEndcapRawHits", + timeResolution=8) + +# Tracker and vertex reconstruction +trk_b_reco = TrackerReco("trk_b_reco", + inputHitCollection = trk_b_digi.outputHitCollection, + outputHitCollection="TrackerBarrelRecHits") + +trk_ec_reco = TrackerReco("trk_ec_reco", + inputHitCollection = trk_ec_digi.outputHitCollection, + outputHitCollection="TrackerEndcapRecHits") + +vtx_b_reco = TrackerReco("vtx_b_reco", + inputHitCollection = vtx_b_digi.outputHitCollection, + outputHitCollection="VertexBarrelRecHits") + +vtx_ec_reco = TrackerReco("vtx_ec_reco", + inputHitCollection = vtx_ec_digi.outputHitCollection, + outputHitCollection="VertexEndcapRecHits") + +sourcelinker = TrackerSourcesLinker("trk_srcslnkr", + inputHitCollections=["VertexBarrelRecHits", "TrackerBarrelRecHits"], + outputSourceLinks="TrackerSourceLinks", + outputMeasurements="TrackerMeasurements", + OutputLevel=DEBUG) + +## Track param init +truth_trk_init = TrackParamTruthInit("truth_trk_init", + inputMCParticles="mcparticles", + outputInitialTrackParameters="InitTrackParams", + OutputLevel=DEBUG) + +# Tracking algorithms +trk_find_alg = TrackFindingAlgorithm("trk_find_alg", + inputSourceLinks = sourcelinker.outputSourceLinks, + inputMeasurements = sourcelinker.outputMeasurements, + inputInitialTrackParameters= truth_trk_init.outputInitialTrackParameters, + outputTrajectories="trajectories", + OutputLevel=DEBUG) + +parts_from_fit = ParticlesFromTrackFit("parts_from_fit", + inputTrajectories=trk_find_alg.outputTrajectories, + outputParticles="ReconstructedParticles", + outputTrackParameters="outputTrackParameters", + OutputLevel=DEBUG) + +podout.outputCommands = [ + "keep *", + "drop InitTrackParams", + "drop trajectories", + "drop outputSourceLinks", + "drop outputInitialTrackParameters", + "drop mcparticles", +] + +ApplicationMgr( + TopAlg = [podin, mccopier, + trk_b_digi, trk_ec_digi, vtx_b_digi, vtx_ec_digi, + trk_b_reco, trk_ec_reco, vtx_b_reco, vtx_ec_reco, + sourcelinker, + truth_trk_init, + trk_find_alg, parts_from_fit, + podout + ], + EvtSel = 'NONE', + EvtMax = n_events, + ExtSvc = [podioevent, geo_service], + OutputLevel=WARNING + ) + diff --git a/benchmarks/tracking/run_tracking_benchmarks.py b/benchmarks/tracking/run_tracking_benchmarks.py new file mode 100755 index 00000000..97078593 --- /dev/null +++ b/benchmarks/tracking/run_tracking_benchmarks.py @@ -0,0 +1,114 @@ +#! /usr/local/bin/python3 +''' + A python script to help run tracking benchmarks + simulation -> reconstruction -> analysis + Author: Chao Peng (ANL) + Date: 07/20/2021 +''' +import os +import sys +import subprocess +import argparse + + +default_compact = os.path.join( + os.environ.get('JUGGLER_DETECTOR_PATH', os.environ.get('DETECTOR_PATH', '')), + '{}.xml'.format(os.environ.get('JUGGLER_DETECTOR', ''))) + +script_dir = os.path.dirname(os.path.realpath(__file__)) +gen_script = os.path.join(script_dir, 'scripts', 'gen_particles.py') +option_script = os.path.join(script_dir, 'options', 'truth_seeded_tracking.py') +analysis_script = os.path.join(script_dir, 'scripts', 'tracking_performance.py') + +parser = argparse.ArgumentParser() +parser.add_argument('-n', '--numberOfEvents', dest='nev', type=int, default=100, help='Number of events to process.') +parser.add_argument('-t', '--nametag', type=str, default='TRACKING', help='Name tag for output files.') +parser.add_argument('--seed', type=int, default=-1, help='Random seed to scripts.') +parser.add_argument('--process', type=str, default='sim, rec, ana', help='Processes to be executed (sim, rec, ana).') +parser.add_argument('--numberOfLayers', dest='nlayer', type=int, default=20, help='number of layers in ML data.') +parser.add_argument('--numberOfHits', dest='nhit', type=int, default=20, help='number of hits in ML data.') +parser.add_argument('--particles', type=str, default='electron', help='Partcile names, separated by \",\".') +parser.add_argument('--etamin', type=float, default=-4, help='Minimum pseudorapidity.') +parser.add_argument('--etamax', type=float, default=4, help='Maximum pseudorapidity.') +parser.add_argument('--pmin', type=float, default=5.0, help='Minimum momentum of particles (GeV).') +parser.add_argument('--pmax', type=float, default=5.0, help='Maximum momentum of particles (GeV).') +parser.add_argument('--compact', type=str, default=default_compact, help='Path to detector compact file.') +parser.add_argument('--uploadSizeLimit', type=float, default=10, help='Upper limit of file size (Mb) to be uploaded.') + +args = parser.parse_args() +kwargs = vars(args) + +gen_file = 'gen_{nametag}_{pmin}_{pmax}.hepmc'.format(**kwargs) +sim_file = 'sim_{nametag}_{pmin}_{pmax}.root'.format(**kwargs) +rec_file = 'rec_{nametag}_{pmin}_{pmax}.root'.format(**kwargs) + +procs = [p.strip() for p in args.process.split(',')] +sdir = os.path.dirname(os.path.realpath(__file__)) + +if 'sim' in procs: + # generate particles + gen_cmd = ['python', gen_script, gen_file, + '-n', '{}'.format(args.nev), + '-s', '{}'.format(args.seed), + '--etamin', '{}'.format(args.etamin), '--etamax', '{}'.format(args.etamax), + '--pmin', '{}'.format(args.pmin), '--pmax', '{}'.format(args.pmax), + '--particles', args.particles] + subprocess.run(gen_cmd) + # simulation + sim_cmd = ['npsim', + '--part.minimalKineticEnergy', '1*TeV', + '--numberOfEvents', '{}'.format(args.nev), + '--runType', 'batch', + '--inputFiles', gen_file, + '--outputFile', sim_file, + '--compact', args.compact, + '-v', 'WARNING'] + if args.seed > 0: + sim_cmd += ['--random.seed', args.seed] + return_code = subprocess.run(sim_cmd).returncode + if return_code is not None and return_code < 0: + print("ERROR running simulation!") + exit(1) + subprocess.run(['rootls', '-t', sim_file]) + + +if 'rec' in procs: + # export to environment variables (used to pass arguments to the option file) + os.environ['JUGGLER_SIM_FILE'] = sim_file + os.environ['JUGGLER_REC_FILE'] = rec_file + os.environ['JUGGLER_COMPACT_PATH'] = args.compact + os.environ['JUGGLER_N_EVENTS'] = '{}'.format(args.nev) + + juggler_xenv = os.path.join(os.environ.get('JUGGLER_INSTALL_PREFIX', '../local'), 'Juggler.xenv') + + rec_cmd = ['xenv', '-x', juggler_xenv, 'gaudirun.py', os.path.join(sdir, 'options', option_script)] + return_code = subprocess.run(rec_cmd).returncode + if return_code is not None and return_code < 0: + print('ERROR running juggler ({})!'.format(opt)) + exit(1) + process = subprocess.run(['rootls', '-t', rec_file]) + + +if 'ana' in procs: + os.makedirs('results', exist_ok=True) + ana_cmd = ['python', analysis_script, rec_file, + '--mc-collection', 'mcparticles2', + '--tracking-collection', 'outputTrackParameters', + '-o', 'results'] + return_code = subprocess.run(ana_cmd).returncode + if return_code is not None and return_code < 0: + print('ERROR running analysis ({})!'.format(ana)) + exit(1) + + +# upload generated data files if it was small enough (< 10 Mb) +for upload in [rec_file]: + process = subprocess.run(['stat', '--format=%s', upload], stdout=subprocess.PIPE) + size = int(process.stdout)/1024./1024. + if size > args.uploadSizeLimit: + print('Skip uploading file \"{}\" because its size ({:.2f} Mb) > {:.2f} Mb'\ + .format(upload, size, args.uploadSizeLimit)) + else: + os.system('cp {} results/.'.format(upload)) + print('Upload file \"{}\", size = {:.2f} Mb'.format(upload, size)) + diff --git a/benchmarks/tracking/scripts/gen_particles.py b/benchmarks/tracking/scripts/gen_particles.py new file mode 100644 index 00000000..4b8c4e96 --- /dev/null +++ b/benchmarks/tracking/scripts/gen_particles.py @@ -0,0 +1,104 @@ +import os +from pyHepMC3 import HepMC3 as hm +import numpy as np +import argparse + + +PARTICLES = { + "pion0": (111, 0.1349766), # pi0 + "pion+": (211, 0.13957018), # pi+ + "pion-": (-211, 0.13957018), # pi- + "kaon0": (311, 0.497648), # K0 + "kaon+": (321, 0.493677), # K+ + "kaon-": (-321, 0.493677), # K- + "proton": (2212, 0.938272), # proton + "neutron": (2112, 0.939565), # neutron + "electron": (11, 0.51099895e-3), # electron + "positron": (-11, 0.51099895e-3),# positron + "photon": (22, 0), # photon +} + + +def gen_event(p, theta, phi, pid, mass): + evt = hm.GenEvent(momentum_unit=hm.Units.MomentumUnit.GEV, length_unit=hm.Units.LengthUnit.MM) + # final state + state = 1 + e0 = np.sqrt(p*p + mass*mass) + px = np.cos(phi)*np.sin(theta) + py = np.sin(phi)*np.sin(theta) + pz = np.cos(theta) + + # beam + pbeam = hm.GenParticle(hm.FourVector(0, 0, 0, 0.938272), 2212, 4) + ebeam = hm.GenParticle(hm.FourVector(0, 0, e0, np.sqrt(e0*e0 + 0.511e-3*0.511e-3)), 11, 4) + + # out particle + hout = hm.GenParticle(hm.FourVector(px*p, py*p, pz*p, e0), pid, state) + + # vertex + vert = hm.GenVertex() + vert.add_particle_in(ebeam) + vert.add_particle_in(pbeam) + vert.add_particle_out(hout) + evt.add_vertex(vert) + return evt + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument('output', help='path to the output file') + parser.add_argument('-n', type=int, default=1000, dest='nev', help='number of events to generate') + parser.add_argument('-s', type=int, default=-1, dest='seed', help='seed for random generator') + parser.add_argument('--parray', type=str, default="", dest='parray', + help='an array of momenta in GeV, separated by \",\"') + parser.add_argument('--pmin', type=float, default=8.0, dest='pmin', help='minimum momentum in GeV') + parser.add_argument('--pmax', type=float, default=100.0, dest='pmax', help='maximum momentum in GeV') + parser.add_argument('--etamin', type=float, default=-4, dest='etamin', help='minimum pseudorapidity') + parser.add_argument('--etamax', type=float, default=4, dest='etamax', help='maximum pseudorapidity') + parser.add_argument('--phmin', type=float, default=0.0, dest='phmin', help='minimum angle in degree') + parser.add_argument('--phmax', type=float, default=360.0, dest='phmax', help='maximum angle in degree') + parser.add_argument('--particles', type=str, default='electron', dest='particles', + help='particle names, support {}'.format(list(PARTICLES.keys()))) + + args = parser.parse_args() + + # random seed (< 0 will get it from enviroment variable 'SEED', or a system random number) + if args.seed < 0: + args.seed = os.environ.get('SEED', int.from_bytes(os.urandom(4), byteorder='big', signed=False)) + print("Random seed is {}".format(args.seed)) + np.random.seed(args.seed) + + output = hm.WriterAscii(args.output); + if output.failed(): + print("Cannot open file \"{}\"".format(args.output)) + sys.exit(2) + + # build particle info + parts = [] + for pid in args.particles.split(','): + pid = pid.strip() + if pid not in PARTICLES.keys(): + print('pid {:d} not found in dictionary, ignored.'.format(pid)) + continue + parts.append(PARTICLES[pid]) + + # p values + pvals = np.random.uniform(args.pmin, args.pmax, args.nev) if not args.parray else \ + np.random.choice([float(p.strip()) for p in args.parray.split(',')], args.nev) + thvals = np.arctan(np.exp(np.random.uniform(args.etamin, args.etamax, args.nev)*-1.))*2. + phivals = np.random.uniform(args.phmin, args.phmax, args.nev)/180.*np.pi + partvals = [parts[i] for i in np.random.choice(len(parts), args.nev)] + + count = 0 + for p, theta, phi, (pid, mass) in zip(pvals, thvals, phivals, partvals): + if (count % 1000 == 0): + print("Generated {} events".format(count), end='\r') + evt = gen_event(p, theta, phi, pid, mass) + output.write_event(evt) + evt.clear() + count += 1 + + print("Generated {} events".format(args.nev)) + output.close() + diff --git a/benchmarks/tracking/scripts/tracking_performance.py b/benchmarks/tracking/scripts/tracking_performance.py new file mode 100644 index 00000000..5f7c414c --- /dev/null +++ b/benchmarks/tracking/scripts/tracking_performance.py @@ -0,0 +1,216 @@ +''' + A simple analysis script to extract some basic info of Monte-Carlo particles and clusters +''' +import os +import ROOT +import pandas as pd +import numpy as np +import argparse +from matplotlib import pyplot as plt +import matplotlib.ticker as ticker + + +# load root macros, input is an argument string +def load_root_macros(arg_macros): + for path in arg_macros.split(','): + path = path.strip() + if os.path.exists(path): + gROOT.Macro(path) + print('Loading root macro: \'{}\' loaded.'.format(path)) + else: + print('Loading root macros: \'{}\' does not exist, skip loading it.'.format(path)) + + +# read from RDataFrame and flatten a given collection, return pandas dataframe +def flatten_collection(rdf, collection, cols=None): + if not cols: + cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))] + else: + cols = ['{}.{}'.format(collection, c) for c in cols] + if not cols: + print('cannot find any branch under collection {}'.format(collection)) + return pd.DataFrame() + + data = rdf.AsNumpy(cols) + # flatten the data, add an event id to identify clusters from different events + evns = [] + for i, vec in enumerate(data[cols[0]]): + evns += [i]*vec.size() + for n, vals in data.items(): + # make sure ints are not converted to floats + typename = vals[0].__class__.__name__.lower() + dtype = np.int64 if 'int' in typename or 'long' in typename else np.float64 + # type safe creation + data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype) + # build data frame + dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()}) + dfp.loc[:, 'event'] = evns + return dfp + + +def thrown_particles_figure(rdf, save, mcbranch="mcparticles2"): + # define truth particle info + dft = flatten_collection(rdf, mcbranch, ['genStatus', 'pdgID', 'psx', 'psy', 'psz', 'mass']) + dft.rename(columns={c: c.replace(mcbranch + '.', '') for c in dft.columns}, inplace=True) + # select thrown particles + dft = dft[dft['genStatus'] == 1] + + # figure + fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120) + # convert pid to particle names + pdgbase = ROOT.TDatabasePDG() + get_pname = np.vectorize(lambda pid: pdgbase.GetParticle(int(pid)).GetName()) + get_pcharge = np.vectorize(lambda pid: pdgbase.GetParticle(int(pid)).Charge()/3.) + + # enumerate particle names + dft.loc[:, 'pname'] = get_pname(dft['pdgID'].values) + dft.loc[:, 'charge'] = get_pcharge(dft['pdgID'].values) + penum = {pname: i for i, pname in enumerate(dft['pname'].unique())} + dft.loc[:, 'pname_id'] = dft['pname'].map(penum) + + ax = axs.flat[0] + ax.hist(dft['pname_id'].values, bins=np.arange(-4, len(penum)*2 + 4), align='left', ec='k') + ax.set_ylabel('Particle Counts', fontsize=24) + ax.tick_params(labelsize=22) + ax.set_axisbelow(True) + ax.grid(linestyle=':', which='both', axis='y') + ax.xaxis.set_major_locator(ticker.FixedLocator(list(penum.values()))) + ax.set_xticklabels(list(penum), rotation=30) + + # calculate kinematics + get_4vecs = np.vectorize(lambda x, y, z, m: ROOT.Math.PxPyPzMVector(x, y, z, m)) + fourvecs = get_4vecs(*dft[['psx', 'psy', 'psz', 'mass']].values.T) + + dft.loc[:, 'p'] = [v.P() for v in fourvecs] + dft.loc[:, 'theta'] = [v.Theta() for v in fourvecs] + dft.loc[:, 'phi'] = [v.Phi() for v in fourvecs] + dft.loc[:, 'eta'] = [v.Eta() for v in fourvecs] + dft.loc[:, 'pt'] = [v.Pt() for v in fourvecs] + + # column name, x label, y label, bins + plots = [ + ('p', r'$P$ (GeV)', 'Counts', 50), + ('eta', r'$\eta$', 'Counts', 50), + ('pt', r'$P_T$ (GeV)', 'Counts', 50), + ] + for ax, (col, xlabel, ylabel, bins) in zip(axs.flat[1:], plots): + ax.hist(dft[col].values, bins=bins, align='left', ec='k') + ax.tick_params(labelsize=22, direction='in', which='both') + ax.grid(linestyle=':', which='both') + ax.set_axisbelow(True) + ax.set_xlabel(xlabel, fontsize=24) + ax.set_ylabel(ylabel, fontsize=24) + + fig.text(0.5, 0.95, 'Thrown Particles', ha='center', fontsize=24) + fig.savefig(save) + plt.close(fig) + return dft + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('rec_file', help='Path to reconstruction output file.') + parser.add_argument('-o', dest='outdir', default='.', help='Output directory.') + parser.add_argument('-m', '--macros', dest='macros', default='rootlogon.C', + help='Macro files to be loaded by root, separated by \",\".') + parser.add_argument('--mc-collection', dest='mc', default='mcparticles', + help='Collection name of MC particles truth info.') + parser.add_argument('--tracking-collection', dest='coll', default='outputTrackParameters', + help='Collection name of clusters to plot') + args = parser.parse_args() + + # multi-threading for RDataFrame + nthreads = os.cpu_count()//2 + ROOT.ROOT.EnableImplicitMT(nthreads) + print('CPU available {}, using {}'.format(os.cpu_count(), nthreads)) + + # declare some functions from c++ script, needed for data frame processing + # script_dir = os.path.dirname(os.path.realpath(__file__)) + # fcode = open(os.path.join(script_dir, 'barrel_clusters.cxx')).read() + # ROOT.gInterpreter.Declare(fcode) + + os.makedirs(args.outdir, exist_ok=True) + # load macros (add libraries/headers root cannot automatically found here) + load_root_macros(args.macros) + + rdf_rec = ROOT.RDataFrame('events', args.rec_file) + dfm = thrown_particles_figure(rdf_rec, save=os.path.join(args.outdir, 'thrown_particles.png'), mcbranch=args.mc) + df = flatten_collection(rdf_rec, args.coll) + df.rename(columns={c: c.replace(args.coll + '.', '') for c in df.columns}, inplace=True) + + fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120) + for ax in axs.flat: + ax.tick_params(direction='in', which='both', labelsize=20) + ax.grid(linestyle=':') + ax.set_axisbelow(True) + ax = axs.flat[0] + # tracking efficiency + eta_bins = np.linspace(-4, 4, 21) + sim_eta, _ = np.histogram(dfm['eta'].values, bins=eta_bins) + rec_eta, _ = np.histogram(-np.log(np.tan(df['theta'].values/2.)), bins=eta_bins) + eta_centers = (eta_bins[1:] + eta_bins[:-1])/2. + eta_binsize = np.mean(np.diff(eta_centers)) + # cut off not simulated bins + mask = sim_eta > 0 + sim_eta = sim_eta[mask] + rec_eta = rec_eta[mask] + eta_centers = eta_centers[mask] + track_eff = rec_eta/sim_eta + # binary distribution, pq*sqrt(N) + # TODO check the errors + eff = np.mean(track_eff) + track_err = eff*(1. - eff)*np.reciprocal(np.sqrt(sim_eta)) + # rec_err = eff*(1. - eff)*np.sqrt(rec_eta) + # track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), (rec_eta - rec_err)/sim_eta) + # track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), (rec_eta + rec_err)/sim_eta) - track_eff + track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), track_eff - track_err) + track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), track_eff + track_err) - track_eff + + + ax.errorbar(eta_centers, track_eff, xerr=eta_binsize/2., yerr=[track_eff_lower, track_eff_upper], + fmt='o', capsize=3) + ax.set_ylim(0., 1.) + ax.set_xlim(-4.5, 4.5) + ax.set_ylabel('Tracking Efficiency', fontsize=20) + ax.set_xlabel('$\eta$', fontsize=20) + + # momentum resolution + ax = axs.flat[1] + dfc = dfm.set_index('event').loc[df['event'].values] + rec_p = dfc['charge'].values/df['qOverP'].values + sim_p = dfc['p'].values + # print(dfc['charge'].values, df['qOverP'].values) + dp_p = (rec_p - sim_p)/sim_p + hval, hbins, _ = ax.hist(dp_p*100., bins=np.linspace(-20, 20, 101), + weights=np.repeat(1./float(rec_p.shape[0]), rec_p.shape[0]), ec='k') + nbins = hbins.shape[0] - 1 + ax.set_ylabel('Normalized Counts / {:d} Bins'.format(nbins), fontsize=20) + ax.set_xlabel('$\delta p$ (%)', fontsize=20) + + # theta resolution + ax = axs.flat[2] + rec_th = df.groupby('event')['theta'].first().values + sim_th = dfc.groupby('event')['theta'].first().values + dth_th = (rec_th - sim_th) + # print(np.vstack([rec_th, sim_th]).T) + hval, hbins, _ = ax.hist(dth_th, bins=np.linspace(-0.01, 0.01, 101), + weights=np.repeat(1./float(rec_th.shape[0]), rec_th.shape[0]), ec='k') + nbins = hbins.shape[0] - 1 + ax.set_ylabel('Normalized Counts / {:d} Bins'.format(nbins), fontsize=20) + ax.set_xlabel(r'$d\theta$ (rad)', fontsize=20) + fig.savefig('test.png') + + # phi resolution + ax = axs.flat[3] + rec_th = df.groupby('event')['phi'].first().values + sim_th = dfc.groupby('event')['phi'].first().values + dth_th = (rec_th - sim_th) + # print(np.vstack([rec_th, sim_th]).T) + hval, hbins, _ = ax.hist(dth_th, bins=np.linspace(-0.01, 0.01, 101), + weights=np.repeat(1./float(rec_th.shape[0]), rec_th.shape[0]), ec='k') + nbins = hbins.shape[0] - 1 + ax.set_ylabel('Normalized Counts / {:d} Bins'.format(nbins), fontsize=20) + ax.set_xlabel(r'$d\phi$ (rad)', fontsize=20) + + fig.text(0.5, 0.95, 'Barrel Tracker Benchmark (Truth Init.)', fontsize=22, ha='center') + fig.savefig(os.path.join(args.outdir, 'tracking_performance.png')) + -- GitLab