Skip to content
Snippets Groups Projects
Commit a6d78484 authored by Shujie Li's avatar Shujie Li Committed by Wouter Deconinck
Browse files

add new tracking performance plots with gaus fit

parent ff75cc6c
No related tags found
1 merge request!219add new tracking performance plots with gaus fit
This commit is part of merge request !219. Comments created here will be created in the context of that merge request.
...@@ -38,6 +38,7 @@ script_dir = os.path.dirname(os.path.realpath(__file__)) ...@@ -38,6 +38,7 @@ script_dir = os.path.dirname(os.path.realpath(__file__))
gen_script = os.path.join(script_dir, 'scripts', 'gen_particles.py') gen_script = os.path.join(script_dir, 'scripts', 'gen_particles.py')
option_script = os.path.join(script_dir, 'options', args.options) option_script = os.path.join(script_dir, 'options', args.options)
analysis_script = os.path.join(script_dir, 'scripts', 'tracking_performance.py') analysis_script = os.path.join(script_dir, 'scripts', 'tracking_performance.py')
analysis_script_fit = os.path.join(script_dir, 'scripts', 'tracking_performance_fit.py')
gen_file = 'gen_{nametag}_{pmin}_{pmax}.hepmc'.format(**kwargs) gen_file = 'gen_{nametag}_{pmin}_{pmax}.hepmc'.format(**kwargs)
sim_file = 'sim_{nametag}_{pmin}_{pmax}.root'.format(**kwargs) sim_file = 'sim_{nametag}_{pmin}_{pmax}.root'.format(**kwargs)
...@@ -99,6 +100,14 @@ if 'ana' in procs: ...@@ -99,6 +100,14 @@ if 'ana' in procs:
print('ERROR running analysis ({})!'.format(ana_cmd)) print('ERROR running analysis ({})!'.format(ana_cmd))
exit(1) exit(1)
## new performance plot with fit
ana_cmd = ['python', analysis_script_fit, rec_file,
'--mc-collection', 'mcparticles',
'--tracking-collection', 'outputTrackParameters',
'-o', 'results', '-t', args.nametag]
return_code = subprocess.run(ana_cmd).returncode
if return_code is not None and return_code != 0:
print('ERROR running analysis ({})!'.format(ana_cmd))
# upload generated data files if it was small enough (< 10 Mb) # upload generated data files if it was small enough (< 10 Mb)
for upload in [rec_file]: for upload in [rec_file]:
......
import os
import ROOT
import pandas as pd
import numpy as np
import argparse
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
# pd.set_option(20, None, 'display.max_columns', None)
pd.options.display.max_rows = 40
pd.options.display.max_columns = 100
# pd.set_option('display.max_rows', None)#, 'display.max_columns', None)
import matplotlib.cm as cm
import matplotlib as mpl
import matplotlib.pylab as plt
import awkward as ak
import uproot as ur
# import uproot3
print('Uproot version: ' + ur.__version__)
deg2rad = np.pi/180.0
plt.rcParams['figure.figsize'] = [12.0, 8.0]
# plt.rcParams['figure.dpi'] = 80
SMALL_SIZE = 13
MEDIUM_SIZE = 14
BIGGER_SIZE = 16
plt.rc('font', size=SMALL_SIZE) # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE) # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title]
# class Inputs:
# def __init__(self, worksim, rec_file):
# self.worksim = worksim
# self.rec_file = worksim+"/"+rec_file
# self.outdir = self.worksim
# macros = 'rootlogon.C'
# nametag = "tracking"
# mc = "mcparticles"
# fit = "outputTrackParameters"
# rec = "ReconstructedParticles"
# append kin variables to dataframe, mcparticles "ps", reconstructedparticles "p"
PARTICLES = pd.DataFrame({
"pion0": (111, 0, 0.1349766), # pi0
"pion+": (211, 1, 0.13957018), # pi+
"pion-": (-211,-1,0.13957018), # pi-
"kaon0L": (130, 0, 0.497648), # K0L
"kaon0S": (310, 0, 0.497648), # K0S
"kaon0": (311, 0, 0.497648), # K0
"kaon+": (321, 1, 0.493677), # K+
"kaon-": (-321, -1, 0.493677), # K-
"proton": (2212, 1, 0.938272), # proton
"neutron": (2112, 0, 0.939565), # neutron
"electron": (11, -1, 0.51099895e-3), # electron
"positron": (-11, 1, 0.51099895e-3),# positron
"photon": (22, 0, 0), # photon
},index=["pid","charge","mass"]).transpose()
def add_kin(data,prefix="p"):
get_4vecs = np.vectorize(lambda x, y, z, m: ROOT.Math.PxPyPzMVector(x, y, z, m))
fourvecs = get_4vecs(*data[[prefix+'.x', prefix+'.y', prefix+'.z', 'mass']].values.T)
data.loc[:, 'p'] = [v.P() for v in fourvecs]
data.loc[:, 'theta'] = [v.Theta() for v in fourvecs]
data.loc[:, 'phi'] = [v.Phi() for v in fourvecs]
data.loc[:, 'eta'] = [v.Eta() for v in fourvecs]
data.loc[:, 'pt'] = [v.Pt() for v in fourvecs]
return data
#1 d histogram and gaus fit
from lmfit.models import GaussianModel
def hist_gaus(dataset,ax, bins=100,klog=0,header=None):
## select data in range if bins is provided as an array
if not np.isscalar(bins):
c1 = dataset<=bins[-1]
c2 = dataset>=bins[0]
dataset=dataset[c1&c2]
## 1st fit
n, bins, patches = ax.hist(dataset, bins,density=False, facecolor='b', alpha=0.3)
xx = bins[0:-1]+(bins[1]-bins[0])/2.0
ymax = np.max(n)
std = np.std(dataset)
mean = np.mean(dataset)
# # print(mean,std)
c1 = xx<=(mean+2*std)
c2 = xx>=(mean-2*std)
cond = c1&c2
while len(n[cond])<len(bins)/4.0:
ax.cla()
diff = (bins[-1]-bins[0])/2.0/2.0
n, bins, patches = ax.hist(dataset, np.linspace(bins[0]+diff,bins[-1]-diff,len(bins)),density=False, facecolor='b', alpha=0.3)
xx = bins[0:-1]+(bins[1]-bins[0])/2.0
c1 = xx<=(mean+2*std)
c2 = xx>=(mean-2*std)
cond = c1&c2
model = GaussianModel()
# create parameters with initial guesses:
params = model.make_params(center=np.median(dataset), amplitude=np.max(n), sigma=np.std(dataset))
result = model.fit(n, params, x=xx)
# 2nd fit
std = result.params['sigma'].value
c1 = xx<=(mean+2*std)
c2 = xx>=(mean-2*std)
cond = c1&c2
model = GaussianModel()
params = model.make_params(center=np.median(dataset), amplitude=np.max(n), sigma=np.std(dataset))
result = model.fit(n[cond], params, x=xx[cond])
print(result.success)
# print(result.fit_report())
# print (result.best_values)
# plt.plot(xx, result.best_fit, 'r-', label='best fit')
if len(result.best_fit)>0:
ax.plot(xx[cond], result.best_fit, 'r-', label='sigma=%g,err=%g' %(result.params['sigma'].value,result.params['sigma'].stderr))
ax.legend(title=header, frameon=False,loc='upper left')
if klog:
ax.set_yscale('log')
ax.set_ylim(1,ymax*10)
else:
ax.set_ylim(0,ymax*1.3)
return result.params['sigma'].value,result.params['sigma'].stderr
def read_data(args, rows=-1):
fname = args.rec_file
rdf_rec = ROOT.RDataFrame('events',fname)
if rows>0:
rdf_rec=rdf_rec.Range(0,rows)
print("read in ",rdf_rec.Count().GetValue(), "rows")
return rdf_rec
# read from RDataFrame and flatten a given collection, return pandas dataframe
def flatten_collection(rdf, collection, cols=None):
if not cols:
cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
else:
cols = ['{}.{}'.format(collection, c) for c in cols]
if not cols:
print('cannot find any branch under collection {}'.format(collection))
return pd.DataFrame()
for cname in cols:
if "[" in cname:
cols.remove(cname) ## can not convert array to python. drop for now
# print(cols)
data = rdf.AsNumpy(cols)
# flatten the data, add an event id to identify clusters from different events
evns = []
for i, vec in enumerate(data[cols[0]]):
evns += [i]*vec.size()
for n, vals in data.items():
# make sure ints are not converted to floats
typename = vals[0].__class__.__name__.lower()
dtype = np.int64 if 'int' in typename or 'long' in typename else np.float64
# type safe creation
data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
# build data frame
dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
dfp.loc[:, 'event'] = evns
dfp.rename(columns={c: c.replace(collection + '.', '') for c in dfp.columns}, inplace=True)
return dfp
def pre_proc(rdf_rec,pid=0):
df_fit = flatten_collection(rdf_rec,args.fit)
df_fit = df_fit.groupby('event').first().reset_index()
df_fit["p"] = 1./df_fit["qOverP"]
df_fit["eta"]= -np.log(np.tan(df_fit['direction.theta'].values/2.))
df_fit["pt"] = df_fit["p"]*np.sin(df_fit["direction.theta"])
df_fit["theta"] = df_fit["direction.theta"]
df_fit["phi"] = df_fit["direction.phi"]
# if len(df_fit)<1:
print (args.fit,df_fit.columns,len(df_fit))
df_rec = flatten_collection(rdf_rec, args.rec)
print (args.rec,df_rec.columns,len(df_rec))
if len(df_rec)>0:
# df_rec=df_rec[df_rec.pid==pid]
df_rec = df_rec.groupby('event').first().reset_index()
# df_rec = df_rec.reset_index()
# if "mass" not in df_rec.columns:
if pid!=0:
# df_rec["mass"] = [PARTICLES.loc[PARTICLES.pid==pid]['mass'].values for pid in df_rec.pid]
mass = PARTICLES.loc[PARTICLES.pid==pid]['mass'].values[0]
df_rec["mass"] = mass*np.ones(len(df_rec))
df_rec = add_kin(df_rec,"p")
df_mc = flatten_collection(rdf_rec,args.mc)
if pid!=0:
df_mc = df_mc[df_mc.pdgID==pid]
df_mc = df_mc[(df_mc.genStatus==1)].reset_index(drop=False)
# df_mc = df_mc.set_index('event').loc[df['event'].values]
print (args.mc,df_mc.columns,len(df_mc))
df_mc = add_kin(df_mc,"ps")
return df_mc, df_rec,df_fit
## --------tracking performance plots-------------------
def performance_plot(df,dfc,dfm,dirname=""):
dp_lim=5 #%
th_lim=0.003 #rad
ph_lim=0.003
fig, axs = plt.subplots(3, 2, figsize=(20, 14))
for ax in axs.flat:
ax.tick_params(direction='in', which='both')#, labelsize=20)
ax.grid(linestyle=':')
ax.set_axisbelow(True)
# ---------------tracking efficiency v.s. eta-----------------
ax = axs.flat[0]
eta_bins = np.linspace(-4, 4, 21)
sim_eta, _ = np.histogram(dfm['eta'].values, bins=eta_bins)
## ?? outputTrackParameters.direction.phi = mcparticles2.theta???
rec_eta, _ = np.histogram(df['eta'], bins=eta_bins)
# rec_eta, _ = np.histogram(-np.log(np.tan(df['direction.theta'].values/2.)), bins=eta_bins)
track_eff_total = np.sum(rec_eta)/np.sum(sim_eta)
eta_centers = (eta_bins[1:] + eta_bins[:-1])/2.
eta_binsize = np.mean(np.diff(eta_centers))
# cut off not simulated bins
mask = sim_eta > 0
sim_eta = sim_eta[mask]
rec_eta = rec_eta[mask]
eta_centers = eta_centers[mask]
track_eff = np.array(rec_eta)/np.array(sim_eta)
# binary distribution, pq*sqrt(N)
# TODO check the errors
eff = np.mean(track_eff)
track_err = eff*(1. - eff)*np.reciprocal(np.sqrt(sim_eta))
# rec_err = eff*(1. - eff)*np.sqrt(rec_eta)
# track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), (rec_eta - rec_err)/sim_eta)
# track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), (rec_eta + rec_err)/sim_eta) - track_eff
track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), track_eff - track_err)
track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), track_eff + track_err) - track_eff
ax.errorbar(eta_centers, track_eff, xerr=eta_binsize/2., yerr=[track_eff_lower, track_eff_upper],
fmt='o', capsize=3)
ax.set_ylim(0., 1.1)
ax.set_xlim(-4.5, 4.5)
ax.set_ylabel('Tracking Efficiency')#, fontsize=20)
ax.set_xlabel(r'$\eta$')#, fontsize=20)
ax.text(-4,1,"recon/generated events= %d / %d =%.2f" %(len(df),len(dfm),len(df)/len(dfm)))
# ---------------theta distribution------------
ax = axs.flat[1]
sim_th_deg = dfm.groupby('event')['theta'].first().values*180/np.pi
rec_th_deg = df.groupby('event')['theta'].first().values*180/np.pi
hval, hbins, _ = ax.hist(sim_th_deg, bins=np.linspace(-0, 180,61),
ec='k',alpha=0.3,label="Generated")
hval, hbins, _ = ax.hist(rec_th_deg, bins=np.linspace(-0,180, 61),
ec='k',alpha=0.3, label="Reconstructed")
nbins = hbins.shape[0] - 1
# ax.set_ylabel('Counts / {:d} Bins'.format(nbins))#, fontsize=20)
ax.set_xlabel(r'$\theta$(degree)')#, fontsize=20)
# ----------momentum resolution--------------
ax = axs.flat[2]
# rec_p = dfc['charge'].values/df['qOverP'].values
rec_p = df['p'].values
sim_p = dfc['p'].values
dp_p = (rec_p - sim_p)/sim_p
# hval, hbins, _ = ax.hist(dp_p*100., bins=np.linspace(-5, 5, 101),
# weights=np.repeat(1./float(rec_p.shape[0]), rec_p.shape[0]), ec='k')
nbins = 100
sig_p,err_p=hist_gaus(dp_p*100.,ax,np.linspace(-dp_lim, dp_lim, nbins+1),klog=0,header=None)
# ax.set_ylabel('Normalized Density / {:d} Bins'.format(nbins))#, fontsize=20)
ax.set_xlabel(r'$\delta p$ (%)')#, fontsize=20)
# ----------------pT resolution----------------------
ax = axs.flat[3]
rec_pt = df.groupby('event')['pt'].first().values
sim_pt = dfc.groupby('event')['pt'].first().values
dp_pt = (rec_pt - sim_pt)/sim_pt*100
# print(dfm.head(),dfc.head())
# print(np.vstack([rec_th, sim_th]).T)
sig_pt,err_pt=hist_gaus(dp_pt,ax,np.linspace(-dp_lim, dp_lim, nbins+1),klog=0,header=None)
# ax.set_ylabel('Normalized Density / {:d} Bins'.format(nbins))#, fontsize=20)
ax.set_xlabel(r'$\delta p_T$(%)')#, fontsize=20)
# ----------------theta resolution----------------------
nbins = 100
ax = axs.flat[4]
rec_th = df.groupby('event')['theta'].first().values
sim_th = dfc.groupby('event')['theta'].first().values
# print(dfm.head(),dfc.head())
dth_th = (rec_th - sim_th)
# print(np.vstack([rec_th, sim_th]).T)
sig_th,err_th=hist_gaus(dth_th,ax,np.linspace(-th_lim, th_lim, nbins+1),klog=0,header=None)
# ax.set_ylabel('Normalized Density / {:d} Bins'.format(nbins))#, fontsize=20)
ax.set_xlabel(r'$d\theta$ (rad)')#, fontsize=20)
# -----------------phi resolution----------------------
ax = axs.flat[5]
rec_th = df.groupby('event')['phi'].first().values
sim_th = dfc.groupby('event')['phi'].first().values
dth_th = (rec_th - sim_th)
# print(np.vstack([rec_th, sim_th]).T)
# hval, hbins, _ = ax.hist(dth_th, bins=np.linspace(-0.002, 0.002, 101),
# weights=np.repeat(1./float(rec_th.shape[0]), rec_th.shape[0]), ec='k')
sig_ph,err_ph=hist_gaus(dth_th,ax,np.linspace(-ph_lim, ph_lim, nbins+1),klog=0,header=None)
# ax.set_ylabel('Normalized Counts / {:d} Bins'.format(nbins))#, fontsize=20)
ax.set_xlabel(r'$d\phi$ (rad)')#, fontsize=20)
# eta distribution
# ax = axs.flat[4]
# sim_eta = dfm.groupby('event')['eta'].first().values
# rec_eta = -np.log(np.tan(df.groupby('event')['direction.theta'].first().values/2.))
# hval, hbins, _ = ax.hist(sim_eta, bins=np.linspace(-4, 4, 41), ec='k',alpha=0.3,label="Generated")
# hval, hbins, _ = ax.hist(rec_eta, bins=np.linspace(-4, 4, 41), ec='k',alpha=0.3, label="Reconstructed")
# nbins = hbins.shape[0] - 1
# ax.set_ylabel('Counts / {:d} Bins'.format(nbins), fontsize=20)
# ax.set_xlabel(r'$\eta$', fontsize=20)
# ax.legend()
# -------phi distribution
# ax = axs.flat[6]
# sim_th_deg = dfm.groupby('event')['phi'].first().values*180/np.pi
# rec_th_deg = df.groupby('event')['direction.phi'].first().values*180/np.pi
# hval, hbins, _ = ax.hist(sim_th_deg, bins=np.linspace(-180, 180,61),
# ec='k',alpha=0.3,label="Generated")
# hval, hbins, _ = ax.hist(rec_th_deg, bins=np.linspace(-180,180, 61),
# ec='k',alpha=0.3, label="Reconstructed")
# nbins = hbins.shape[0] - 1
# ax.set_ylabel('Counts / {:d} Bins'.format(nbins), fontsize=20)
# ax.set_xlabel(r'$\phi$(degree)', fontsize=20)
fig.text(0.5, 0.95, 'Tracking Benchmark (Truth Init.)', fontsize=18, ha='center')
fig.savefig(os.path.join(args.outdir, '{}_performance_fit.png'.format(args.nametag)))
return track_eff_total,sig_p,err_p,sig_pt,err_pt,sig_th,err_th,sig_ph,err_ph
def eta_theta(xx, inverse=0):
if inverse==1:
return np.arctan((np.e)**(-xx))*2
else:
return -np.log(np.tan(xx/2.))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('rec_file', help='Path to reconstruction output file.')
parser.add_argument('-o', dest='outdir', default='.', help='Output directory.')
parser.add_argument('-t', '--nametag', type=str, default='tracking', help='Name tag for output files.')
parser.add_argument('-m', '--macros', dest='macros', default='rootlogon.C',
help='Macro files to be loaded by root, separated by \",\".')
parser.add_argument('--mc-collection', dest='mc', default='mcparticles',
help='Collection name of MC particles truth info.')
parser.add_argument('--rec-collection', dest='rec', default='ReconstructedParticles',
help='Collection name of reconstructed particles')
parser.add_argument('--tracking-collection', dest='fit', default='outputTrackParameters', help='Collection name of track info from fitting')
args = parser.parse_args()
# multi-threading for RDataFrame
nthreads = os.cpu_count()//2
ROOT.ROOT.EnableImplicitMT(nthreads)
print('CPU available {}, using {}'.format(os.cpu_count(), nthreads))
os.makedirs(args.outdir, exist_ok=True)
rdf=read_data(args,-1) ## -1: optional argument for number of entries to read
df_mc,df_rec,df_fit=pre_proc(rdf,0) ## 0: optional argument for pid
dfc = df_mc.loc[df_fit["event"].values].reset_index()
dfm = df_mc
df = df_fit
_=performance_plot(df_fit,dfc,df_mc)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment