Skip to content
Snippets Groups Projects

add new tracking performance plots with gaus fit

Open Shujie Li requested to merge update_plot_fit into master
1 unresolved thread
2 files
+ 424
0
Compare changes
  • Side-by-side
  • Inline
Files
2
 
import os
 
import ROOT
 
import pandas as pd
 
import numpy as np
 
import argparse
 
from matplotlib import pyplot as plt
 
import matplotlib.ticker as ticker
 
# pd.set_option(20, None, 'display.max_columns', None)
 
pd.options.display.max_rows = 40
 
pd.options.display.max_columns = 100
 
# pd.set_option('display.max_rows', None)#, 'display.max_columns', None)
 
import matplotlib.cm as cm
 
import matplotlib as mpl
 
import matplotlib.pylab as plt
 
import awkward as ak
 
import uproot as ur
 
# import uproot3
 
print('Uproot version: ' + ur.__version__)
 
deg2rad = np.pi/180.0
 
 
plt.rcParams['figure.figsize'] = [12.0, 8.0]
 
# plt.rcParams['figure.dpi'] = 80
 
 
SMALL_SIZE = 13
 
MEDIUM_SIZE = 14
 
BIGGER_SIZE = 16
 
 
plt.rc('font', size=SMALL_SIZE) # controls default text sizes
 
plt.rc('axes', titlesize=SMALL_SIZE) # fontsize of the axes title
 
plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels
 
plt.rc('xtick', labelsize=SMALL_SIZE) # fontsize of the tick labels
 
plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels
 
plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize
 
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title]
 
 
 
# class Inputs:
 
# def __init__(self, worksim, rec_file):
 
# self.worksim = worksim
 
# self.rec_file = worksim+"/"+rec_file
 
# self.outdir = self.worksim
 
# macros = 'rootlogon.C'
 
# nametag = "tracking"
 
# mc = "mcparticles"
 
# fit = "outputTrackParameters"
 
# rec = "ReconstructedParticles"
 
 
# append kin variables to dataframe, mcparticles "ps", reconstructedparticles "p"
 
PARTICLES = pd.DataFrame({
 
"pion0": (111, 0, 0.1349766), # pi0
 
"pion+": (211, 1, 0.13957018), # pi+
 
"pion-": (-211,-1,0.13957018), # pi-
 
"kaon0L": (130, 0, 0.497648), # K0L
 
"kaon0S": (310, 0, 0.497648), # K0S
 
"kaon0": (311, 0, 0.497648), # K0
 
"kaon+": (321, 1, 0.493677), # K+
 
"kaon-": (-321, -1, 0.493677), # K-
 
"proton": (2212, 1, 0.938272), # proton
 
"neutron": (2112, 0, 0.939565), # neutron
 
"electron": (11, -1, 0.51099895e-3), # electron
 
"positron": (-11, 1, 0.51099895e-3),# positron
 
"photon": (22, 0, 0), # photon
 
},index=["pid","charge","mass"]).transpose()
 
 
def add_kin(data,prefix="p"):
 
 
get_4vecs = np.vectorize(lambda x, y, z, m: ROOT.Math.PxPyPzMVector(x, y, z, m))
 
fourvecs = get_4vecs(*data[[prefix+'.x', prefix+'.y', prefix+'.z', 'mass']].values.T)
 
 
data.loc[:, 'p'] = [v.P() for v in fourvecs]
 
data.loc[:, 'theta'] = [v.Theta() for v in fourvecs]
 
data.loc[:, 'phi'] = [v.Phi() for v in fourvecs]
 
data.loc[:, 'eta'] = [v.Eta() for v in fourvecs]
 
data.loc[:, 'pt'] = [v.Pt() for v in fourvecs]
 
return data
 
 
 
#1 d histogram and gaus fit
 
from lmfit.models import GaussianModel
 
def hist_gaus(dataset,ax, bins=100,klog=0,header=None):
 
 
## select data in range if bins is provided as an array
 
if not np.isscalar(bins):
 
c1 = dataset<=bins[-1]
 
c2 = dataset>=bins[0]
 
dataset=dataset[c1&c2]
 
 
 
## 1st fit
 
n, bins, patches = ax.hist(dataset, bins,density=False, facecolor='b', alpha=0.3)
 
xx = bins[0:-1]+(bins[1]-bins[0])/2.0
 
 
ymax = np.max(n)
 
std = np.std(dataset)
 
mean = np.mean(dataset)
 
# # print(mean,std)
 
c1 = xx<=(mean+2*std)
 
c2 = xx>=(mean-2*std)
 
cond = c1&c2
 
while len(n[cond])<len(bins)/4.0:
 
ax.cla()
 
diff = (bins[-1]-bins[0])/2.0/2.0
 
n, bins, patches = ax.hist(dataset, np.linspace(bins[0]+diff,bins[-1]-diff,len(bins)),density=False, facecolor='b', alpha=0.3)
 
xx = bins[0:-1]+(bins[1]-bins[0])/2.0
 
c1 = xx<=(mean+2*std)
 
c2 = xx>=(mean-2*std)
 
cond = c1&c2
 
model = GaussianModel()
 
# create parameters with initial guesses:
 
params = model.make_params(center=np.median(dataset), amplitude=np.max(n), sigma=np.std(dataset))
 
result = model.fit(n, params, x=xx)
 
 
# 2nd fit
 
std = result.params['sigma'].value
 
c1 = xx<=(mean+2*std)
 
c2 = xx>=(mean-2*std)
 
cond = c1&c2
 
model = GaussianModel()
 
params = model.make_params(center=np.median(dataset), amplitude=np.max(n), sigma=np.std(dataset))
 
result = model.fit(n[cond], params, x=xx[cond])
 
print(result.success)
 
 
# print(result.fit_report())
 
# print (result.best_values)
 
# plt.plot(xx, result.best_fit, 'r-', label='best fit')
 
if len(result.best_fit)>0:
 
ax.plot(xx[cond], result.best_fit, 'r-', label='sigma=%g,err=%g' %(result.params['sigma'].value,result.params['sigma'].stderr))
 
ax.legend(title=header, frameon=False,loc='upper left')
 
if klog:
 
ax.set_yscale('log')
 
ax.set_ylim(1,ymax*10)
 
else:
 
ax.set_ylim(0,ymax*1.3)
 
 
return result.params['sigma'].value,result.params['sigma'].stderr
 
 
def read_data(args, rows=-1):
 
 
fname = args.rec_file
 
rdf_rec = ROOT.RDataFrame('events',fname)
 
if rows>0:
 
rdf_rec=rdf_rec.Range(0,rows)
 
print("read in ",rdf_rec.Count().GetValue(), "rows")
 
return rdf_rec
 
 
# read from RDataFrame and flatten a given collection, return pandas dataframe
 
def flatten_collection(rdf, collection, cols=None):
 
if not cols:
 
cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
 
else:
 
cols = ['{}.{}'.format(collection, c) for c in cols]
 
if not cols:
 
print('cannot find any branch under collection {}'.format(collection))
 
return pd.DataFrame()
 
for cname in cols:
 
if "[" in cname:
 
cols.remove(cname) ## can not convert array to python. drop for now
 
# print(cols)
 
 
data = rdf.AsNumpy(cols)
 
# flatten the data, add an event id to identify clusters from different events
 
evns = []
 
for i, vec in enumerate(data[cols[0]]):
 
evns += [i]*vec.size()
 
for n, vals in data.items():
 
# make sure ints are not converted to floats
 
typename = vals[0].__class__.__name__.lower()
 
dtype = np.int64 if 'int' in typename or 'long' in typename else np.float64
 
# type safe creation
 
data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
 
# build data frame
 
dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
 
dfp.loc[:, 'event'] = evns
 
 
dfp.rename(columns={c: c.replace(collection + '.', '') for c in dfp.columns}, inplace=True)
 
return dfp
 
 
 
 
def pre_proc(rdf_rec,pid=0):
 
 
 
df_fit = flatten_collection(rdf_rec,args.fit)
 
df_fit = df_fit.groupby('event').first().reset_index()
 
df_fit["p"] = 1./df_fit["qOverP"]
 
df_fit["eta"]= -np.log(np.tan(df_fit['direction.theta'].values/2.))
 
df_fit["pt"] = df_fit["p"]*np.sin(df_fit["direction.theta"])
 
df_fit["theta"] = df_fit["direction.theta"]
 
df_fit["phi"] = df_fit["direction.phi"]
 
 
# if len(df_fit)<1:
 
print (args.fit,df_fit.columns,len(df_fit))
 
 
df_rec = flatten_collection(rdf_rec, args.rec)
 
print (args.rec,df_rec.columns,len(df_rec))
 
 
 
if len(df_rec)>0:
 
# df_rec=df_rec[df_rec.pid==pid]
 
df_rec = df_rec.groupby('event').first().reset_index()
 
# df_rec = df_rec.reset_index()
 
# if "mass" not in df_rec.columns:
 
if pid!=0:
 
# df_rec["mass"] = [PARTICLES.loc[PARTICLES.pid==pid]['mass'].values for pid in df_rec.pid]
 
mass = PARTICLES.loc[PARTICLES.pid==pid]['mass'].values[0]
 
df_rec["mass"] = mass*np.ones(len(df_rec))
 
df_rec = add_kin(df_rec,"p")
 
 
df_mc = flatten_collection(rdf_rec,args.mc)
 
if pid!=0:
 
df_mc = df_mc[df_mc.pdgID==pid]
 
df_mc = df_mc[(df_mc.genStatus==1)].reset_index(drop=False)
 
# df_mc = df_mc.set_index('event').loc[df['event'].values]
 
print (args.mc,df_mc.columns,len(df_mc))
 
df_mc = add_kin(df_mc,"ps")
 
 
 
return df_mc, df_rec,df_fit
 
 
 
## --------tracking performance plots-------------------
 
def performance_plot(df,dfc,dfm,dirname=""):
 
 
dp_lim=5 #%
 
th_lim=0.003 #rad
 
ph_lim=0.003
 
fig, axs = plt.subplots(3, 2, figsize=(20, 14))
 
for ax in axs.flat:
 
ax.tick_params(direction='in', which='both')#, labelsize=20)
 
ax.grid(linestyle=':')
 
ax.set_axisbelow(True)
 
 
# ---------------tracking efficiency v.s. eta-----------------
 
ax = axs.flat[0]
 
eta_bins = np.linspace(-4, 4, 21)
 
 
 
sim_eta, _ = np.histogram(dfm['eta'].values, bins=eta_bins)
 
## ?? outputTrackParameters.direction.phi = mcparticles2.theta???
 
rec_eta, _ = np.histogram(df['eta'], bins=eta_bins)
 
# rec_eta, _ = np.histogram(-np.log(np.tan(df['direction.theta'].values/2.)), bins=eta_bins)
 
track_eff_total = np.sum(rec_eta)/np.sum(sim_eta)
 
 
eta_centers = (eta_bins[1:] + eta_bins[:-1])/2.
 
eta_binsize = np.mean(np.diff(eta_centers))
 
# cut off not simulated bins
 
mask = sim_eta > 0
 
sim_eta = sim_eta[mask]
 
rec_eta = rec_eta[mask]
 
eta_centers = eta_centers[mask]
 
track_eff = np.array(rec_eta)/np.array(sim_eta)
 
# binary distribution, pq*sqrt(N)
 
# TODO check the errors
 
eff = np.mean(track_eff)
 
track_err = eff*(1. - eff)*np.reciprocal(np.sqrt(sim_eta))
 
# rec_err = eff*(1. - eff)*np.sqrt(rec_eta)
 
# track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), (rec_eta - rec_err)/sim_eta)
 
# track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), (rec_eta + rec_err)/sim_eta) - track_eff
 
track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), track_eff - track_err)
 
track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), track_eff + track_err) - track_eff
 
 
 
ax.errorbar(eta_centers, track_eff, xerr=eta_binsize/2., yerr=[track_eff_lower, track_eff_upper],
 
fmt='o', capsize=3)
 
ax.set_ylim(0., 1.1)
 
ax.set_xlim(-4.5, 4.5)
 
ax.set_ylabel('Tracking Efficiency')#, fontsize=20)
 
ax.set_xlabel(r'$\eta$')#, fontsize=20)
 
ax.text(-4,1,"recon/generated events= %d / %d =%.2f" %(len(df),len(dfm),len(df)/len(dfm)))
 
 
 
 
 
# ---------------theta distribution------------
 
ax = axs.flat[1]
 
sim_th_deg = dfm.groupby('event')['theta'].first().values*180/np.pi
 
rec_th_deg = df.groupby('event')['theta'].first().values*180/np.pi
 
 
hval, hbins, _ = ax.hist(sim_th_deg, bins=np.linspace(-0, 180,61),
 
ec='k',alpha=0.3,label="Generated")
 
hval, hbins, _ = ax.hist(rec_th_deg, bins=np.linspace(-0,180, 61),
 
ec='k',alpha=0.3, label="Reconstructed")
 
nbins = hbins.shape[0] - 1
 
# ax.set_ylabel('Counts / {:d} Bins'.format(nbins))#, fontsize=20)
 
ax.set_xlabel(r'$\theta$(degree)')#, fontsize=20)
 
 
 
# ----------momentum resolution--------------
 
ax = axs.flat[2]
 
 
# rec_p = dfc['charge'].values/df['qOverP'].values
 
 
rec_p = df['p'].values
 
sim_p = dfc['p'].values
 
 
dp_p = (rec_p - sim_p)/sim_p
 
# hval, hbins, _ = ax.hist(dp_p*100., bins=np.linspace(-5, 5, 101),
 
# weights=np.repeat(1./float(rec_p.shape[0]), rec_p.shape[0]), ec='k')
 
nbins = 100
 
 
sig_p,err_p=hist_gaus(dp_p*100.,ax,np.linspace(-dp_lim, dp_lim, nbins+1),klog=0,header=None)
 
 
# ax.set_ylabel('Normalized Density / {:d} Bins'.format(nbins))#, fontsize=20)
 
ax.set_xlabel(r'$\delta p$ (%)')#, fontsize=20)
 
 
 
# ----------------pT resolution----------------------
 
 
ax = axs.flat[3]
 
rec_pt = df.groupby('event')['pt'].first().values
 
sim_pt = dfc.groupby('event')['pt'].first().values
 
dp_pt = (rec_pt - sim_pt)/sim_pt*100
 
# print(dfm.head(),dfc.head())
 
# print(np.vstack([rec_th, sim_th]).T)
 
sig_pt,err_pt=hist_gaus(dp_pt,ax,np.linspace(-dp_lim, dp_lim, nbins+1),klog=0,header=None)
 
 
# ax.set_ylabel('Normalized Density / {:d} Bins'.format(nbins))#, fontsize=20)
 
ax.set_xlabel(r'$\delta p_T$(%)')#, fontsize=20)
 
 
 
 
# ----------------theta resolution----------------------
 
nbins = 100
 
 
ax = axs.flat[4]
 
rec_th = df.groupby('event')['theta'].first().values
 
sim_th = dfc.groupby('event')['theta'].first().values
 
 
# print(dfm.head(),dfc.head())
 
dth_th = (rec_th - sim_th)
 
# print(np.vstack([rec_th, sim_th]).T)
 
 
sig_th,err_th=hist_gaus(dth_th,ax,np.linspace(-th_lim, th_lim, nbins+1),klog=0,header=None)
 
 
# ax.set_ylabel('Normalized Density / {:d} Bins'.format(nbins))#, fontsize=20)
 
ax.set_xlabel(r'$d\theta$ (rad)')#, fontsize=20)
 
 
# -----------------phi resolution----------------------
 
ax = axs.flat[5]
 
rec_th = df.groupby('event')['phi'].first().values
 
sim_th = dfc.groupby('event')['phi'].first().values
 
dth_th = (rec_th - sim_th)
 
 
# print(np.vstack([rec_th, sim_th]).T)
 
# hval, hbins, _ = ax.hist(dth_th, bins=np.linspace(-0.002, 0.002, 101),
 
# weights=np.repeat(1./float(rec_th.shape[0]), rec_th.shape[0]), ec='k')
 
sig_ph,err_ph=hist_gaus(dth_th,ax,np.linspace(-ph_lim, ph_lim, nbins+1),klog=0,header=None)
 
# ax.set_ylabel('Normalized Counts / {:d} Bins'.format(nbins))#, fontsize=20)
 
ax.set_xlabel(r'$d\phi$ (rad)')#, fontsize=20)
    • Comment on lines +336 to +349

      We had issues with r strings for latex in matplotlib axis labels, so this may fail and need to be replaced with r'd$\phi$ (rad)' instead.

Please register or sign in to reply
 
 
# eta distribution
 
# ax = axs.flat[4]
 
# sim_eta = dfm.groupby('event')['eta'].first().values
 
# rec_eta = -np.log(np.tan(df.groupby('event')['direction.theta'].first().values/2.))
 
# hval, hbins, _ = ax.hist(sim_eta, bins=np.linspace(-4, 4, 41), ec='k',alpha=0.3,label="Generated")
 
# hval, hbins, _ = ax.hist(rec_eta, bins=np.linspace(-4, 4, 41), ec='k',alpha=0.3, label="Reconstructed")
 
# nbins = hbins.shape[0] - 1
 
# ax.set_ylabel('Counts / {:d} Bins'.format(nbins), fontsize=20)
 
# ax.set_xlabel(r'$\eta$', fontsize=20)
 
# ax.legend()
 
# -------phi distribution
 
# ax = axs.flat[6]
 
# sim_th_deg = dfm.groupby('event')['phi'].first().values*180/np.pi
 
# rec_th_deg = df.groupby('event')['direction.phi'].first().values*180/np.pi
 
 
# hval, hbins, _ = ax.hist(sim_th_deg, bins=np.linspace(-180, 180,61),
 
# ec='k',alpha=0.3,label="Generated")
 
# hval, hbins, _ = ax.hist(rec_th_deg, bins=np.linspace(-180,180, 61),
 
# ec='k',alpha=0.3, label="Reconstructed")
 
# nbins = hbins.shape[0] - 1
 
# ax.set_ylabel('Counts / {:d} Bins'.format(nbins), fontsize=20)
 
# ax.set_xlabel(r'$\phi$(degree)', fontsize=20)
 
 
 
fig.text(0.5, 0.95, 'Tracking Benchmark (Truth Init.)', fontsize=18, ha='center')
 
fig.savefig(os.path.join(args.outdir, '{}_performance_fit.png'.format(args.nametag)))
 
 
return track_eff_total,sig_p,err_p,sig_pt,err_pt,sig_th,err_th,sig_ph,err_ph
 
 
 
def eta_theta(xx, inverse=0):
 
if inverse==1:
 
return np.arctan((np.e)**(-xx))*2
 
else:
 
return -np.log(np.tan(xx/2.))
 
 
 
 
if __name__ == '__main__':
 
parser = argparse.ArgumentParser()
 
parser.add_argument('rec_file', help='Path to reconstruction output file.')
 
parser.add_argument('-o', dest='outdir', default='.', help='Output directory.')
 
parser.add_argument('-t', '--nametag', type=str, default='tracking', help='Name tag for output files.')
 
parser.add_argument('-m', '--macros', dest='macros', default='rootlogon.C',
 
help='Macro files to be loaded by root, separated by \",\".')
 
parser.add_argument('--mc-collection', dest='mc', default='mcparticles',
 
help='Collection name of MC particles truth info.')
 
parser.add_argument('--rec-collection', dest='rec', default='ReconstructedParticles',
 
help='Collection name of reconstructed particles')
 
parser.add_argument('--tracking-collection', dest='fit', default='outputTrackParameters', help='Collection name of track info from fitting')
 
args = parser.parse_args()
 
 
# multi-threading for RDataFrame
 
nthreads = os.cpu_count()//2
 
ROOT.ROOT.EnableImplicitMT(nthreads)
 
print('CPU available {}, using {}'.format(os.cpu_count(), nthreads))
 
 
os.makedirs(args.outdir, exist_ok=True)
 
rdf=read_data(args,-1) ## -1: optional argument for number of entries to read
 
df_mc,df_rec,df_fit=pre_proc(rdf,0) ## 0: optional argument for pid
 
dfc = df_mc.loc[df_fit["event"].values].reset_index()
 
dfm = df_mc
 
df = df_fit
 
 
_=performance_plot(df_fit,dfc,df_mc)
Loading