Skip to content
Snippets Groups Projects

add new tracking performance plots with gaus fit

Open Shujie Li requested to merge update_plot_fit into master
1 unresolved thread
2 files
+ 424
0
Compare changes
  • Side-by-side
  • Inline
Files
2
import os
import ROOT
import pandas as pd
import numpy as np
import argparse
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
# pd.set_option(20, None, 'display.max_columns', None)
pd.options.display.max_rows = 40
pd.options.display.max_columns = 100
# pd.set_option('display.max_rows', None)#, 'display.max_columns', None)
import matplotlib.cm as cm
import matplotlib as mpl
import matplotlib.pylab as plt
import awkward as ak
import uproot as ur
# import uproot3
print('Uproot version: ' + ur.__version__)
deg2rad = np.pi/180.0
plt.rcParams['figure.figsize'] = [12.0, 8.0]
# plt.rcParams['figure.dpi'] = 80
SMALL_SIZE = 13
MEDIUM_SIZE = 14
BIGGER_SIZE = 16
plt.rc('font', size=SMALL_SIZE) # controls default text sizes
plt.rc('axes', titlesize=SMALL_SIZE) # fontsize of the axes title
plt.rc('axes', labelsize=MEDIUM_SIZE) # fontsize of the x and y labels
plt.rc('xtick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels
plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize
plt.rc('figure', titlesize=BIGGER_SIZE) # fontsize of the figure title]
# class Inputs:
# def __init__(self, worksim, rec_file):
# self.worksim = worksim
# self.rec_file = worksim+"/"+rec_file
# self.outdir = self.worksim
# macros = 'rootlogon.C'
# nametag = "tracking"
# mc = "mcparticles"
# fit = "outputTrackParameters"
# rec = "ReconstructedParticles"
# append kin variables to dataframe, mcparticles "ps", reconstructedparticles "p"
PARTICLES = pd.DataFrame({
"pion0": (111, 0, 0.1349766), # pi0
"pion+": (211, 1, 0.13957018), # pi+
"pion-": (-211,-1,0.13957018), # pi-
"kaon0L": (130, 0, 0.497648), # K0L
"kaon0S": (310, 0, 0.497648), # K0S
"kaon0": (311, 0, 0.497648), # K0
"kaon+": (321, 1, 0.493677), # K+
"kaon-": (-321, -1, 0.493677), # K-
"proton": (2212, 1, 0.938272), # proton
"neutron": (2112, 0, 0.939565), # neutron
"electron": (11, -1, 0.51099895e-3), # electron
"positron": (-11, 1, 0.51099895e-3),# positron
"photon": (22, 0, 0), # photon
},index=["pid","charge","mass"]).transpose()
def add_kin(data,prefix="p"):
get_4vecs = np.vectorize(lambda x, y, z, m: ROOT.Math.PxPyPzMVector(x, y, z, m))
fourvecs = get_4vecs(*data[[prefix+'.x', prefix+'.y', prefix+'.z', 'mass']].values.T)
data.loc[:, 'p'] = [v.P() for v in fourvecs]
data.loc[:, 'theta'] = [v.Theta() for v in fourvecs]
data.loc[:, 'phi'] = [v.Phi() for v in fourvecs]
data.loc[:, 'eta'] = [v.Eta() for v in fourvecs]
data.loc[:, 'pt'] = [v.Pt() for v in fourvecs]
return data
#1 d histogram and gaus fit
from lmfit.models import GaussianModel
def hist_gaus(dataset,ax, bins=100,klog=0,header=None):
## select data in range if bins is provided as an array
if not np.isscalar(bins):
c1 = dataset<=bins[-1]
c2 = dataset>=bins[0]
dataset=dataset[c1&c2]
## 1st fit
n, bins, patches = ax.hist(dataset, bins,density=False, facecolor='b', alpha=0.3)
xx = bins[0:-1]+(bins[1]-bins[0])/2.0
ymax = np.max(n)
std = np.std(dataset)
mean = np.mean(dataset)
# # print(mean,std)
c1 = xx<=(mean+2*std)
c2 = xx>=(mean-2*std)
cond = c1&c2
while len(n[cond])<len(bins)/4.0:
ax.cla()
diff = (bins[-1]-bins[0])/2.0/2.0
n, bins, patches = ax.hist(dataset, np.linspace(bins[0]+diff,bins[-1]-diff,len(bins)),density=False, facecolor='b', alpha=0.3)
xx = bins[0:-1]+(bins[1]-bins[0])/2.0
c1 = xx<=(mean+2*std)
c2 = xx>=(mean-2*std)
cond = c1&c2
model = GaussianModel()
# create parameters with initial guesses:
params = model.make_params(center=np.median(dataset), amplitude=np.max(n), sigma=np.std(dataset))
result = model.fit(n, params, x=xx)
# 2nd fit
std = result.params['sigma'].value
c1 = xx<=(mean+2*std)
c2 = xx>=(mean-2*std)
cond = c1&c2
model = GaussianModel()
params = model.make_params(center=np.median(dataset), amplitude=np.max(n), sigma=np.std(dataset))
result = model.fit(n[cond], params, x=xx[cond])
print(result.success)
# print(result.fit_report())
# print (result.best_values)
# plt.plot(xx, result.best_fit, 'r-', label='best fit')
if len(result.best_fit)>0:
ax.plot(xx[cond], result.best_fit, 'r-', label='sigma=%g,err=%g' %(result.params['sigma'].value,result.params['sigma'].stderr))
ax.legend(title=header, frameon=False,loc='upper left')
if klog:
ax.set_yscale('log')
ax.set_ylim(1,ymax*10)
else:
ax.set_ylim(0,ymax*1.3)
return result.params['sigma'].value,result.params['sigma'].stderr
def read_data(args, rows=-1):
fname = args.rec_file
rdf_rec = ROOT.RDataFrame('events',fname)
if rows>0:
rdf_rec=rdf_rec.Range(0,rows)
print("read in ",rdf_rec.Count().GetValue(), "rows")
return rdf_rec
# read from RDataFrame and flatten a given collection, return pandas dataframe
def flatten_collection(rdf, collection, cols=None):
if not cols:
cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
else:
cols = ['{}.{}'.format(collection, c) for c in cols]
if not cols:
print('cannot find any branch under collection {}'.format(collection))
return pd.DataFrame()
for cname in cols:
if "[" in cname:
cols.remove(cname) ## can not convert array to python. drop for now
# print(cols)
data = rdf.AsNumpy(cols)
# flatten the data, add an event id to identify clusters from different events
evns = []
for i, vec in enumerate(data[cols[0]]):
evns += [i]*vec.size()
for n, vals in data.items():
# make sure ints are not converted to floats
typename = vals[0].__class__.__name__.lower()
dtype = np.int64 if 'int' in typename or 'long' in typename else np.float64
# type safe creation
data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
# build data frame
dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
dfp.loc[:, 'event'] = evns
dfp.rename(columns={c: c.replace(collection + '.', '') for c in dfp.columns}, inplace=True)
return dfp
def pre_proc(rdf_rec,pid=0):
df_fit = flatten_collection(rdf_rec,args.fit)
df_fit = df_fit.groupby('event').first().reset_index()
df_fit["p"] = 1./df_fit["qOverP"]
df_fit["eta"]= -np.log(np.tan(df_fit['direction.theta'].values/2.))
df_fit["pt"] = df_fit["p"]*np.sin(df_fit["direction.theta"])
df_fit["theta"] = df_fit["direction.theta"]
df_fit["phi"] = df_fit["direction.phi"]
# if len(df_fit)<1:
print (args.fit,df_fit.columns,len(df_fit))
df_rec = flatten_collection(rdf_rec, args.rec)
print (args.rec,df_rec.columns,len(df_rec))
if len(df_rec)>0:
# df_rec=df_rec[df_rec.pid==pid]
df_rec = df_rec.groupby('event').first().reset_index()
# df_rec = df_rec.reset_index()
# if "mass" not in df_rec.columns:
if pid!=0:
# df_rec["mass"] = [PARTICLES.loc[PARTICLES.pid==pid]['mass'].values for pid in df_rec.pid]
mass = PARTICLES.loc[PARTICLES.pid==pid]['mass'].values[0]
df_rec["mass"] = mass*np.ones(len(df_rec))
df_rec = add_kin(df_rec,"p")
df_mc = flatten_collection(rdf_rec,args.mc)
if pid!=0:
df_mc = df_mc[df_mc.pdgID==pid]
df_mc = df_mc[(df_mc.genStatus==1)].reset_index(drop=False)
# df_mc = df_mc.set_index('event').loc[df['event'].values]
print (args.mc,df_mc.columns,len(df_mc))
df_mc = add_kin(df_mc,"ps")
return df_mc, df_rec,df_fit
## --------tracking performance plots-------------------
def performance_plot(df,dfc,dfm,dirname=""):
dp_lim=5 #%
th_lim=0.003 #rad
ph_lim=0.003
fig, axs = plt.subplots(3, 2, figsize=(20, 14))
for ax in axs.flat:
ax.tick_params(direction='in', which='both')#, labelsize=20)
ax.grid(linestyle=':')
ax.set_axisbelow(True)
# ---------------tracking efficiency v.s. eta-----------------
ax = axs.flat[0]
eta_bins = np.linspace(-4, 4, 21)
sim_eta, _ = np.histogram(dfm['eta'].values, bins=eta_bins)
## ?? outputTrackParameters.direction.phi = mcparticles2.theta???
rec_eta, _ = np.histogram(df['eta'], bins=eta_bins)
# rec_eta, _ = np.histogram(-np.log(np.tan(df['direction.theta'].values/2.)), bins=eta_bins)
track_eff_total = np.sum(rec_eta)/np.sum(sim_eta)
eta_centers = (eta_bins[1:] + eta_bins[:-1])/2.
eta_binsize = np.mean(np.diff(eta_centers))
# cut off not simulated bins
mask = sim_eta > 0
sim_eta = sim_eta[mask]
rec_eta = rec_eta[mask]
eta_centers = eta_centers[mask]
track_eff = np.array(rec_eta)/np.array(sim_eta)
# binary distribution, pq*sqrt(N)
# TODO check the errors
eff = np.mean(track_eff)
track_err = eff*(1. - eff)*np.reciprocal(np.sqrt(sim_eta))
# rec_err = eff*(1. - eff)*np.sqrt(rec_eta)
# track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), (rec_eta - rec_err)/sim_eta)
# track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), (rec_eta + rec_err)/sim_eta) - track_eff
track_eff_lower = track_eff - np.maximum(np.zeros(shape=rec_eta.shape), track_eff - track_err)
track_eff_upper = np.minimum(np.ones(shape=rec_eta.shape), track_eff + track_err) - track_eff
ax.errorbar(eta_centers, track_eff, xerr=eta_binsize/2., yerr=[track_eff_lower, track_eff_upper],
fmt='o', capsize=3)
ax.set_ylim(0., 1.1)
ax.set_xlim(-4.5, 4.5)
ax.set_ylabel('Tracking Efficiency')#, fontsize=20)
ax.set_xlabel(r'$\eta$')#, fontsize=20)
ax.text(-4,1,"recon/generated events= %d / %d =%.2f" %(len(df),len(dfm),len(df)/len(dfm)))
# ---------------theta distribution------------
ax = axs.flat[1]
sim_th_deg = dfm.groupby('event')['theta'].first().values*180/np.pi
rec_th_deg = df.groupby('event')['theta'].first().values*180/np.pi
hval, hbins, _ = ax.hist(sim_th_deg, bins=np.linspace(-0, 180,61),
ec='k',alpha=0.3,label="Generated")
hval, hbins, _ = ax.hist(rec_th_deg, bins=np.linspace(-0,180, 61),
ec='k',alpha=0.3, label="Reconstructed")
nbins = hbins.shape[0] - 1
# ax.set_ylabel('Counts / {:d} Bins'.format(nbins))#, fontsize=20)
ax.set_xlabel(r'$\theta$(degree)')#, fontsize=20)
# ----------momentum resolution--------------
ax = axs.flat[2]
# rec_p = dfc['charge'].values/df['qOverP'].values
rec_p = df['p'].values
sim_p = dfc['p'].values
dp_p = (rec_p - sim_p)/sim_p
# hval, hbins, _ = ax.hist(dp_p*100., bins=np.linspace(-5, 5, 101),
# weights=np.repeat(1./float(rec_p.shape[0]), rec_p.shape[0]), ec='k')
nbins = 100
sig_p,err_p=hist_gaus(dp_p*100.,ax,np.linspace(-dp_lim, dp_lim, nbins+1),klog=0,header=None)
# ax.set_ylabel('Normalized Density / {:d} Bins'.format(nbins))#, fontsize=20)
ax.set_xlabel(r'$\delta p$ (%)')#, fontsize=20)
# ----------------pT resolution----------------------
ax = axs.flat[3]
rec_pt = df.groupby('event')['pt'].first().values
sim_pt = dfc.groupby('event')['pt'].first().values
dp_pt = (rec_pt - sim_pt)/sim_pt*100
# print(dfm.head(),dfc.head())
# print(np.vstack([rec_th, sim_th]).T)
sig_pt,err_pt=hist_gaus(dp_pt,ax,np.linspace(-dp_lim, dp_lim, nbins+1),klog=0,header=None)
# ax.set_ylabel('Normalized Density / {:d} Bins'.format(nbins))#, fontsize=20)
ax.set_xlabel(r'$\delta p_T$(%)')#, fontsize=20)
# ----------------theta resolution----------------------
nbins = 100
ax = axs.flat[4]
rec_th = df.groupby('event')['theta'].first().values
sim_th = dfc.groupby('event')['theta'].first().values
# print(dfm.head(),dfc.head())
dth_th = (rec_th - sim_th)
# print(np.vstack([rec_th, sim_th]).T)
sig_th,err_th=hist_gaus(dth_th,ax,np.linspace(-th_lim, th_lim, nbins+1),klog=0,header=None)
# ax.set_ylabel('Normalized Density / {:d} Bins'.format(nbins))#, fontsize=20)
ax.set_xlabel(r'$d\theta$ (rad)')#, fontsize=20)
# -----------------phi resolution----------------------
ax = axs.flat[5]
rec_th = df.groupby('event')['phi'].first().values
sim_th = dfc.groupby('event')['phi'].first().values
dth_th = (rec_th - sim_th)
# print(np.vstack([rec_th, sim_th]).T)
# hval, hbins, _ = ax.hist(dth_th, bins=np.linspace(-0.002, 0.002, 101),
# weights=np.repeat(1./float(rec_th.shape[0]), rec_th.shape[0]), ec='k')
sig_ph,err_ph=hist_gaus(dth_th,ax,np.linspace(-ph_lim, ph_lim, nbins+1),klog=0,header=None)
# ax.set_ylabel('Normalized Counts / {:d} Bins'.format(nbins))#, fontsize=20)
ax.set_xlabel(r'$d\phi$ (rad)')#, fontsize=20)
    • Comment on lines +336 to +349

      We had issues with r strings for latex in matplotlib axis labels, so this may fail and need to be replaced with r'd$\phi$ (rad)' instead.

Please register or sign in to reply
# eta distribution
# ax = axs.flat[4]
# sim_eta = dfm.groupby('event')['eta'].first().values
# rec_eta = -np.log(np.tan(df.groupby('event')['direction.theta'].first().values/2.))
# hval, hbins, _ = ax.hist(sim_eta, bins=np.linspace(-4, 4, 41), ec='k',alpha=0.3,label="Generated")
# hval, hbins, _ = ax.hist(rec_eta, bins=np.linspace(-4, 4, 41), ec='k',alpha=0.3, label="Reconstructed")
# nbins = hbins.shape[0] - 1
# ax.set_ylabel('Counts / {:d} Bins'.format(nbins), fontsize=20)
# ax.set_xlabel(r'$\eta$', fontsize=20)
# ax.legend()
# -------phi distribution
# ax = axs.flat[6]
# sim_th_deg = dfm.groupby('event')['phi'].first().values*180/np.pi
# rec_th_deg = df.groupby('event')['direction.phi'].first().values*180/np.pi
# hval, hbins, _ = ax.hist(sim_th_deg, bins=np.linspace(-180, 180,61),
# ec='k',alpha=0.3,label="Generated")
# hval, hbins, _ = ax.hist(rec_th_deg, bins=np.linspace(-180,180, 61),
# ec='k',alpha=0.3, label="Reconstructed")
# nbins = hbins.shape[0] - 1
# ax.set_ylabel('Counts / {:d} Bins'.format(nbins), fontsize=20)
# ax.set_xlabel(r'$\phi$(degree)', fontsize=20)
fig.text(0.5, 0.95, 'Tracking Benchmark (Truth Init.)', fontsize=18, ha='center')
fig.savefig(os.path.join(args.outdir, '{}_performance_fit.png'.format(args.nametag)))
return track_eff_total,sig_p,err_p,sig_pt,err_pt,sig_th,err_th,sig_ph,err_ph
def eta_theta(xx, inverse=0):
if inverse==1:
return np.arctan((np.e)**(-xx))*2
else:
return -np.log(np.tan(xx/2.))
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('rec_file', help='Path to reconstruction output file.')
parser.add_argument('-o', dest='outdir', default='.', help='Output directory.')
parser.add_argument('-t', '--nametag', type=str, default='tracking', help='Name tag for output files.')
parser.add_argument('-m', '--macros', dest='macros', default='rootlogon.C',
help='Macro files to be loaded by root, separated by \",\".')
parser.add_argument('--mc-collection', dest='mc', default='mcparticles',
help='Collection name of MC particles truth info.')
parser.add_argument('--rec-collection', dest='rec', default='ReconstructedParticles',
help='Collection name of reconstructed particles')
parser.add_argument('--tracking-collection', dest='fit', default='outputTrackParameters', help='Collection name of track info from fitting')
args = parser.parse_args()
# multi-threading for RDataFrame
nthreads = os.cpu_count()//2
ROOT.ROOT.EnableImplicitMT(nthreads)
print('CPU available {}, using {}'.format(os.cpu_count(), nthreads))
os.makedirs(args.outdir, exist_ok=True)
rdf=read_data(args,-1) ## -1: optional argument for number of entries to read
df_mc,df_rec,df_fit=pre_proc(rdf,0) ## 0: optional argument for pid
dfc = df_mc.loc[df_fit["event"].values].reset_index()
dfm = df_mc
df = df_fit
_=performance_plot(df_fit,dfc,df_mc)
Loading