From e3b0d5f2d3170b6898c363d382e859e87bb8df07 Mon Sep 17 00:00:00 2001
From: Jiajun Huang <jhuan328@ucr.edu>
Date: Wed, 31 Jul 2024 15:55:18 -0700
Subject: [PATCH] Modified zdc lyso analysis code.

---
 benchmarks/zdc_lyso/analysis/analysis.py | 164 +++++++++++++++++------
 1 file changed, 125 insertions(+), 39 deletions(-)

diff --git a/benchmarks/zdc_lyso/analysis/analysis.py b/benchmarks/zdc_lyso/analysis/analysis.py
index 04bd233f..eace6112 100644
--- a/benchmarks/zdc_lyso/analysis/analysis.py
+++ b/benchmarks/zdc_lyso/analysis/analysis.py
@@ -3,9 +3,10 @@ import matplotlib.pyplot as plt
 import mplhep as hep
 import uproot
 import pandas as pd
-import os
 from scipy.optimize import curve_fit
 from matplotlib.backends.backend_pdf import PdfPages
+import os
+import awkward as ak
 
 plt.figure()
 hep.set_style(hep.style.CMS)
@@ -22,18 +23,21 @@ def rotateY(xdata, zdata, angle):
     return rotatedx, rotatedz
     
 Energy = [0.005, 0.01, 0.05, 0.1, 0.5, 1.0]
-q0 = [5, 10, 40, 90, 400, 700]
-q1 = [0.5, 0.5, 0.9, 5, 10, 20]
+
 
 df = pd.DataFrame({})
 for eng in Energy:
     tree = uproot.open(f'sim_output/zdc_lyso/{os.environ["DETECTOR_CONFIG"]}_gamma_{eng}GeV_theta_0deg_thru_0.3deg.eicrecon.tree.edm4eic.root')['events']
-    ecal_reco_energy = tree['EcalFarForwardZDCClusters/EcalFarForwardZDCClusters.energy'].array()
-    #hcal_reco_energy = tree['HcalFarForwardZDCClusters/HcalFarForwardZDCClusters.energy'].array()
+    ecal_reco_energy = list(map(sum, tree['EcalFarForwardZDCClusters/EcalFarForwardZDCClusters.energy'].array()))
+    hcal_reco_energy = list(map(sum, tree['HcalFarForwardZDCClusters/HcalFarForwardZDCClusters.energy'].array()))
+    ecal_rec_energy = list(map(sum, tree['EcalFarForwardZDCRecHits/EcalFarForwardZDCRecHits.energy'].array()))
+    hcal_rec_energy = list(map(sum, tree['HcalFarForwardZDCRecHits/HcalFarForwardZDCRecHits.energy'].array()))
+    ecal_reco_clusters = [len(row) if len(row)>=1 else 0 for row in tree['EcalFarForwardZDCClusters/EcalFarForwardZDCClusters.nhits'].array()]
+    ecal_reco_nhits = [row[0] if len(row)>=1 else 0 for row in tree['EcalFarForwardZDCClusters/EcalFarForwardZDCClusters.nhits'].array()]
     
     tree = uproot.open(f'sim_output/zdc_lyso/{os.environ["DETECTOR_CONFIG"]}_gamma_{eng}GeV_theta_0deg_thru_0.3deg.edm4hep.root')['events']
-    ecal_sim_energy = tree['EcalFarForwardZDCHits/EcalFarForwardZDCHits.energy'].array()
-    hcal_sim_energy = tree['HcalFarForwardZDCHits/HcalFarForwardZDCHits.energy'].array()
+    ecal_sim_energy = list(map(sum, tree['EcalFarForwardZDCHits/EcalFarForwardZDCHits.energy'].array()))
+    hcal_sim_energy = list(map(sum, tree['HcalFarForwardZDCHits/HcalFarForwardZDCHits.energy'].array()))
 
     par_x = tree['MCParticles/MCParticles.momentum.x'].array()[:,2]
     par_y = tree['MCParticles/MCParticles.momentum.y'].array()[:,2]
@@ -41,66 +45,103 @@ for eng in Energy:
     
     eng = int(eng*1000)
 
-    ecal_reco_energy = pd.DataFrame({f'ecal_reco_energy_{eng}': np.array(ecal_reco_energy.tolist(), dtype=object)})
-    #hcal_reco_energy = pd.DataFrame({f'hcal_reco_energy_{eng}': np.array(hcal_reco_energy.tolist(), dtype=object)})
-    ecal_sim_energy = pd.DataFrame({f'ecal_sim_energy_{eng}': np.array(ecal_sim_energy.tolist(), dtype=object)})
-    hcal_sim_energy = pd.DataFrame({f'hcal_sim_energy_{eng}': np.array(hcal_sim_energy.tolist(), dtype=object)})
-
+    ecal_reco_energy = pd.DataFrame({f'ecal_reco_energy_{eng}': np.array(ecal_reco_energy, dtype=object)})
+    hcal_reco_energy = pd.DataFrame({f'hcal_reco_energy_{eng}': np.array(hcal_reco_energy, dtype=object)})
+    ecal_rec_energy = pd.DataFrame({f'ecal_rec_energy_{eng}': np.array(ecal_rec_energy, dtype=object)})
+    hcal_rec_energy = pd.DataFrame({f'hcal_rec_energy_{eng}': np.array(hcal_rec_energy, dtype=object)})
+    ecal_sim_energy = pd.DataFrame({f'ecal_sim_energy_{eng}': np.array(ecal_sim_energy, dtype=object)})
+    hcal_sim_energy = pd.DataFrame({f'hcal_sim_energy_{eng}': np.array(hcal_sim_energy, dtype=object)})
+    ecal_reco_nhits = pd.DataFrame({f'ecal_reco_nhits_{eng}': np.array(ecal_reco_nhits, dtype=object)})
+    ecal_reco_clusters = pd.DataFrame({f'ecal_reco_clusters_{eng}': np.array(ecal_reco_clusters, dtype=object)})
     par_x = pd.DataFrame({f'par_x_{eng}': np.array(par_x.tolist(), dtype=object)})
     par_y = pd.DataFrame({f'par_y_{eng}': np.array(par_y.tolist(), dtype=object)})
     par_z = pd.DataFrame({f'par_z_{eng}': np.array(par_z.tolist(), dtype=object)})
 
-    df = pd.concat([df,ecal_reco_energy,ecal_sim_energy,hcal_sim_energy,par_x,par_y,par_z],axis=1)
+
+    df = pd.concat([df,ecal_reco_energy,ecal_rec_energy,ecal_sim_energy,hcal_reco_energy,hcal_rec_energy,hcal_sim_energy,ecal_reco_clusters,ecal_reco_nhits,par_x,par_y,par_z],axis=1)
+
 
 mu = []
 sigma = []
-resolution = []
-
 fig1, ax = plt.subplots(3,2,figsize=(20,10))
-#fig1.suptitle('ZDC ECal Cluster Energy Reconstruction')
+fig1.suptitle('ZDC ECal Cluster Energy Reconstruction')
 
 plt.tight_layout()
 for i in range(6):
+    x = df[f'par_x_{eng}'].astype(float).to_numpy()
+    y = df[f'par_y_{eng}'].astype(float).to_numpy()
+    z = df[f'par_z_{eng}'].astype(float).to_numpy()
+    x, z = rotateY(x,z, 0.025)
+    theta = np.arccos(z/np.sqrt((x**2+y**2+z**2)))*1000
+    condition = theta <= 3.5
+    
     plt.sca(ax[i%3,i//3])
     eng = int(Energy[i]*1000)
     plt.title(f'Gamma Energy: {eng} MeV')
-    temp = np.array([sum(item) if (item != 0) else 0 for item in df[f'ecal_reco_energy_{eng}']])
-    hist, x = np.histogram(np.array(temp)*1000,bins=30)
+    temp = np.array(df[f'ecal_reco_energy_{eng}'].astype(float).to_numpy()[condition])*1000
+    hist, x = np.histogram(temp,bins=np.linspace(min(temp),max(temp)+np.std(abs(temp)),2*int(np.sqrt(len(temp)))))
     x = x[1:]/2 + x[:-1]/2
-    plt.errorbar(x,hist,yerr=np.sqrt(hist),fmt='-o')
-    temp = np.array([item[0] for item in df[f'ecal_reco_energy_{eng}'] if item])
-    hist, x = np.histogram(np.array(temp)*1000,bins=30)
+    plt.errorbar(x,hist,yerr=np.sqrt(hist),fmt='-o',label='Cluster')
+    coeff, covar = curve_fit(gaussian,x[1:],hist[1:],p0=(max(hist[x>=np.std(abs(temp))]),np.mean(temp[temp!=0]),np.std(temp[temp!=0])))
+    #plt.plot(np.linspace(coeff[1]-3*coeff[2],coeff[1]+3*coeff[2],50),gaussian(np.linspace(coeff[1]-3*coeff[2],coeff[1]+3*coeff[2],50),*coeff))
+    mu.append(coeff[1])
+    sigma.append(coeff[2])
+    
+    temp = np.array(df[f'ecal_rec_energy_{eng}'].astype(float).to_numpy()[condition])*1000
+    hist, x = np.histogram(temp,bins=np.linspace(min(temp),max(temp)+np.std(abs(temp)),2*int(np.sqrt(len(temp)))))
     x = x[1:]/2 + x[:-1]/2
-    coeff, covar = curve_fit(gaussian,x,hist,p0=(200,q0[i],q1[i]),maxfev = 80000)
-    plt.plot(np.linspace(coeff[1]-5*coeff[2],coeff[1]+5*coeff[2],50),gaussian(np.linspace(coeff[1]-5*coeff[2],coeff[1]+5*coeff[2],50),*coeff)
-            ,label = f'$\mu$ = {coeff[1]:.3f} $\pm$ {covar[1][1]:.3f}\n$\sigma$ = {np.abs(coeff[2]):.3f} $\pm$ {covar[2][2]:.3f}\nResolution = {np.abs(coeff[2])*100/coeff[1]:.2f}%')
+    plt.errorbar(x,hist,yerr=np.sqrt(hist),fmt='-o',label='Digitization')
+    coeff, covar = curve_fit(gaussian,x[1:],hist[1:],p0=(max(hist[x>=np.std(abs(temp))]),np.mean(temp[temp!=0]),np.std(temp[temp!=0])))
+    #plt.plot(np.linspace(coeff[1]-3*coeff[2],coeff[1]+3*coeff[2],50),gaussian(np.linspace(coeff[1]-3*coeff[2],coeff[1]+3*coeff[2],50),*coeff))
+    mu.append(coeff[1])
+    sigma.append(coeff[2])
     
-    plt.xlabel('Energy (MeV)')
-    plt.legend()
+    temp = np.array(df[f'ecal_sim_energy_{eng}'].astype(float).to_numpy()[condition])*1000
+    hist, x = np.histogram(temp,bins=np.linspace(min(temp),max(temp)+np.std(abs(temp)),2*int(np.sqrt(len(temp)))))
+    x = x[1:]/2 + x[:-1]/2
+    plt.errorbar(x,hist,yerr=np.sqrt(hist),fmt='-o',label='Simulation')
+    coeff, covar = curve_fit(gaussian,x[1:],hist[1:],p0=(max(hist[x>=np.std(abs(temp))]),np.mean(temp[temp!=0]),np.std(temp[temp!=0])))
+    #plt.plot(np.linspace(coeff[1]-3*coeff[2],coeff[1]+3*coeff[2],50),gaussian(np.linspace(coeff[1]-3*coeff[2],coeff[1]+3*coeff[2],50),*coeff))
     mu.append(coeff[1])
     sigma.append(coeff[2])
-    resolution.append(np.abs(coeff[2])*100/coeff[1])
+    
+    plt.xlabel('Energy (MeV)')
+    plt.legend()
+    
 #plt.savefig('results/Energy_reconstruction_cluster.pdf')
-#plt.show()
+
+mu = np.array(mu)
+sigma = np.array(sigma)
+
+plt.show()
 
 fig2, (ax1,ax2) = plt.subplots(2,1,figsize=(15,10),sharex=True)
 
 plt.tight_layout()
 # Plot data on primary axis
-ax1.scatter(np.array(Energy)*1000, mu, c='b')
+ax1.scatter(np.array(Energy)*1000, mu[::3], label='cluster')
+ax1.scatter(np.array(Energy)*1000, mu[1::3], label='digitization')
+ax1.scatter(np.array(Energy)*1000, mu[2::3], label='simulation')
+
 ax1.plot([4.5,1000],[4.5,1000],c='black',label='x=y')
 ax1.set_ylabel('Reconstructed Energy (MeV)')
 ax1.set_yscale('log')
 ax1.legend()
 ax1.set_title('ECal Craterlake Cluster Energy Reconstruction')
 
-ax2.plot(np.array(Energy)*1000, resolution, c='r')
-ax2.scatter(np.array(Energy)*1000, resolution, c='r')
+ax2.errorbar(np.array(Energy)*1000, abs(sigma[::3]/mu[::3])*100, fmt='-o', label='cluster')
+ax2.errorbar(np.array(Energy)*1000, abs(sigma[1::3]/mu[1::3])*100, fmt='-o', label='digitization')
+ax2.errorbar(np.array(Energy)*1000, abs(sigma[2::3]/mu[2::3])*100, fmt='-o', label='simulation')
+
 ax2.set_ylabel('Resolution (%)')
 ax2.set_xlabel('Gamma Energy (MeV)')
 ax2.set_xscale('log')
+ax2.legend()
+
 #plt.savefig('results/Energy_resolution.pdf')
-#plt.show()
+
+plt.show()
+
 
 htower = []
 herr = []
@@ -127,14 +168,14 @@ for i in range(6):
     condition = theta <= 3.5
 
     plt.title(f'Gamma Energy: {eng} MeV')
-    energy1 = np.array([sum(item) if (item != 0) else 0 for item in df[f'hcal_sim_energy_{eng}']])#df.eval(f'hcal_sim_energy_{eng}').apply(lambda row: sum(row))
+    energy1 = df[f'hcal_sim_energy_{eng}'].astype(float).to_numpy()#df.eval(f'hcal_sim_energy_{eng}').apply(lambda row: sum(row))
     hist, x = np.histogram(energy1*1000,bins=np.logspace(0,3,200))
     x = x[1:]/2 + x[:-1]/2
     plt.plot(x,hist,marker='o',label="HCal")
     hhits.append(len(energy1[energy1!=0]))
     condition1 = energy1!=0
     hhits_cut.append(len(energy1[condition & condition1])/len(condition[condition==True]))
-    energy = np.array([sum(item) if (item != 0) else 0 for item in df[f'ecal_sim_energy_{eng}']])#df.eval(f'ecal_sim_energy_{eng}').apply(lambda row: sum(row))
+    energy = df[f'ecal_sim_energy_{eng}'].astype(float).to_numpy()#df.eval(f'ecal_sim_energy_{eng}').apply(lambda row: sum(row))
     hist, x = np.histogram(energy*1000,bins=np.logspace(0,3,200))
     x = x[1:]/2 + x[:-1]/2
     plt.plot(x,hist,marker='o',label="ECal")
@@ -147,13 +188,18 @@ for i in range(6):
     plt.xscale('log')
     plt.xlim(1e0,1e3)
 
+
+
+    
+
 plt.xlabel('Energy (MeV)')
+
 #plt.savefig('results/Energy_deposition.pdf')
-#plt.show()
+plt.show()
 
 fig4, ax = plt.subplots(2,1,sharex=True,gridspec_kw={'height_ratios': [2,1]})
 plt.sca(ax[0])
-plt.errorbar(np.array(Energy)*1000,np.array(hmean)*47.619+np.array(emean),label='HCal+ECal',fmt='-o')
+plt.errorbar(np.array(Energy)*1000,np.array(hmean)*47.619+np.array(emean),label='HCal/sf+ECal',fmt='-o')
 plt.errorbar(np.array(Energy)*1000,emean,label='ECal',fmt='-o')
 plt.legend()
 plt.yscale('log')
@@ -166,7 +212,7 @@ plt.ylabel('Fraction of energy\n deposited in Hcal (%)')
 plt.xlabel('Truth Energy (MeV)')
 #plt.savefig('results/Energy_ratio_and_Leakage.pdf')
 plt.tight_layout()
-#plt.show()
+plt.show()
 
 fig5 = plt.figure()
 plt.errorbar(np.array(Energy)*1000,np.array(hhits)/1000*100,label='HCal Hits',fmt='-o')
@@ -183,7 +229,46 @@ plt.xlabel('Simulation Truth Gamma Energy (MeV)')
 plt.ylabel('Fraction of Events with non-zero energy (%)')
 #plt.savefig('results/Hits.pdf')
 plt.xscale('log')
-#plt.show()
+plt.show()
+
+fig6, ax = plt.subplots(2,3,figsize=(20,10))
+fig6.suptitle('ZDC Clustering')
+fig6.tight_layout(pad=1.8)
+for i in range(6):
+    plt.sca(ax[i//3,i%3])
+    eng = int(Energy[i]*1000)
+    
+    x = df[f'par_x_{eng}'].astype(float).to_numpy()
+    y = df[f'par_y_{eng}'].astype(float).to_numpy()
+    z = df[f'par_z_{eng}'].astype(float).to_numpy()
+    x, z = rotateY(x,z, 0.025)
+    theta = np.arccos(z/np.sqrt((x**2+y**2+z**2)))*1000
+    condition = theta <= 3.5
+    
+    plt.hist(df[f'ecal_reco_clusters_{eng}'][condition],bins=np.linspace(0,5,6))
+    plt.xlabel('Number of Clusters')
+    plt.title(f'Gamma Energy: {eng} MeV')
+plt.show()
+
+fig7, ax = plt.subplots(2,3,figsize=(20,10))
+fig7.suptitle('ZDC Towering in Clusters')
+fig7.tight_layout(pad=1.8)
+for i in range(6):
+    plt.sca(ax[i//3,i%3])
+    eng = int(Energy[i]*1000)
+    
+    x = df[f'par_x_{eng}'].astype(float).to_numpy()
+    y = df[f'par_y_{eng}'].astype(float).to_numpy()
+    z = df[f'par_z_{eng}'].astype(float).to_numpy()
+    x, z = rotateY(x,z, 0.025)
+    theta = np.arccos(z/np.sqrt((x**2+y**2+z**2)))*1000
+    condition = theta <= 3.5
+    
+    plt.hist(df[f'ecal_reco_nhits_{eng}'][condition],bins=np.linspace(0,max(df[f'ecal_reco_nhits_{eng}'][condition]),max(df[f'ecal_reco_nhits_{eng}'][condition])+1))
+    plt.xlabel('Number of tower in Clusters')
+    plt.title(f'Gamma Energy: {eng} MeV')
+plt.show()
+
 
 #pdfs = ['results/Energy_reconstruction_cluster.pdf','results/Energy_resolution.pdf','results/Energy_deposition.pdf','results/Energy_ratio_and_Leakage.pdf','results/Hits.pdf']
 with PdfPages(f'results/{os.environ["DETECTOR_CONFIG"]}/zdc_lyso/plots.pdf') as pdf:
@@ -192,4 +277,5 @@ with PdfPages(f'results/{os.environ["DETECTOR_CONFIG"]}/zdc_lyso/plots.pdf') as
     pdf.savefig(fig3)
     pdf.savefig(fig4)
     pdf.savefig(fig5)
-
+    pdf.savefig(fig6)
+    pdf.savefig(fig7)
-- 
GitLab