diff --git a/benchmarks/imaging_shower_ML/scripts/epcut_scan.py b/benchmarks/imaging_shower_ML/scripts/epcut_scan.py index f153d70751991e8348eff6b5078c7c19055c91bf..f9dd5ef1ba5a592a4f97eb2bc4b686d49cbc2a23 100644 --- a/benchmarks/imaging_shower_ML/scripts/epcut_scan.py +++ b/benchmarks/imaging_shower_ML/scripts/epcut_scan.py @@ -16,6 +16,7 @@ import numpy as np import pandas as pd import matplotlib.pyplot as plt import matplotlib.backends.backend_pdf as mpdf +from matplotlib.ticker import MultipleLocator from collections import OrderedDict from utils import flatten_collection, imcal_info @@ -169,22 +170,25 @@ if __name__ == '__main__': # print(dfr) - # study the epcut performance with binned data - best = { - 'layer': int(nlayers + 1), - 'ep_cut': 0., - 'el_eff': 0., - 'pi_rej': 0., - } - ep_dict = OrderedDict([ - ('info', { - 'nsamples': int(ntotal), - 'targeted_efficiency': args.eff, - 'tracking_resolution': args.res - }), - ('best', best), - ]) + # prepare output container + best = OrderedDict( + layer=int(nlayers + 1), + ep_cut=0., + el_eff=0., + pi_rej=0., + ) + ep_dict = OrderedDict( + info= OrderedDict( + nsamples=int(ntotal), + targeted_efficiency=args.eff, + tracking_resolution=args.res + ), + best=best, + ) + + # scan layers pdf = mpdf.PdfPages(os.path.join(args.outdir, '{}_layers.pdf'.format(args.ntag))) + box_props = dict(boxstyle='round', facecolor='white', alpha=0.5) for i in np.arange(nlayers): elvals, pivals = el_hist[i], pi_hist[i] # cut position @@ -214,11 +218,52 @@ if __name__ == '__main__': ax.set_xlabel('$E/p$', fontsize=20) ax.set_ylabel('Counts', fontsize=20) ax.axvline(x=ep_cut, color='k', ls='--', lw=2) - props = dict(boxstyle='round', facecolor='white', alpha=0.5) ax.text(0.5, 0.97, 'Layer $\leq${:d}\n$\epsilon_e={:.2f}$%\n$R_{{\pi}}={:.2f}$%'.format(i + 1, eff*100., rej*100.), - transform=ax.transAxes, fontsize=20, va='top', ha='center', bbox=props) + transform=ax.transAxes, fontsize=20, va='top', ha='center', bbox=box_props) pdf.savefig(fig) + plt.close(fig) + + # a plot for the cut scan + cuts = [ep_dict.get('layer_{:d}'.format(i + 1)) for i in np.arange(nlayers)] + cuts_pos = np.array([c.get('ep_cut') for c in cuts]) + cuts_rej = np.array([c.get('pi_rej') for c in cuts]) + + # estimated uncertainty (binomial) + nerr = np.sqrt(cuts_rej*(1. - cuts_rej)*ntotal) # npq + # leftover pions + nres = ntotal * (1. - cuts_rej) + nres_lo = np.clip(nres - nerr, 1, ntotal) + nres_hi = np.clip(nres + nerr, 1, ntotal) + + # rejection power + rej_pow = ntotal/nres + rej_err = (rej_pow - ntotal/nres_hi, ntotal/nres_lo - rej_pow) + + fig, ax1 = plt.subplots(figsize=(8, 8)) + ax2 = ax1.twinx() + ax2.set_yscale('log') + + ax1.plot(np.arange(nlayers) + 1, cuts_pos, ls='-', color=colors[0]) + ax2.errorbar(np.arange(nlayers) + 1, rej_pow, yerr=rej_err, + fmt='o', capsize=3, color=colors[1]) + + ax1.set_xlabel('Layer Number', fontsize=20) + ax1.set_ylabel('Cut Position (E/p)', color=colors[0], fontsize=20) + + ax2.grid(axis='both', which='both', ls=':') + ax2.xaxis.set_major_locator(MultipleLocator(5)) + ax2.xaxis.set_minor_locator(MultipleLocator(1)) + ax2.set_ylabel('$\pi^-$ Rejection Power', color=colors[1], fontsize=20) + ax1.tick_params(labelsize=20) + ax2.tick_params(labelsize=20) + ax1.set_title('2D Scan of E/p Cut', fontsize=22) + ax1.text(0.5, 0.03, '$\epsilon_e \geq$ {:.2f}%'.format(args.eff*100.), + transform=ax1.transAxes, fontsize=20, va='bottom', ha='center', bbox=box_props) + fig.subplots_adjust(left=0.15, right=0.85) + + pdf.savefig(fig) + plt.close(fig) pdf.close() # save cut position and performance diff --git a/benchmarks/imaging_shower_ML/scripts/ml_data_preprocess.py b/benchmarks/imaging_shower_ML/scripts/ml_data_preprocess.py index 88fdf6427e0f0d3aa74eb672f7152237f31d2fa8..34c5c5de2e17077cb29a10cd4764fe0476de1f1f 100644 --- a/benchmarks/imaging_shower_ML/scripts/ml_data_preprocess.py +++ b/benchmarks/imaging_shower_ML/scripts/ml_data_preprocess.py @@ -1,5 +1,9 @@ ''' - A script to prepare datasets for ML training + A script to prepare datasets for ML training. + It may also apply E/p cut to clean-up the samples + + Chao Peng (ANL) + 2022/11/13 ''' import ROOT import os diff --git a/benchmarks/imaging_shower_ML/scripts/utils.py b/benchmarks/imaging_shower_ML/scripts/utils.py index 2ddbc61608d0f494ee3d8209f7362f5669418897..8403c2330b0ac7f7160a66fd4a7420b6f8584b85 100644 --- a/benchmarks/imaging_shower_ML/scripts/utils.py +++ b/benchmarks/imaging_shower_ML/scripts/utils.py @@ -1,3 +1,9 @@ +''' + functions and objects that are commonly used by all scripts within this benchmark + + Chao Peng (ANL) + 2022/11/13 +''' import ROOT import pandas as pd import numpy as np