Update IMCAL ML benchmarks

93478735 · Chao Peng · f0717c76 · 93478735 · 93478735 · 93478735
Commit 93478735 authored 2 years ago by Chao Peng
--- a/benchmarks/imaging_shower_ML/scripts/epcut_scan.py
+++ b/benchmarks/imaging_shower_ML/scripts/epcut_scan.py
@@ -16,6 +16,7 @@ import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 import matplotlib.backends.backend_pdf as mpdf
+from matplotlib.ticker import MultipleLocator
 from collections import OrderedDict
 from utils import flatten_collection, imcal_info
@@ -169,22 +170,25 @@ if __name__ == '__main__':
        # print(dfr)
-    # study the epcut performance with binned data
+    # prepare output container
-    best = {
+    best = OrderedDict(
-        'layer': int(nlayers + 1),
+        layer=int(nlayers + 1),
-        'ep_cut': 0.,
+        ep_cut=0.,
-        'el_eff': 0.,
+        el_eff=0.,
-        'pi_rej': 0.,
+        pi_rej=0.,
-        }
+        )
-    ep_dict = OrderedDict([
+    ep_dict = OrderedDict(
-        ('info', {
+        info= OrderedDict(
-            'nsamples': int(ntotal),
+            nsamples=int(ntotal),
-            'targeted_efficiency': args.eff,
+            targeted_efficiency=args.eff,
-            'tracking_resolution': args.res
+            tracking_resolution=args.res
-            }),
+            ),
-        ('best', best),
+        best=best,
-        ])
+        )
+    # scan layers
    pdf = mpdf.PdfPages(os.path.join(args.outdir, '{}_layers.pdf'.format(args.ntag)))
+    box_props = dict(boxstyle='round', facecolor='white', alpha=0.5)
    for i in np.arange(nlayers):
        elvals, pivals = el_hist[i], pi_hist[i]
        # cut position
@@ -214,11 +218,52 @@ if __name__ == '__main__':
        ax.set_xlabel('$E/p$', fontsize=20)
        ax.set_ylabel('Counts', fontsize=20)
        ax.axvline(x=ep_cut, color='k', ls='--', lw=2)
-        props = dict(boxstyle='round', facecolor='white', alpha=0.5)
        ax.text(0.5, 0.97,
                'Layer $\leq${:d}\n$\epsilon_e={:.2f}$%\n$R_{{\pi}}={:.2f}$%'.format(i + 1, eff*100., rej*100.),
-                transform=ax.transAxes, fontsize=20, va='top', ha='center', bbox=props)
+                transform=ax.transAxes, fontsize=20, va='top', ha='center', bbox=box_props)
        pdf.savefig(fig)
+        plt.close(fig)
+    # a plot for the cut scan
+    cuts = [ep_dict.get('layer_{:d}'.format(i + 1)) for i in np.arange(nlayers)]
+    cuts_pos = np.array([c.get('ep_cut') for c in cuts])
+    cuts_rej = np.array([c.get('pi_rej') for c in cuts])
+    # estimated uncertainty (binomial)
+    nerr = np.sqrt(cuts_rej*(1. - cuts_rej)*ntotal) # npq
+    # leftover pions
+    nres = ntotal * (1. - cuts_rej)
+    nres_lo = np.clip(nres - nerr, 1, ntotal)
+    nres_hi = np.clip(nres + nerr, 1, ntotal)
+    # rejection power
+    rej_pow = ntotal/nres
+    rej_err = (rej_pow - ntotal/nres_hi, ntotal/nres_lo - rej_pow)
+    fig, ax1 = plt.subplots(figsize=(8, 8))
+    ax2 = ax1.twinx()
+    ax2.set_yscale('log')
+    ax1.plot(np.arange(nlayers) + 1, cuts_pos, ls='-', color=colors[0])
+    ax2.errorbar(np.arange(nlayers) + 1, rej_pow, yerr=rej_err,
+                 fmt='o', capsize=3, color=colors[1])
+    ax1.set_xlabel('Layer Number', fontsize=20)
+    ax1.set_ylabel('Cut Position (E/p)', color=colors[0], fontsize=20)
+    ax2.grid(axis='both', which='both', ls=':')
+    ax2.xaxis.set_major_locator(MultipleLocator(5))
+    ax2.xaxis.set_minor_locator(MultipleLocator(1))
+    ax2.set_ylabel('$\pi^-$ Rejection Power', color=colors[1], fontsize=20)
+    ax1.tick_params(labelsize=20)
+    ax2.tick_params(labelsize=20)
+    ax1.set_title('2D Scan of E/p Cut', fontsize=22)
+    ax1.text(0.5, 0.03, '$\epsilon_e \geq$ {:.2f}%'.format(args.eff*100.),
+             transform=ax1.transAxes, fontsize=20, va='bottom', ha='center', bbox=box_props)
+    fig.subplots_adjust(left=0.15, right=0.85)
+    pdf.savefig(fig)
+    plt.close(fig)
    pdf.close()
    # save cut position and performance

--- a/benchmarks/imaging_shower_ML/scripts/ml_data_preprocess.py
+++ b/benchmarks/imaging_shower_ML/scripts/ml_data_preprocess.py
 '''
-    A script to prepare datasets for ML training
+    A script to prepare datasets for ML training.
+    It may also apply E/p cut to clean-up the samples
+    Chao Peng (ANL)
+    2022/11/13
 '''
 import ROOT
 import os

--- a/benchmarks/imaging_shower_ML/scripts/utils.py
+++ b/benchmarks/imaging_shower_ML/scripts/utils.py
+'''
+    functions and objects that are commonly used by all scripts within this benchmark
+    Chao Peng (ANL)
+    2022/11/13
+'''
 import ROOT
 import pandas as pd
 import numpy as np