From 2dbf8be47de80553724d1682bf179422b4bc0d60 Mon Sep 17 00:00:00 2001
From: Chao Peng <cpeng@anl.gov>
Date: Tue, 15 Jun 2021 22:38:27 +0000
Subject: [PATCH] update clustering benchmark

---
 .../clustering/options/full_cal_clustering.py |  4 ++-
 .../clustering/scripts/cluster_plots.py       | 34 ++++++++++++-------
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/benchmarks/clustering/options/full_cal_clustering.py b/benchmarks/clustering/options/full_cal_clustering.py
index 49d82f28..c7dcf394 100644
--- a/benchmarks/clustering/options/full_cal_clustering.py
+++ b/benchmarks/clustering/options/full_cal_clustering.py
@@ -11,7 +11,7 @@ detector_path = str(os.environ.get("DETECTOR_PATH", "."))
 compact_path = os.path.join(detector_path, detector_name)
 
 # get sampling fractions from system environment variable, 1.0 by default
-cb_ecal_sf = float(os.environ.get("CB_EMCAL_SAMP_FRAC", 1.0))
+cb_ecal_sf = float(os.environ.get("CB_EMCAL_SAMP_FRAC", 0.01324))
 cb_hcal_sf = float(os.environ.get("CB_HCAL_SAMP_FRAC", 1.0))
 
 # input and output
@@ -84,6 +84,7 @@ ce_ecal_cl = IslandCluster("ce_ecal_cl",
 
 ce_ecal_clreco = RecoCoG("ce_ecal_clreco",
         clusterCollection="CrystalEcalClusters",
+        samplingFraction=0.998,      # this accounts for a small fraction of leakage
         logWeightBase=4.6)
 
 
@@ -115,6 +116,7 @@ cb_ecal_cl = ImagingCluster("cb_ecal_cl",
         adjLayerDiff=2,                         # id diff for adjacent layer
         adjSectorDist=3.*cm)                    # different sector
 cb_ecal_clreco = ImagingClusterReco("cb_ecal_clreco",
+        samplingFraction=cb_hcal_sf,
         inputClusterCollection="EcalBarrelClusters",
         outputLayerCollection="EcalBarrelLayers")
 
diff --git a/benchmarks/clustering/scripts/cluster_plots.py b/benchmarks/clustering/scripts/cluster_plots.py
index 52ed607f..6b17413f 100644
--- a/benchmarks/clustering/scripts/cluster_plots.py
+++ b/benchmarks/clustering/scripts/cluster_plots.py
@@ -74,24 +74,34 @@ def thrown_particles_figure(df, save):
     plt.close(fig)
 
 
-def general_clusters_figure(df, collection, save):
+def general_clusters_figure(df, collection, save, min_nhits=5):
     data = df.AsNumpy([
-        '{}.nhits'.format(collection),
-        '{}.energy'.format(collection),
-        '{}.polar.theta'.format(collection),
-        '{}.polar.phi'.format(collection),
-        ])
+                '{}.nhits'.format(collection),
+                '{}.energy'.format(collection),
+                '{}.polar.theta'.format(collection),
+                '{}.polar.phi'.format(collection),
+             ])
+    # build a dataframe
+    evns = []
+    for i, vec in enumerate(data['{}.nhits'.format(collection)]):
+        evns += [i]*vec.size()
+    for n, vals in data.items():
+        data[n] = np.asarray([v for vec in vals for v in vec])
+    dfp = pd.DataFrame(columns=['nhits', 'edep', 'theta', 'phi'],
+            data=np.vstack(list(data.values())).T)
+    dfp.loc[:, 'evn'] = evns
+    # select the max. energy cluster for each event
+    dfp = dfp.loc[dfp.groupby('evn')['edep'].idxmax()]
     # figure
     fig, axs = plt.subplots(2, 2, figsize=(16, 12), dpi=120)
     labels = [
         ('Number of Hits', 'Counts'),
-        ('Energy (MeV)', 'Counts'),
-        ('Theta (rad)', 'Counts'),
-        ('Phi (rad)', 'Counts'),
+        (r'$E$ (MeV)', 'Counts'),
+        (r'$\theta$ (rad)', 'Counts'),
+        (r'$\phi$ (rad)', 'Counts'),
     ]
-    for ax, label, (name, vals) in zip(axs.flat, labels, data.items()):
-        hvals = [v for vec in vals for v in vec]
-        ax.hist(hvals, bins=50, ec='k')
+    for ax, label, vals in zip(axs.flat, labels, dfp[['nhits', 'edep', 'theta', 'phi']].values.T):
+        ax.hist(vals, bins=50, ec='k')
         ax.tick_params(labelsize=22, direction='in', which='both')
         ax.grid(linestyle=':', which='both')
         ax.set_axisbelow(True)
-- 
GitLab