From 2bd8cca52eb42ae36c520f957d19734b837de1db Mon Sep 17 00:00:00 2001
From: Chao Peng <cpeng@anl.gov>
Date: Wed, 5 May 2021 18:09:23 +0000
Subject: [PATCH] add benchmark for sampling calorimeter

---
 .gitlab-ci.yml                                |   1 +
 .../compact/barrel_ecal_test.xml              | 129 +++++++++++
 benchmarks/sampling_ecal/config.yml           |  36 +++
 .../options/sampling_cluster3d.py             |  79 +++++++
 benchmarks/sampling_ecal/requirements.txt     |   1 +
 benchmarks/sampling_ecal/run_emcal_barrel.sh  | 119 ++++++++++
 .../sampling_ecal/scripts/draw_cluster.py     | 211 ++++++++++++++++++
 .../scripts/draw_cluster_layers.py            | 185 +++++++++++++++
 .../sampling_ecal/scripts/energy_profile.py   | 131 +++++++++++
 .../sampling_ecal/scripts/epi_separation.py   | 100 +++++++++
 .../sampling_ecal/scripts/gen_particles.py    | 113 ++++++++++
 benchmarks/sampling_ecal/scripts/utils.py     | 124 ++++++++++
 12 files changed, 1229 insertions(+)
 create mode 100644 benchmarks/sampling_ecal/compact/barrel_ecal_test.xml
 create mode 100644 benchmarks/sampling_ecal/config.yml
 create mode 100644 benchmarks/sampling_ecal/options/sampling_cluster3d.py
 create mode 100644 benchmarks/sampling_ecal/requirements.txt
 create mode 100644 benchmarks/sampling_ecal/run_emcal_barrel.sh
 create mode 100644 benchmarks/sampling_ecal/scripts/draw_cluster.py
 create mode 100644 benchmarks/sampling_ecal/scripts/draw_cluster_layers.py
 create mode 100644 benchmarks/sampling_ecal/scripts/energy_profile.py
 create mode 100644 benchmarks/sampling_ecal/scripts/epi_separation.py
 create mode 100644 benchmarks/sampling_ecal/scripts/gen_particles.py
 create mode 100644 benchmarks/sampling_ecal/scripts/utils.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 449c20c8..37c3bd37 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -46,6 +46,7 @@ include:
   - local: 'benchmarks/tracking/config.yml'
   - local: 'benchmarks/clustering/config.yml'
   - local: 'benchmarks/rich/config.yml'
+  - local: 'benchmarks/sampling_ecal/config.yml'
 
 
 final_report:
diff --git a/benchmarks/sampling_ecal/compact/barrel_ecal_test.xml b/benchmarks/sampling_ecal/compact/barrel_ecal_test.xml
new file mode 100644
index 00000000..c194cb19
--- /dev/null
+++ b/benchmarks/sampling_ecal/compact/barrel_ecal_test.xml
@@ -0,0 +1,129 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<lccdd>
+  <info 
+    name="topside" 
+    title="Time-of-flight Optimized PID Silicon Detector for EIC (TOPSiDE)" 
+    author="Whitney R. Armstrong" 
+    url="" 
+    status="development" 
+    version="$Id: 1$">
+    <comment> </comment>
+  </info>
+
+  <define>
+    <include ref="topside/topside_defs.xml" />
+    <include ref="eic/eic_defs.xml" />
+  </define>
+
+  <properties>
+    <matrix name="RINDEX__Vacuum" coldim="2" values="
+      1.0*eV 1.0
+      5.1*eV 1.0
+      "/>
+    <matrix name="RINDEX__Air" coldim="2" values="
+      1.0*eV 1.00029
+      5.1*eV 1.00029
+      "/>
+    <matrix name="RINDEX__Quartz" coldim="2" values="
+      1.0*eV 1.46
+      5.1*eV 1.46
+      "/>
+    <matrix name="RINDEX__N2" coldim="2" values="
+      1.0*eV 1.00033
+      4.0*eV 1.00033
+      5.1*eV 1.00033
+      "/>
+    <matrix name="RINDEX__Pyrex" coldim="2" values="
+      1.0*eV 1.5
+      4.0*eV 1.5
+      5.1*eV 1.5
+      "/>
+    <matrix name= "REFLECTIVITY_mirror" coldim="2" values="
+      1.0*eV  0.9
+      4.0*eV  0.9
+      5.1*eV  0.9
+    "/>
+  </properties>
+
+  <includes>
+    <gdmlFile ref="topside/elements.xml"/>
+    <gdmlFile ref="topside/materials.xml"/>
+  </includes>
+
+  <materials>
+    <material name="AirOptical">
+      <D type="density" unit="g/cm3" value="0.0012"/>
+      <fraction n="0.754" ref="N"/>
+      <fraction n="0.234" ref="O"/>
+      <fraction n="0.012" ref="Ar"/>
+      <property name="RINDEX" ref="RINDEX__Vacuum"/>
+    </material>
+    <material name="N2cherenkov">
+      <D type="density" value="0.00125" unit="g/cm3"/>
+      <composite n="1" ref="N"/>
+      <property name="RINDEX"  ref="RINDEX__N2"/>
+    </material>
+    <material name="PyrexGlass">
+      <D type="density" value="2.23" unit="g/cm3"/>
+      <fraction n="0.806" ref="SiliconOxide"/>
+      <fraction n="0.130" ref="BoronOxide"/>
+      <fraction n="0.040" ref="SodiumOxide"/>
+      <fraction n="0.023" ref="AluminumOxide"/>
+      <property name="RINDEX" ref="RINDEX__Pyrex"/>
+    </material>
+  </materials>
+
+  <surfaces>
+    <comment> For the values of "finish", model and type, see TGeoOpticalSurface.h !
+    </comment>
+    <opticalsurface finish="polished" model="glisur" name="MirrorOpticalSurface" type="dielectric_metal" value="0">
+      <property name="REFLECTIVITY" ref="REFLECTIVITY_mirror"/>
+      <property name="RINDEX"       coldim="2" values="1.034*eV  1.5   4.136*eV  1.5"/>
+      <!--<property name="EFFICIENCY"   ref="EFFICIENCY0x8b77240"/>-->
+    </opticalsurface>
+    <opticalsurface name="mirror2" finish="polished" model="glisur" type="dielectric_dielectric">
+      <property name="REFLECTIVITY"            coldim="2" values="1.034*eV  0.8   4.136*eV  0.9"/>
+      <property name="EFFICIENCY"              coldim="2" values="2.034*eV  0.8   4.136*eV  1.0"/>
+      <property name="RINDEX"                  coldim="2" values="1.034*eV  1.5   4.136*eV  1.5"/>
+    </opticalsurface>
+
+  </surfaces>
+  <limits>
+    <limitset name="EICBeamlineLimits">
+      <limit name="step_length_max" particles="*" value="1.0" unit="mm" />
+      <limit name="track_length_max" particles="*" value="1.0" unit="mm" />
+      <limit name="time_max" particles="*" value="0.1" unit="ns" />
+      <limit name="ekin_min" particles="*" value="0.001" unit="MeV" />
+      <limit name="range_min" particles="*" value="0.1" unit="mm" />
+    </limitset>
+    <limitset name="cal_limits">
+      <limit name="step_length_max" particles="*" value="0.1" unit="mm"/>
+    </limitset>
+  </limits>
+
+  <display>
+  <include ref="topside/display.xml" />
+  </display>
+
+  <!--
+  <include ref="topside/vertex_tracker.xml"/>
+  -->
+  <include ref="topside/beampipe.xml"/>
+  <include ref="topside/silicon_tracker.xml"/>
+  <!--<include ref="topside/ecal.xml"/>-->        <comment> old version of em barrel - SiW sampling design </comment> 
+  <include ref="topside/ecal_wAstroPixSiW.xml"/>  <comment> new version of em barrel - SiW AstroPix sampling design </comment>
+  <!--
+  <include ref="topside/hcal.xml"/>
+  <include ref="topside/forward_rich.xml"/>
+  <include ref="topside/solenoid.xml"/>
+  <include ref="topside/roman_pots.xml"/>
+  <include ref="eic/forward_ion_beamline.xml"/>
+  -->
+
+  <detectors>
+  </detectors>
+  <readouts>
+  </readouts>
+
+
+</lccdd>
diff --git a/benchmarks/sampling_ecal/config.yml b/benchmarks/sampling_ecal/config.yml
new file mode 100644
index 00000000..25153819
--- /dev/null
+++ b/benchmarks/sampling_ecal/config.yml
@@ -0,0 +1,36 @@
+sampling_ecal_electrons:
+  image: eicweb.phy.anl.gov:4567/eic/juggler/juggler:$JUGGLER_TAG
+  needs: ["common:detector"]
+  timeout: 48 hours
+  artifacts:
+    expire_in: 20 weeks
+    paths:
+      - results/
+  stage: run
+  script:
+    - bash benchmarks/sampling_ecal/run_emcal_barrel.sh -n emcal_barrel_electrons -p "electron"
+
+sampling_ecal_photons:
+  image: eicweb.phy.anl.gov:4567/eic/juggler/juggler:$JUGGLER_TAG
+  needs: ["common:detector"]
+  timeout: 48 hours
+  artifacts:
+    expire_in: 20 weeks
+    paths:
+      - results/
+  stage: run
+  script:
+    - bash benchmarks/sampling_ecal/run_emcal_barrel.sh -n emcal_barrel_photons -p "photon"
+
+sampling_ecal_pions:
+  image: eicweb.phy.anl.gov:4567/eic/juggler/juggler:$JUGGLER_TAG
+  needs: ["common:detector"]
+  timeout: 48 hours
+  artifacts:
+    expire_in: 20 weeks
+    paths:
+      - results/
+  stage: run
+  script:
+    - bash benchmarks/sampling_ecal/run_emcal_barrel.sh -n emcal_barrel_pions -p "pion-"
+
diff --git a/benchmarks/sampling_ecal/options/sampling_cluster3d.py b/benchmarks/sampling_ecal/options/sampling_cluster3d.py
new file mode 100644
index 00000000..768cdc08
--- /dev/null
+++ b/benchmarks/sampling_ecal/options/sampling_cluster3d.py
@@ -0,0 +1,79 @@
+import os
+import ROOT
+from Gaudi.Configuration import *
+from GaudiKernel import SystemOfUnits as units
+
+from GaudiKernel.DataObjectHandleBase import DataObjectHandleBase
+from Configurables import ApplicationMgr, EICDataSvc, PodioOutput, GeoSvc
+
+from Configurables import PodioInput
+from Configurables import Jug__Base__InputCopier_dd4pod__Geant4ParticleCollection_dd4pod__Geant4ParticleCollection_ as MCCopier
+from Configurables import Jug__Digi__EcalTungstenSamplingDigi as EcalTungstenSamplingDigi
+from Configurables import Jug__Reco__EcalTungstenSamplingReco as EcalTungstenSamplingReco
+from Configurables import Jug__Reco__TopologicalCellCluster as TopologicalCellCluster
+from Configurables import Jug__Reco__ImagingClusterReco as ImagingReco
+
+
+# input arguments through environment variables
+kwargs = dict()
+kwargs['sf'] = float(os.environ.get('CB_EMCAL_SAMP_FRAC', '1.0'))
+kwargs['input'] = os.environ.get('CB_EMCAL_SIM_FILE', '../topside/barrel_el_5GeV.root')
+kwargs['output'] = os.environ.get('CB_EMCAL_REC_FILE', 'barrel_cluster_el5GeV.root')
+kwargs['compact'] = os.environ.get('CB_EMCAL_COMPACT_PATH', '../topside/test.xml')
+kwargs['nev'] = int(os.environ.get('CB_EMCAL_NUMEV', 1000))
+
+print(kwargs)
+
+if kwargs['nev'] < 1:
+    f = ROOT.TFile(kwargs['input'])
+    kwargs['nev'] = f.events.GetEntries()
+
+# get sampling fraction from system environment variable, 1.0 by default
+sf = float(os.environ.get('CB_EMCAL_SAMP_FRAC', '1.0'))
+
+geo_service  = GeoSvc("GeoSvc", detectors=kwargs['compact'].split(','))
+podioevent = EICDataSvc("EventDataSvc", inputs=kwargs['input'].split(','), OutputLevel=DEBUG)
+out = PodioOutput("out", filename=kwargs['output'])
+
+podioinput = PodioInput("PodioReader", collections=["mcparticles", "EcalBarrelHits"], OutputLevel=DEBUG)
+
+copier = MCCopier("MCCopier",
+                  inputCollection="mcparticles",
+                  outputCollection="mcparticles2",
+                  OutputLevel=DEBUG)
+emcaldigi = EcalTungstenSamplingDigi("ecal_digi",
+                                     inputHitCollection="EcalBarrelHits",
+                                     outputHitCollection="DigiEcalBarrelHits",
+                                     inputEnergyUnit=units.GeV,
+                                     inputTimeUnit=units.ns,
+                                     energyResolutions=[0., 0.02, 0.],
+                                     dynamicRangeADC=3*units.MeV,
+                                     pedestalSigma=40,
+                                     OutputLevel=DEBUG)
+emcalreco = EcalTungstenSamplingReco("ecal_reco",
+                                     inputHitCollection="DigiEcalBarrelHits",
+                                     outputHitCollection="RecoEcalBarrelHits",
+                                     dynamicRangeADC=3.*units.MeV,
+                                     pedestalSigma=40,
+                                     OutputLevel=DEBUG)
+emcalcluster = TopologicalCellCluster(inputHitCollection="RecoEcalBarrelHits",
+                                      outputClusterCollection="EcalBarrelClusters",
+                                      minClusterCenterEdep=0.3*units.MeV,
+                                      groupRanges=[2.*units.cm, 2*units.cm, 2.*units.cm])
+clusterreco = ImagingReco(inputClusterCollection="EcalBarrelClusters",
+                          outputClusterCollection="EcalBarrelClustersReco",
+                          outputLayerCollection="EcalBarrelClustersLayers",
+                          samplingFraction=sf,
+                          layerIDMaskRange=[15, 24],
+                          OutputLevel=DEBUG)
+
+out.outputCommands = ["keep *"]
+
+ApplicationMgr(
+    TopAlg=[podioinput, copier, emcaldigi, emcalreco, emcalcluster, clusterreco, out],
+    EvtSel='NONE',
+    EvtMax=kwargs['nev'],
+    ExtSvc=[podioevent],
+    OutputLevel=DEBUG
+)
+
diff --git a/benchmarks/sampling_ecal/requirements.txt b/benchmarks/sampling_ecal/requirements.txt
new file mode 100644
index 00000000..7a4ab2e2
--- /dev/null
+++ b/benchmarks/sampling_ecal/requirements.txt
@@ -0,0 +1 @@
+imageio >= 2.9.0
diff --git a/benchmarks/sampling_ecal/run_emcal_barrel.sh b/benchmarks/sampling_ecal/run_emcal_barrel.sh
new file mode 100644
index 00000000..2b076e02
--- /dev/null
+++ b/benchmarks/sampling_ecal/run_emcal_barrel.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+
+while getopts n:p:f: flag
+do
+    case "${flag}" in
+        n) nametag=${OPTARG};;
+        p) particle=${OPTARG};;
+    esac
+done
+
+if [[ ! -n  "${CB_EMCAL_TEST_DETECTOR}" ]] ; then
+  export CB_EMCAL_TEST_DETECTOR="barrel_ecal_test"
+fi
+# copy the compact file to detector path
+cp benchmarks/sampling_ecal/compact/${CB_EMCAL_TEST_DETECTOR}.xml ${DETECTOR_PATH}
+export CB_EMCAL_COMPACT_PATH=${DETECTOR_PATH}/${CB_EMCAL_TEST_DETECTOR}.xml
+
+
+if [[ ! -n  "${CB_EMCAL_NUMEV}" ]] ; then
+  export CB_EMCAL_NUMEV=1000
+fi
+
+if [[ ! -n "${CB_EMCAL_IEV}" ]] ; then
+  export CB_EMCAL_IEV=5
+fi
+
+if [[ ! -n  "${CB_EMCAL_ENERGY}" ]] ; then
+  export CB_EMCAL_ENERGY=5.0
+fi
+
+if [[ ! -n "${CB_EMCAL_SAMP_FRAC}" ]] ; then
+  export CB_EMCAL_SAMP_FRAC=0.014
+fi
+
+export CB_EMCAL_NAME_TAG="${nametag}"
+export CB_EMCAL_GEN_FILE="${CB_EMCAL_NAME_TAG}.hepmc"
+
+export CB_EMCAL_SIM_FILE="sim_${CB_EMCAL_NAME_TAG}.root"
+export CB_EMCAL_REC_FILE="rec_${CB_EMCAL_NAME_TAG}.root"
+
+echo "CB_EMCAL_NUMEV = ${CB_EMCAL_NUMEV}"
+echo "CB_EMCAL_COMPACT_PATH = ${CB_EMCAL_COMPACT_PATH}"
+
+# Generate the input events
+python benchmarks/sampling_ecal/scripts/gen_particles.py ${CB_EMCAL_GEN_FILE} \
+    --angmin 90 --angmax 90 --parray ${CB_EMCAL_ENERGY} --particles="${particle}"
+if [[ "$?" -ne "0" ]] ; then
+  echo "ERROR running script: generating input events"
+  exit 1
+fi
+
+ls -lh ${CB_EMCAL_GEN_FILE}
+
+# Run geant4 simulations
+npsim --runType batch \
+      -v WARNING \
+      --numberOfEvents ${CB_EMCAL_NUMEV} \
+      --compactFile ${CB_EMCAL_COMPACT_PATH} \
+      --inputFiles ${CB_EMCAL_GEN_FILE} \
+      --outputFile ${CB_EMCAL_SIM_FILE}
+
+if [[ "$?" -ne "0" ]] ; then
+  echo "ERROR running npdet"
+  exit 1
+fi
+
+rootls -t "${CB_EMCAL_SIM_FILE}"
+
+# Directory for plots
+mkdir -p results
+
+# CB_EMCAL_OPTION_DIR=${JUGGLER_INSTALL_PREFIX}/Examples/options
+# CB_EMCAL_SCRIPT_DIR=${JUGGLER_INSTALL_PREFIX}/Examples/scripts/sampling_calorimeter
+CB_EMCAL_OPTION_DIR=benchmarks/sampling_ecal/options
+CB_EMCAL_SCRIPT_DIR=benchmarks/sampling_ecal/scripts
+
+# Run Juggler
+# xenv -x ${JUGGLER_INSTALL_PREFIX}/Juggler.xenv gaudirun.py ${CB_EMCAL_OPTION_DIR}/sampling_cluster3d.py
+gaudirun.py ${CB_EMCAL_OPTION_DIR}/sampling_cluster3d.py
+if [[ "$?" -ne "0" ]] ; then
+  echo "ERROR running juggler"
+  exit 1
+fi
+
+# check required python modules
+python -m pip install -r benchmarks/sampling_ecal/requirements.txt
+
+# Run analysis script
+python ${CB_EMCAL_SCRIPT_DIR}/draw_cluster_layers.py \
+    ${CB_EMCAL_REC_FILE} -e ${CB_EMCAL_IEV} --topo-size=1.0 --compact=${CB_EMCAL_COMPACT_PATH} -o results/${particle}
+if [[ "$?" -ne "0" ]] ; then
+  echo "ERROR running analysis script: draw_cluster_layers"
+  exit 1
+fi
+
+python ${CB_EMCAL_SCRIPT_DIR}/draw_cluster.py \
+    ${CB_EMCAL_REC_FILE} -e ${CB_EMCAL_IEV} --topo-size=2.0 --compact=${CB_EMCAL_COMPACT_PATH} -o results/${particle}
+if [[ "$?" -ne "0" ]] ; then
+  echo "ERROR running analysis script: draw_cluster"
+  exit 1
+fi
+
+python ${CB_EMCAL_SCRIPT_DIR}/energy_profile.py \
+    ${CB_EMCAL_REC_FILE} --type=EM --energy=${CB_EMCAL_ENERGY} -o results/${particle} \
+    --save=results/profile.csv --color=royalblue
+if [[ "$?" -ne "0" ]] ; then
+  echo "ERROR running analysis script: energy_profile"
+  exit 1
+fi
+
+
+root_filesize=$(stat --format=%s "${CB_EMCAL_REC_FILE}")
+if [[ "${CB_EMCAL_NUMEV}" -lt "500" ]] ; then
+  # file must be less than 10 MB to upload
+  if [[ "${root_filesize}" -lt "10000000" ]] ; then
+    cp ${CB_EMCAL_REC_FILE} results/.
+  fi
+fi
+
diff --git a/benchmarks/sampling_ecal/scripts/draw_cluster.py b/benchmarks/sampling_ecal/scripts/draw_cluster.py
new file mode 100644
index 00000000..31aafb78
--- /dev/null
+++ b/benchmarks/sampling_ecal/scripts/draw_cluster.py
@@ -0,0 +1,211 @@
+'''
+    A script to visualize the cluster
+    It reads the output from the Juggler component ImagingClusterReco, which is supposed to be clusters of hits after
+    digitization, reconstruction, and clustering
+
+    Author: Chao Peng (ANL)
+    Date: 04/30/2021
+'''
+
+import os
+import numpy as np
+import pandas as pd
+import argparse
+import matplotlib
+from matplotlib import cm
+from matplotlib import pyplot as plt
+from matplotlib.ticker import MultipleLocator
+from matplotlib.collections import PatchCollection
+from matplotlib.patches import Rectangle
+from mpl_toolkits.axes_grid1 import make_axes_locatable
+from utils import *
+import sys
+
+
+# draw cluster in a 3d axis, expect a numpy array of (nhits, 4) shape with each row contains (x, y, z, E)
+# note z and x axes are switched
+def draw_hits3d(axis, data, cmap, units=('mm', 'mm', 'mm', 'MeV'), fontsize=24, **kwargs):
+    # normalize energy to get colors
+    x, y, z, edep = np.transpose(data)
+    cvals = edep - min(edep) / (max(edep) - min(edep))
+    cvals[np.isnan(cvals)] = 1.0
+    colors = cmap(cvals)
+
+    # hits
+    axis.scatter(z, y, x, c=colors, marker='o', **kwargs)
+    axis.tick_params(labelsize=fontsize)
+    axis.set_zlabel('x ({})'.format(units[2]), fontsize=fontsize + 2, labelpad=fontsize)
+    axis.set_ylabel('y ({})'.format(units[1]), fontsize=fontsize + 2, labelpad=fontsize)
+    axis.set_xlabel('z ({})'.format(units[0]), fontsize=fontsize + 2, labelpad=fontsize)
+    cb = plt.colorbar(cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin=min(edep), vmax=max(edep)), cmap=cmap),
+                      ax=axis, shrink=0.85)
+    cb.ax.tick_params(labelsize=fontsize)
+    cb.ax.get_yaxis().labelpad = fontsize
+    cb.ax.set_ylabel('Energy Deposit ({})'.format(units[3]), rotation=90, fontsize=fontsize + 4)
+    return axis
+
+
+# draw a cylinder in 3d axes
+# note z and x axes are switched
+def draw_cylinder3d(axis, r, z, order=['x', 'y', 'z'], rsteps=500, zsteps=500, **kwargs):
+    x = np.linspace(-r, r, rsteps)
+    z = np.linspace(-z, z, zsteps)
+    Xc, Zc = np.meshgrid(x, z)
+    Yc = np.sqrt(r**2 - Xc**2)
+
+    axis.plot_surface(Zc, Yc, Xc, alpha=0.1, **kwargs)
+    axis.plot_surface(Zc, -Yc, Xc, alpha=0.1, **kwargs)
+    return axis
+
+
+# fit the track of cluster and draw the fit
+def draw_track_fit(axis, dfh, length=200, stop_layer=8, scat_kw=dict(), line_kw=dict()):
+    dfh = dfh[dfh['layer'] <= stop_layer]
+    data = dfh.groupby('layer')[['z', 'y','x']].mean().values
+    # data = dfh[['z', 'y', 'x']].values
+    # ax.scatter(*data.T, **scat_kw)
+    datamean = data.mean(axis=0)
+    uu, dd, vv = np.linalg.svd(data - datamean)
+    linepts = vv[0] * np.mgrid[-length:length:2j][:, np.newaxis]
+    linepts += datamean
+    axis.plot3D(*linepts.T, 'k:')
+    return axis
+
+
+# color map
+def draw_heatmap(axis, x, y, weights, bins=1000, cmin=0., cmap=plt.get_cmap('rainbow'), pc_kw=dict()):
+    w, xedg, yedg = np.histogram2d(x, y, weights=weights, bins=bins)
+    xsz = np.mean(np.diff(xedg))
+    ysz = np.mean(np.diff(yedg))
+    wmin, wmax = w.min(), w.max()
+    recs, clrs = [], []
+    for i in np.arange(len(xedg) - 1):
+        for j in np.arange(len(yedg) - 1):
+            if w[i][j] > cmin:
+                recs.append(Rectangle((xedg[i], yedg[j]), xsz, ysz))
+                clrs.append(cmap((w[i][j] - wmin) / (wmax - wmin)))
+    axis.add_collection(PatchCollection(recs, facecolor=clrs, **pc_kw))
+    axis.set_xlim(xedg[0], xedg[-1])
+    axis.set_ylim(yedg[0], yedg[-1])
+    return axis, cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin=wmin, vmax=wmax), cmap=cmap)
+
+
+# execute this script
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Visualize the cluster from analysis')
+    parser.add_argument('file', type=str, help='path to root file')
+    parser.add_argument('-e', type=int, default=0, dest='iev', help='event number to plot')
+    parser.add_argument('-c', type=int, default=0, dest='icl', help='cluster number to plot')
+    parser.add_argument('-s', type=int, default=8, dest='stop', help='stop layer for track fit')
+    parser.add_argument('-o', type=str, default='./plots', dest='outdir', help='output directory')
+    parser.add_argument('--compact', type=str, default='', dest='compact', help='compact file')
+    parser.add_argument('-m', '--macros', type=str, default='rootlogon.C', dest='macros',
+                        help='root macros to load (accept multiple paths separated by \",\")')
+    parser.add_argument('-b', '--branch-name', type=str, default='EcalBarrelClustersLayers', dest='branch',
+                        help='branch name in the root file (outputLayerCollection from ImagingClusterReco)')
+    parser.add_argument('--topo-size', type=float, default=2.0, dest='topo_size',
+                        help='bin size for projection plot (mrad)')
+    parser.add_argument('--topo-range', type=float, default=50.0, dest='topo_range',
+                        help='half range for projection plot (mrad)')
+    args = parser.parse_args()
+
+
+    # we can read these values from xml file
+    desc = compact_constants(args.compact, [
+        'EcalBarrelAstroPix_RMin',
+        'EcalBarrelAstroPix_ReadoutLayerThickness',
+        'EcalBarrelAstroPix_ReadoutLayerNumber',
+        'EcalBarrelAstroPix_Length'
+    ])
+    if not len(desc):
+        # or define Ecal shapes
+        rmin, thickness, length = 890, 20*(10. + 1.65), 860*2+500
+    else:
+        # convert cm to mm
+        rmin = desc[0]*10.
+        thickness = desc[1]*desc[2]*10.
+        length = desc[3]*10.
+
+
+    # read data
+    load_root_macros(args.macros)
+    df = get_hits_data(args.file, args.iev, branch=args.branch)
+    dfmcp = get_mcp_data(args.file, args.iev, 'mcparticles2')
+    vec = dfmcp.loc[dfmcp['status'] == 24578, ['px', 'py', 'pz']].iloc[0].values
+    vec = vec/np.linalg.norm(vec)
+    df = df[df['cluster'] == args.icl]
+    if not len(df):
+        print("Error: do not find any hits for cluster {:d} in event {:d}".format(args.icl, args.iev))
+        exit(-1)
+    # particle line from (0, 0, 0) to the inner Ecal surface
+    length = rmin/np.sqrt(vec[0]**2 + vec[1]**2)
+    pline = np.transpose(vec*np.mgrid[0:length:2j][:, np.newaxis])
+
+
+    os.makedirs(args.outdir, exist_ok=True)
+    # cluster plot
+    cmap = truncate_colormap(plt.get_cmap('jet'), 0.1, 0.9)
+    fig = plt.figure(figsize=(20, 16), dpi=160)
+    ax = fig.add_subplot(111, projection='3d')
+    # draw particle line
+    ax.plot(*pline[[2, 1]], '--', zs=pline[0], color='green')
+    # draw hits
+    draw_hits3d(ax, df[['x', 'y', 'z', 'edep']].values, cmap, s=5.0)
+    draw_cylinder3d(ax, rmin, length, rstride=10, cstride=10, color='royalblue')
+    draw_cylinder3d(ax, rmin + thickness, length, rstride=10, cstride=10, color='forestgreen')
+    ax.set_zlim(-(rmin + thickness), rmin + thickness)
+    ax.set_ylim(-(rmin + thickness), rmin + thickness)
+    ax.set_xlim(-length, length)
+    fig.tight_layout()
+    fig.savefig(os.path.join(args.outdir, 'e{}_cluster.png'.format(args.iev)))
+
+
+    # zoomed-in cluster plot
+    fig = plt.figure(figsize=(20, 16), dpi=160)
+    ax = fig.add_subplot(111, projection='3d')
+    # draw particle line
+    ax.plot(*pline[[2, 1]], '--', zs=pline[0], color='green')
+    # draw hits
+    draw_hits3d(ax, df[['x', 'y', 'z', 'edep']].values, cmap, s=20.0)
+    draw_track_fit(ax, df, stop_layer=args.stop,
+                   scat_kw=dict(color='k', s=50.0), line_kw=dict(linestyle=':', color='k', lw=3))
+    # view range
+    center = (length + thickness/2.)*vec
+    ranges = np.vstack([center - thickness/4., center + thickness/4.]).T
+    ax.set_zlim(*ranges[0])
+    ax.set_ylim(*ranges[1])
+    ax.set_xlim(*ranges[2])
+
+    fig.tight_layout()
+    fig.savefig(os.path.join(args.outdir, 'e{}_cluster_zoom.png'.format(args.iev)))
+
+
+    # projection plot
+    # convert to polar coordinates (mrad), and stack all r values
+    df['r'] = np.sqrt(df['x'].values**2 + df['y'].values**2 + df['z'].values**2)
+    df['phi'] = np.arctan2(df['y'].values, df['x'].values)*1000.
+    df['theta'] = np.arccos(df['z'].values/df['r'].values)*1000.
+
+    # convert to mrad
+    vecp = np.asarray([np.arccos(vec[2]), np.arctan2(vec[1], vec[0])])*1000.
+    phi_rg = np.asarray([vecp[1] - args.topo_range, vecp[1] + args.topo_range])
+    th_rg = np.asarray([vecp[0] - args.topo_range, vecp[0] + args.topo_range])
+
+    fig, axs = plt.subplots(1, 2, figsize=(17, 16), dpi=160, gridspec_kw={'wspace':0., 'width_ratios': [16, 1]})
+    ax, sm = draw_heatmap(axs[0], df['theta'].values, df['phi'].values, weights=df['edep'].values,
+                          bins=(np.arange(*th_rg, step=args.topo_size), np.arange(*phi_rg, step=args.topo_size)),
+                          cmap=cmap, cmin=0., pc_kw=dict(alpha=0.8, edgecolor='k'))
+
+    ax.set_xlabel(r'$\theta$ (mrad)', fontsize=28)
+    ax.set_ylabel(r'$\phi$ (mrad)', fontsize=28)
+    ax.tick_params(labelsize=24)
+    ax.xaxis.set_minor_locator(MultipleLocator(5))
+    ax.yaxis.set_minor_locator(MultipleLocator(5))
+    ax.grid(linestyle=':', which='both')
+    ax.set_axisbelow(True)
+    cb = plt.colorbar(sm, cax=axs[1], shrink=0.85)
+    cb.ax.tick_params(labelsize=24)
+    cb.ax.get_yaxis().labelpad = 24
+    cb.ax.set_ylabel('Energy Deposit (MeV)', rotation=90, fontsize=28)
+    fig.savefig(os.path.join(args.outdir, 'e{}_topo.png'.format(args.iev)))
+
diff --git a/benchmarks/sampling_ecal/scripts/draw_cluster_layers.py b/benchmarks/sampling_ecal/scripts/draw_cluster_layers.py
new file mode 100644
index 00000000..e3b9ed3d
--- /dev/null
+++ b/benchmarks/sampling_ecal/scripts/draw_cluster_layers.py
@@ -0,0 +1,185 @@
+'''
+    A script to visualize the cluster layer-by-layer
+    It reads the output from the Juggler component ImagingClusterReco, which is supposed to be clusters of hits after
+    digitization, reconstruction, and clustering
+
+    Author: Chao Peng (ANL)
+    Date: 04/30/2021
+'''
+
+import os
+import numpy as np
+import pandas as pd
+import argparse
+import matplotlib
+from matplotlib import cm
+from matplotlib import pyplot as plt
+from matplotlib.ticker import MultipleLocator
+from matplotlib.collections import PatchCollection
+from matplotlib.patches import Rectangle
+from mpl_toolkits.axes_grid1 import make_axes_locatable
+from matplotlib.backends.backend_pdf import PdfPages
+from utils import *
+import sys
+import imageio
+
+
+# color map
+def draw_heatmap(axis, x, y, weights, bins=1000, vmin=None, vmax=None, cmap=plt.get_cmap('rainbow'), pc_kw=dict()):
+    w, xedg, yedg = np.histogram2d(x, y, weights=weights, bins=bins)
+    xsz = np.mean(np.diff(xedg))
+    ysz = np.mean(np.diff(yedg))
+    if vmin == None:
+        vmin = w.min()
+    if vmax == None:
+        vmax = w.max()
+    recs, clrs = [], []
+    for i in np.arange(len(xedg) - 1):
+        for j in np.arange(len(yedg) - 1):
+            if w[i][j] > vmin:
+                recs.append(Rectangle((xedg[i], yedg[j]), xsz, ysz))
+                clrs.append(cmap((w[i][j] - vmin) / (vmax - vmin)))
+    axis.add_collection(PatchCollection(recs, facecolor=clrs, **pc_kw))
+    axis.set_xlim(xedg[0], xedg[-1])
+    axis.set_ylim(yedg[0], yedg[-1])
+    return axis, cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin=vmin, vmax=vmax), cmap=cmap)
+
+
+# execute this script
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Visualize the cluster (layer-wise) from analysis')
+    parser.add_argument('file', type=str, help='path to root file')
+    parser.add_argument('-e', type=int, default=0, dest='iev', help='event number to plot')
+    parser.add_argument('-c', type=int, default=0, dest='icl', help='cluster number to plot')
+    parser.add_argument('-s', type=int, default=8, dest='stop', help='stop layer for track fit')
+    parser.add_argument('-o', type=str, default='./plots', dest='outdir', help='output directory')
+    parser.add_argument('-d', type=float, default=1.0, dest='dura', help='duration of gif')
+    parser.add_argument('--compact', type=str, default='', dest='compact', help='compact file')
+    parser.add_argument('-m', '--macros', type=str, default='rootlogon.C', dest='macros',
+                        help='root macros to load (accept multiple paths separated by \",\")')
+    parser.add_argument('-b', '--branch-name', type=str, default='EcalBarrelClustersLayers', dest='branch',
+                        help='branch name in the root file (outputLayerCollection from ImagingClusterReco)')
+    parser.add_argument('--topo-size', type=float, default=2.0, dest='topo_size',
+                        help='bin size for projection plot (mrad)')
+    parser.add_argument('--topo-range', type=float, default=50.0, dest='topo_range',
+                        help='half range for projection plot (mrad)')
+    args = parser.parse_args()
+
+
+    # we can read these values from xml file
+    desc = compact_constants(args.compact, [
+        'EcalBarrelAstroPix_RMin',
+        'EcalBarrelAstroPix_ReadoutLayerThickness',
+        'EcalBarrelAstroPix_ReadoutLayerNumber',
+        'EcalBarrelAstroPix_Length'
+    ])
+
+    if not len(desc):
+        # or define Ecal shapes
+        rmin, thickness, length = 890, 20*(10. + 1.65), 860*2+500
+    else:
+        # convert cm to mm
+        rmin = desc[0]*10.
+        thickness = desc[1]*desc[2]*10.
+        length = desc[3]*10.
+
+
+    # read data
+    load_root_macros(args.macros)
+    df = get_hits_data(args.file, args.iev, args.branch)
+    dfmcp = get_mcp_data(args.file, args.iev, 'mcparticles2')
+    vec = dfmcp.loc[dfmcp['status'] == 24578, ['px', 'py', 'pz']].iloc[0].values
+    vec = vec/np.linalg.norm(vec)
+    df = df[df['cluster'] == args.icl]
+    # convert to polar coordinates (mrad), and stack all r values
+    df['r'] = np.sqrt(df['x'].values**2 + df['y'].values**2 + df['z'].values**2)
+    df['phi'] = np.arctan2(df['y'].values, df['x'].values)*1000.
+    df['theta'] = np.arccos(df['z'].values/df['r'].values)*1000.
+    if not len(df):
+        print("Error: do not find any hits for cluster {:d} in event {:d}".format(args.icl, args.iev))
+        exit(-1)
+    # particle line from (0, 0, 0) to the inner Ecal surface
+    length = rmin/np.sqrt(vec[0]**2 + vec[1]**2)
+    pline = np.transpose(vec*np.mgrid[0:length:2j][:, np.newaxis])
+    cmap = truncate_colormap(plt.get_cmap('jet'), 0.1, 0.9)
+
+
+    # convert truth to mrad
+    vecp = np.asarray([np.arccos(vec[2]), np.arctan2(vec[1], vec[0])])*1000.
+    phi_rg = np.asarray([vecp[1] - args.topo_range, vecp[1] + args.topo_range])
+    th_rg = np.asarray([vecp[0] - args.topo_range, vecp[0] + args.topo_range])
+
+    os.makedirs(args.outdir, exist_ok=True)
+    # cluster plot by layers (rebinned)
+    pdf = PdfPages(os.path.join(args.outdir, 'e{}_c{}_layers.pdf'.format(args.iev, args.icl)))
+    bpos, bprops = (0.5, 0.95), dict(boxstyle='round', facecolor='wheat', alpha=0.2)
+    frames = []
+    for i in np.arange(1, df['layer'].max() + 1, dtype=int):
+        data = df[df['layer'] == i]
+        fig, axs = plt.subplots(1, 2, figsize=(17, 16), dpi=160, gridspec_kw={'wspace':0., 'width_ratios': [16, 1]})
+        ax, sm = draw_heatmap(axs[0], data['theta'].values, data['phi'].values, weights=data['edep'].values,
+                              bins=(np.arange(*th_rg, step=args.topo_size), np.arange(*phi_rg, step=args.topo_size)),
+                              cmap=cmap, vmin=0.0, vmax=1.0, pc_kw=dict(alpha=0.8, edgecolor='k'))
+        ax.set_ylabel(r'$\phi$ (mrad)', fontsize=28)
+        ax.set_xlabel(r'$\theta$ (mrad)', fontsize=28)
+        ax.tick_params(labelsize=24)
+        ax.xaxis.set_minor_locator(MultipleLocator(5))
+        ax.yaxis.set_minor_locator(MultipleLocator(5))
+        ax.grid(linestyle=':', which='both')
+        ax.set_axisbelow(True)
+        ax.text(*bpos, 'Layer {:d}'.format(i), transform=ax.transAxes,
+                fontsize=26, va='top', ha='center', bbox=bprops)
+        cb = plt.colorbar(sm, cax=axs[1], shrink=0.85)
+        cb.ax.tick_params(labelsize=24)
+        cb.ax.get_yaxis().labelpad = 24
+        cb.ax.set_ylabel('Energy Deposit (MeV)', rotation=90, fontsize=28)
+        pdf.savefig(fig)
+        # gif frames
+        fig.savefig('ltmp.png')
+        plt.close(fig)
+        frames.append(imageio.imread('ltmp.png'))
+    pdf.close()
+    os.remove('ltmp.png')
+
+    # build GIF
+    imageio.mimsave(os.path.join(args.outdir, 'e{:d}_c{:d}_layers.gif'.format(args.iev, args.icl)),
+                    frames, 'GIF', duration=args.dura)
+
+
+    # cluster plot by layers (scatters)
+    pdf = PdfPages(os.path.join(args.outdir, 'e{}_c{}_layers2.pdf'.format(args.iev, args.icl)))
+    bpos, bprops = (0.5, 0.95), dict(boxstyle='round', facecolor='wheat', alpha=0.2)
+    frames = []
+    for i in np.arange(1, df['layer'].max() + 1, dtype=int):
+        data = df[df['layer'] == i]
+        fig, axs = plt.subplots(1, 2, figsize=(17, 16), dpi=160, gridspec_kw={'wspace':0., 'width_ratios': [16, 1]})
+        ax = axs[0]
+        colors = cmap(data['edep']/1.0)
+        ax.scatter(data['theta'].values, data['phi'].values, c=colors, marker='s', s=15.0)
+        ax.set_xlim(*th_rg)
+        ax.set_ylim(*phi_rg)
+        ax.set_ylabel(r'$\phi$ (mrad)', fontsize=28)
+        ax.set_xlabel(r'$\theta$ (mrad)', fontsize=28)
+        ax.tick_params(labelsize=24)
+        ax.xaxis.set_minor_locator(MultipleLocator(5))
+        ax.yaxis.set_minor_locator(MultipleLocator(5))
+        ax.grid(linestyle=':', which='both')
+        ax.set_axisbelow(True)
+        ax.text(*bpos, 'Layer {:d}'.format(i), transform=ax.transAxes,
+                fontsize=26, va='top', ha='center', bbox=bprops)
+        cb = plt.colorbar(sm, cax=axs[1], shrink=0.85)
+        cb.ax.tick_params(labelsize=24)
+        cb.ax.get_yaxis().labelpad = 24
+        cb.ax.set_ylabel('Energy Deposit (MeV)', rotation=90, fontsize=28)
+        pdf.savefig(fig)
+        # gif frames
+        fig.savefig('ltmp.png')
+        plt.close(fig)
+        frames.append(imageio.imread('ltmp.png'))
+    pdf.close()
+    os.remove('ltmp.png')
+
+    # build GIF
+    imageio.mimsave(os.path.join(args.outdir, 'e{:d}_c{:d}_layers2.gif'.format(args.iev, args.icl)),
+                    frames, 'GIF', duration=args.dura)
+
diff --git a/benchmarks/sampling_ecal/scripts/energy_profile.py b/benchmarks/sampling_ecal/scripts/energy_profile.py
new file mode 100644
index 00000000..c984cfbd
--- /dev/null
+++ b/benchmarks/sampling_ecal/scripts/energy_profile.py
@@ -0,0 +1,131 @@
+'''
+    A script to generate the energy profile (layer-wise)
+    It reads the output from the Juggler component ImagingClusterReco, which is supposed to be clusters of hits after
+    digitization, reconstruction, and clustering
+
+    Author: Chao Peng (ANL)
+    Date: 04/30/2021
+'''
+
+import os
+import numpy as np
+import pandas as pd
+import ROOT
+from ROOT import gROOT, gInterpreter
+import argparse
+import matplotlib
+from matplotlib import pyplot as plt
+from matplotlib.ticker import MultipleLocator, MaxNLocator
+import sys
+from utils import *
+
+
+def find_start_layer(grp, min_edep=0.5):
+    ids, edeps = grp.sort_values('layer')[['layer', 'edep']].values.T
+    edeps = np.cumsum(edeps)
+    return min(ids[edeps > min_edep]) if sum(edeps > min_edep) > 0 else 20
+
+
+# execute this script
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Generate energy profiles')
+    parser.add_argument('file', type=str, help='path to root file')
+    parser.add_argument('-o', '--plot-dir', type=str, default='./plots', dest='outdir', help='output directory')
+    parser.add_argument('-t', '--type', type=str, default='unknown', dest='type', help='profile type (used in save)')
+    parser.add_argument('-e', '--energy', type=float, default=5., dest='energy', help='incident particle energy (GeV)')
+    parser.add_argument('-s', '--save', type=str, default='', dest='save', help='path to save profile')
+    parser.add_argument('-c', '--color', type=str, default='royalblue', dest='color', help='colors for bar plots')
+    parser.add_argument('-b', '--branch-name', type=str, default='EcalBarrelClustersLayers', dest='branch',
+                        help='branch name in the root file (outputLayerCollection from ImagingClusterReco)')
+    parser.add_argument('-m', '--macros', type=str, default='rootlogon.C', dest='macros',
+                        help='root macros to load (accept multiple paths separated by \",\")')
+    args = parser.parse_args()
+
+    load_root_macros(args.macros)
+    # prepare data
+    dfe = get_layers_data(args.file, branch=args.branch)
+    dfe.loc[:, 'total_edep'] = dfe.groupby(['event', 'cluster'])['edep'].transform('sum')
+    # dfe.loc[:, 'efrac'] = dfe['edep']/dfe['total_edep']
+    dfe.loc[:, 'efrac'] = dfe['edep']/(args.energy*1000.)
+    dfe = dfe[dfe['cluster'] == 0]
+
+    os.makedirs(args.outdir, exist_ok=True)
+
+    slayer = dfe.groupby('event').apply(lambda x: find_start_layer(x, 1.0)).astype(int)
+    dfe = dfe.merge(slayer.to_frame(name='slayer'), on='event')
+    # dfe.loc[:, 'eff_layer'] = dfe['layer'] - dfe['slayer']
+    # prof = dfe[dfe['eff_layer'] > 0].groupby('eff_layer')['edep'].describe()
+    prof = dfe.groupby('layer')['efrac'].describe()
+
+    # print(prof['mean'])
+    # plot profiles
+    bpos, bprops = (0.5, 0.95), dict(boxstyle='round', facecolor='wheat', alpha=0.3)
+    fig, ax = plt.subplots(figsize=(16, 9), dpi=160)
+    nev = len(dfe['event'].unique())
+    ax.hist(dfe.groupby('event')['slayer'].min(), weights=[1/float(nev)]*nev,
+            ec='black', bins=np.arange(0.5, 10.5, step=1.0))
+    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
+    ax.xaxis.set_minor_locator(MultipleLocator(1))
+    ax.yaxis.set_minor_locator(MultipleLocator(1))
+    ax.grid(linestyle=':', which='both')
+    ax.tick_params(labelsize=24)
+    ax.set_xlabel('Start Layer', fontsize=26)
+    ax.set_ylabel('Normalized Counts', fontsize=26)
+    ax.text(*bpos, 'Mininum Edep\n' + '{:.1f} MeV'.format(1.0),
+            transform=ax.transAxes, fontsize=26, verticalalignment='top', bbox=bprops)
+    fig.savefig(os.path.join(args.outdir, 'edep_start_{}_{}.png'.format(args.type, int(args.energy))))
+
+
+    fig, ax = plt.subplots(figsize=(16, 9), dpi=160)
+    ax.plot(prof.index, prof['mean'].values*100., color=args.color, lw=2)
+    # ax.fill_between(prof.index, prof['25%'], prof['75%'], color=args.color, alpha=0.3)
+    ax.fill_between(prof.index, (prof['mean'] - prof['std'])*100., (prof['mean'] + prof['std'])*100.,
+                    color=args.color, alpha=0.3)
+    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
+    ax.xaxis.set_minor_locator(MultipleLocator(1))
+    ax.yaxis.set_minor_locator(MultipleLocator(1))
+    ax.grid(linestyle=':', which='both')
+    ax.tick_params(labelsize=24)
+    ax.set_xlabel('Layer', fontsize=26)
+    ax.set_ylabel('Energy Deposit Percentage', fontsize=26)
+    fig.savefig(os.path.join(args.outdir, 'efrac_{}_{}.png'.format(args.type, int(args.energy))))
+
+    # edep fraction by layers
+    layers = np.asarray([
+        [1, 5, 8,],
+        [10, 15, 20],
+    ])
+    layers_bins = np.linspace(0, 0.5, 50)
+
+    fig, ax = plt.subplots(*layers.shape, figsize=(16, 9), dpi=160, sharex='col', sharey='all',
+                           gridspec_kw=dict(hspace=0.05, wspace=0.05))
+
+    for ax, layer in zip(ax.flat, layers.flatten()):
+        data = dfe[dfe['layer'] == layer]
+        ax.hist(data['efrac'].values*100., weights=[1/float(len(data))]*len(data), bins=layers_bins,
+                ec='black', color=args.color)
+        ax.tick_params(labelsize=24)
+        ax.xaxis.set_minor_locator(MultipleLocator(1))
+        ax.grid(linestyle=':', which='both')
+        # ax.set_xlabel('Energy Deposit (MeV)', fontsize=26)
+        # ax.set_ylabel('Normalized Counts', fontsize=26)
+        mean, std = data.describe().loc[['mean', 'std'], 'edep'].values
+        label = 'Layer {}'.format(layer)
+    #            + '\n' + r'$\mu = {:.3f}$ MeV'.format(mean)
+    #            + '\n' + r'$\sigma = {:.3f}$ MeV'.format(std)
+        ax.text(*bpos, label, transform=ax.transAxes, fontsize=26, verticalalignment='top', bbox=bprops)
+        ax.set_axisbelow(True)
+        ax.set_yscale('log')
+    fig.text(0.5, 0.02, 'Energy Deposit Percentage', fontsize=26, ha='center')
+    fig.text(0.02, 0.5, 'Normalized Counts', fontsize=26, va='center', rotation=90)
+    fig.savefig(os.path.join(args.outdir, 'efrac_layers_{}_{}.png'.format(args.type, int(args.energy))))
+
+
+    if args.save:
+        prof.loc[:, 'energy'] = args.energy*1000.
+        prof.loc[:, 'type'] = args.type
+        if os.path.exists(args.save):
+            prev = pd.read_csv(args.save).set_index('layer', drop=True)
+            prof = pd.concat([prof, prev])
+        prof.to_csv(args.save)
+
diff --git a/benchmarks/sampling_ecal/scripts/epi_separation.py b/benchmarks/sampling_ecal/scripts/epi_separation.py
new file mode 100644
index 00000000..e552f21a
--- /dev/null
+++ b/benchmarks/sampling_ecal/scripts/epi_separation.py
@@ -0,0 +1,100 @@
+'''
+    A script to use the energy profile for e-pi separation
+    It reads the output from the Juggler component ImagingClusterReco, which is supposed to be clusters of hits after
+    digitization, reconstruction, and clustering
+
+    Author: Chao Peng (ANL)
+    Date: 04/30/2021
+'''
+
+import os
+import numpy as np
+import pandas as pd
+import ROOT
+from ROOT import gROOT, gInterpreter
+import argparse
+import matplotlib
+from matplotlib import pyplot as plt
+from matplotlib.ticker import MultipleLocator, MaxNLocator
+import sys
+from utils import *
+
+
+def prepare_data(path, **kwargs):
+    tmp = get_layers_data(path, **kwargs)
+    tmp.loc[:, 'total_edep'] = tmp.groupby(['event', 'cluster'])['edep'].transform('sum')
+    tmp.loc[:, 'efrac'] = tmp['edep']/tmp['total_edep']
+    # tmp = tmp[tmp['cluster'] == 0]
+    return tmp
+
+
+def calc_chi2(grp, pr, lmin=5, lmax=12):
+    grp2 = grp[(grp['layer'] >= lmin) & (grp['layer'] <= lmax)]
+    mean, std = pr.loc[grp2['layer'], ['mean', 'std']].values.T*pr['energy'].mean()
+    edeps = grp2['edep'].values
+    return np.sqrt(np.sum((edeps - mean)**2/std**2)/float(len(edeps)))
+
+
+# execute this script
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='epi_separation')
+    parser.add_argument('efile', type=str, help='path to root file (electrons)')
+    parser.add_argument('pifile', type=str, help='path to root file (pions)')
+    parser.add_argument('--prof', type=str, default='profile.csv', help='path to electron profile')
+    parser.add_argument('--plot-dir', type=str, default='./plots', dest='outdir', help='output directory')
+    parser.add_argument('-b', '--branch-name', type=str, default='EcalBarrelClustersLayers', dest='branch',
+                        help='branch name in the root file (outputLayerCollection from ImagingClusterReco)')
+    parser.add_argument('-m', '--macros', type=str, default='rootlogon.C', dest='macros',
+                        help='root macros to load (accept multiple paths separated by \",\")')
+    args = parser.parse_args()
+
+    os.makedirs(args.outdir, exist_ok=True)
+
+
+    load_root_macros(args.macros)
+    # prepare data
+    dfe = prepare_data(args.efile, branch=args.branch)
+    dfpi = prepare_data(args.pifile, branch=args.branch)
+
+    colors = ['royalblue', 'indianred', 'limegreen']
+    prof = pd.read_csv(args.prof).set_index('layer', drop=True)
+
+    # profile comparison
+    fig, ax = plt.subplots(figsize=(16, 9), dpi=160)
+    for title, color in zip(sorted(prof['type'].unique()), colors):
+        pr = prof[prof['type'] == title]
+        ax.plot(pr.index, pr['mean'], color=color, lw=2)
+        ax.fill_between(pr.index, (pr['mean'] - pr['std']), (pr['mean'] + pr['std']),
+                        color=color, alpha=0.3, label=title)
+    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
+    ax.xaxis.set_minor_locator(MultipleLocator(1))
+    ax.yaxis.set_minor_locator(MultipleLocator(1))
+    ax.grid(linestyle=':', which='both')
+    ax.tick_params(labelsize=24)
+    ax.set_xlabel('Layer', fontsize=26)
+    ax.set_ylabel('Energy Deposit Percentage', fontsize=26)
+    ax.legend(fontsize=26, loc='upper left')
+    fig.savefig(os.path.join(args.outdir, 'compare_prof.png'))
+
+
+    # check profile
+    layer_range = (4, 12)
+    pre = prof[prof['type'].str.lower() == 'em']
+    fig, ax = plt.subplots(figsize=(16, 9), dpi=160)
+    chi2 = dfe.groupby(['event', 'cluster']).apply(lambda x: calc_chi2(x, pre, *layer_range))
+    ax.hist(chi2, bins=np.linspace(0, 5, 200), weights=[1/float(len(chi2))]*chi2,
+            ec='royalblue', color='royalblue', alpha=0.5, label='electrons')
+    chi2 = dfpi.groupby(['event', 'cluster']).apply(lambda x: calc_chi2(x, pre, *layer_range))
+    # print(chi2[chi2 < 0.7])
+    ax.hist(chi2, bins=np.linspace(0, 5, 200), weights=[1/float(len(chi2))]*chi2,
+            ec='indianred', color='indianred', alpha=0.5, label='pions')
+    ax.grid(linestyle=':', which='major')
+    ax.set_axisbelow(True)
+    ax.tick_params(labelsize=24)
+    ax.set_xlabel(r'$\chi^2$', fontsize=26)
+    ax.set_ylabel('Normalized Counts', fontsize=26)
+    # ax.set_yscale('log')
+    # ax.set_ylim(1e-3, 1.)
+    ax.legend(title=r'$\chi^2$ for $E_{{dep}}$ in layer {:d} - {:d}'.format(*layer_range), title_fontsize=28, fontsize=26)
+    fig.savefig(os.path.join(args.outdir, 'efrac_chi2.png'))
+
diff --git a/benchmarks/sampling_ecal/scripts/gen_particles.py b/benchmarks/sampling_ecal/scripts/gen_particles.py
new file mode 100644
index 00000000..fb3a8966
--- /dev/null
+++ b/benchmarks/sampling_ecal/scripts/gen_particles.py
@@ -0,0 +1,113 @@
+import os
+from pyHepMC3 import HepMC3 as hm
+import numpy as np
+import argparse
+
+
+PARTICLES = {
+    "pion0": (111, 0.1349766),       # pi0
+    "pion+": (211, 0.13957018),      # pi+
+    "pion-": (-211, 0.13957018),     # pi-
+    "kaon0": (311, 0.497648),        # K0
+    "kaon+": (321, 0.493677),        # K+
+    "kaon-": (-321, 0.493677),       # K-
+    "proton": (2212, 0.938272),      # proton
+    "neutron": (2112, 0.939565),     # neutron
+    "electron": (11, 0.51099895e-3), # electron
+    "positron": (-11, 0.51099895e-3),# positron
+    "photon": (22, 0),               # photon
+}
+
+
+# p in GeV, angle in degree, vertex in mm
+def gen_event(prange=(8, 100), arange=(0, 20), phrange=(0., 360.), parts=[(p, m) for p, m in PARTICLES.values()]):
+    evt = hm.GenEvent(momentum_unit=hm.Units.MomentumUnit.GEV, length_unit=hm.Units.LengthUnit.MM)
+    pid, mass = parts[np.random.randint(len(parts))]
+
+    # final state
+    state = 1
+
+    # momentum, angles, energy
+    p = np.random.uniform(*prange)
+    theta = np.random.uniform(*arange)*np.pi/180.
+    phi = np.random.uniform(*phrange)*np.pi/180.
+    e0 = np.sqrt(p*p + mass*mass)
+
+    px = np.cos(phi)*np.sin(theta)
+    py = np.sin(phi)*np.sin(theta)
+    pz = np.cos(theta)
+
+    # beam
+    pbeam = hm.GenParticle(hm.FourVector(0, 0, 0, 0.938272), 2212, 4)
+    ebeam = hm.GenParticle(hm.FourVector(0, 0, e0, np.sqrt(e0*e0 + 0.511e-3*0.511e-3)), 11, 4)
+
+    hout = hm.GenParticle(hm.FourVector(px*p, py*p, pz*p, e0), pid, state)
+    # evt.add_particle(part)
+    vert = hm.GenVertex()
+    vert.add_particle_in(ebeam)
+    vert.add_particle_in(pbeam)
+    vert.add_particle_out(hout)
+    evt.add_vertex(vert)
+    return evt
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser('RICH dataset generator')
+
+    parser.add_argument('output', help='path to the output file')
+    parser.add_argument('-n', type=int, default=1000, dest='nev', help='number of events to generate')
+    parser.add_argument('--parray', type=str, default="", dest='parray', help='an array of momentum in GeV')
+    parser.add_argument('--pmin', type=float, default=8.0, dest='pmin', help='minimum momentum in GeV')
+    parser.add_argument('--pmax', type=float, default=100.0, dest='pmax', help='maximum momentum in GeV')
+    parser.add_argument('--angmin', type=float, default=0.0, dest='angmin', help='minimum angle in degree')
+    parser.add_argument('--angmax', type=float, default=20.0, dest='angmax', help='maximum angle in degree')
+    parser.add_argument('--phmin', type=float, default=0.0, dest='phmin', help='minimum angle in degree')
+    parser.add_argument('--phmax', type=float, default=360.0, dest='phmax', help='maximum angle in degree')
+    parser.add_argument('--particles', type=str, default='11', dest='particles', help='particles pdg code')
+    parser.add_argument('-s', type=int, default=-1, dest='seed', help='seed for random generator')
+
+    args = parser.parse_args()
+
+    # random seed
+    if args.seed < 0:
+        args.seed = os.environ.get('SEED', int.from_bytes(os.urandom(4), byteorder='big', signed=False))
+    np.random.seed(args.seed)
+
+    output = hm.WriterAscii(args.output);
+    if output.failed():
+        print("Cannot open file \"{}\"".format(args.output))
+        sys.exit(2)
+
+    parts = []
+    for pid in args.particles.split(','):
+        pid = pid.strip()
+        if pid not in PARTICLES.keys():
+            print('pid {:d} not found in dictionary, ignored.'.format(pid))
+            continue
+        parts.append(PARTICLES[pid])
+
+
+    if not args.parray:
+        count = 0
+        while count < args.nev:
+            if (count % 1000 == 0):
+                print("Generated {} events".format(count), end='\r')
+            evt = gen_event((args.pmin, args.pmax), (args.angmin, args.angmax), (args.phmin, args.phmax), parts=parts)
+            output.write_event(evt)
+            evt.clear()
+            count += 1
+    else:
+        for mo in args.parray.split(','):
+            p = float(mo.strip())
+            count = 0
+            while count < args.nev:
+                if (count % 1000 == 0):
+                    print("Generated {} events".format(count), end='\r')
+                evt = gen_event((p, p), (args.angmin, args.angmax), parts=parts)
+                output.write_event(evt)
+                evt.clear()
+                count += 1
+
+    print("Generated {} events".format(args.nev))
+    output.close()
+
diff --git a/benchmarks/sampling_ecal/scripts/utils.py b/benchmarks/sampling_ecal/scripts/utils.py
new file mode 100644
index 00000000..2e81c508
--- /dev/null
+++ b/benchmarks/sampling_ecal/scripts/utils.py
@@ -0,0 +1,124 @@
+'''
+    A utility script to help the analysis of imaging calorimeter data
+
+    Author: Chao Peng (ANL)
+    Date: 04/30/2021
+'''
+
+import os
+import ROOT
+import numpy as np
+import pandas as pd
+import matplotlib
+import DDG4
+from ROOT import gROOT, gInterpreter
+
+
+# helper function to truncate color map (for a better view from the rainbow colormap)
+def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
+    new_cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
+        'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval),
+        cmap(np.linspace(minval, maxval, n)))
+    return new_cmap
+
+
+# load root macros, input is an argument string
+def load_root_macros(arg_macros):
+    for path in arg_macros.split(','):
+        path = path.strip()
+        if os.path.exists(path):
+            gROOT.Macro(path)
+        else:
+            print('\"{}\" does not exist, skip loading it.'.format(path))
+
+
+# read mc particles from root file
+def get_mcp_data(path, evnums=None, branch='mcparticles2'):
+    f = ROOT.TFile(path)
+    events = f.events
+    if evnums is None:
+        evnums = np.arange(events.GetEntries())
+    elif isinstance(evnums, int):
+        evnums = [evnums]
+
+    dbuf = np.zeros(shape=(2000*len(evnums), 6))
+    idb = 0
+    for iev in evnums:
+        if iev >= events.GetEntries():
+            print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1))
+            continue
+
+        events.GetEntry(iev)
+        # extract full mc particle data
+        for part in getattr(events, branch):
+            dbuf[idb] = (iev, part.psx, part.psy, part.psz, part.pdgID, part.status)
+            idb += 1
+    return pd.DataFrame(data=dbuf[:idb], columns=['event', 'px', 'py', 'pz', 'pid', 'status'])
+
+
+# read hits data from root file
+def get_hits_data(path, evnums=None, branch='EcalBarrelClustersLayers'):
+    f = ROOT.TFile(path)
+    events = f.events
+    if evnums is None:
+        evnums = np.arange(events.GetEntries())
+    elif isinstance(evnums, int):
+        evnums = [evnums]
+
+    dbuf = np.zeros(shape=(2000*len(evnums), 7))
+    idb = 0
+    for iev in evnums:
+        if iev >= events.GetEntries():
+            print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1))
+            continue
+
+        events.GetEntry(iev)
+        for layer in getattr(events, branch):
+            for k, hit in enumerate(layer.hits):
+                if k < layer.nhits:
+                    dbuf[idb] = (iev, layer.clusterID, layer.layerID, hit.x, hit.y, hit.z, hit.E)
+                    idb += 1
+
+    return pd.DataFrame(data=dbuf[:idb], columns=['event', 'cluster', 'layer', 'x', 'y', 'z', 'edep'])
+
+
+# read layers data from root file
+def get_layers_data(path, evnums=None, branch="EcalBarrelClustersLayers"):
+    f = ROOT.TFile(path)
+    events = f.events
+    if evnums is None:
+        evnums = np.arange(events.GetEntries())
+    elif isinstance(evnums, int):
+        evnums = [evnums]
+
+    dbuf = np.zeros(shape=(2000*len(evnums), 7))
+    idb = 0
+    for iev in evnums:
+        if iev >= events.GetEntries():
+            print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1))
+            continue
+
+        events.GetEntry(iev)
+        for layer in getattr(events, branch):
+            dbuf[idb] = (iev, layer.clusterID, layer.layerID,
+                         layer.position.x, layer.position.y, layer.position.z, layer.edep)
+            idb += 1
+
+    return pd.DataFrame(data=dbuf[:idb], columns=['event', 'cluster', 'layer', 'x', 'y', 'z', 'edep'])
+
+
+def compact_constants(path, names):
+    if not os.path.exists(path):
+        print('Cannot find compact file \"{}\".'.format(path))
+        return []
+    kernel = DDG4.Kernel()
+    description = kernel.detectorDescription()
+    kernel.loadGeometry("file:{}".format(path))
+    try:
+        vals = [description.constantAsDouble(n) for n in names]
+    except:
+        print('Fail to extract values from {}, return empty.'.format(names))
+        vals = []
+    kernel.terminate()
+    return vals
+
-- 
GitLab