diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index bcccd797d90fcfefbedd54c15b899dace82e4062..1739e4cc03d69d725cded7bba5e630c3895c4531 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -102,7 +102,7 @@ common:detector:
       - runner_system_failure
   interruptible: true
 
-include: 
+include:
   - local: 'benchmarks/ecal/config.yml'
   - local: 'benchmarks/track_finding/config.yml'
   - local: 'benchmarks/track_fitting/config.yml'
diff --git a/benchmarks/imaging_shower_ML/config.yml b/benchmarks/imaging_shower_ML/config.yml
index 15730f2cf08ac161074756ebc420685176502bb2..5d4429b835ec4a99085885fd59ce57bee48f92e4 100644
--- a/benchmarks/imaging_shower_ML/config.yml
+++ b/benchmarks/imaging_shower_ML/config.yml
@@ -1,4 +1,4 @@
-ml_shower:tagging_epimuphka_100:
+ml_shower:epi_separation:
   extends: .rec_benchmark
   stage: benchmarks1
   script:
@@ -7,74 +7,7 @@ ml_shower:tagging_epimuphka_100:
     - |
       if [[ ${DETECTOR} =~ athena
          || ${DETECTOR} =~ ecce && ${DETECTOR_CONFIG} =~ imaging ]] ; then
-        python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_epimuphka_100 -n 100 --particles "electron,pion-,muon,photon,kaon-" --pmin 0.5 --pmax 10
+        pip3 install -r benchmarks/imaging_shower_ML/requirements.txt
+        python3 benchmarks/imaging_shower_ML/run_benchmark.py -t imcal_epi -n 1000 --p-min 1.8 --p-max 2.2 --no-epcut
       fi
 
-ml_shower:tagging_epimuphka:
-  extends: .rec_benchmark
-  when: manual
-  stage: benchmarks1
-  script:
-    - ls -hal
-    - |
-      if [[ ${DETECTOR} =~ athena
-         || ${DETECTOR} =~ ecce && ${DETECTOR_CONFIG} =~ imaging ]] ; then
-        python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_epimuphka -n 10000 --particles "electron,pion-,muon,photon,kaon-" --pmin 0.5 --pmax 10
-      fi
-
-ml_shower:tagging_e:
-  extends: .rec_benchmark
-  when: manual
-  stage: benchmarks1
-  script:
-    - |
-      if [[ ${DETECTOR} =~ athena
-         || ${DETECTOR} =~ ecce && ${DETECTOR_CONFIG} =~ imaging ]] ; then
-        python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_e -n 100 --particles "electron" --pmin 0.5 --pmax 10
-      fi
-
-ml_shower:tagging_pi:
-  extends: .rec_benchmark
-  when: manual
-  stage: benchmarks1
-  script:
-    - |
-      if [[ ${DETECTOR} =~ athena
-         || ${DETECTOR} =~ ecce && ${DETECTOR_CONFIG} =~ imaging ]] ; then
-        python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_pi -n 100  --particles "pion-" --pmin 0.5 --pmax 10
-      fi
-
-ml_shower:training_100:
-  extends: .rec_benchmark
-  stage: process
-  needs: ["ml_shower:tagging_epimuphka_100"]#, "ml_shower:tagging_e", "ml_shower:tagging_pi"]
-  script:
-    - python3 -m pip install tensorflow particle
-    - |
-      if [[ ${DETECTOR} =~ athena
-         || ${DETECTOR} =~ ecce && ${DETECTOR_CONFIG} =~ imaging ]] ; then
-        python3 benchmarks/imaging_shower_ML/scripts/ml_training.py -t imcal_epimuphka_100 --pmin 0.5 --pmax 10
-      fi
-
-ml_shower:training:
-  extends: .rec_benchmark
-  stage: process
-  when: manual
-  needs: ["ml_shower:tagging_epimuphka"]#, "ml_shower:tagging_e", "ml_shower:tagging_pi"]
-  script:
-    - python3 -m pip install tensorflow particle
-    - |
-      if [[ ${DETECTOR} =~ athena
-         || ${DETECTOR} =~ ecce && ${DETECTOR_CONFIG} =~ imaging ]] ; then
-        python3 benchmarks/imaging_shower_ML/scripts/ml_training.py -t imcal_epimuphka --pmin 0.5 --pmax 10
-      fi
-
-ml_shower:test:
-  extends: .rec_benchmark
-  when: manual
-  stage: collect
-  needs: ["ml_shower:training"]
-  script:
-    - ls -lrth
-    # TODO
-
diff --git a/benchmarks/imaging_shower_ML/scripts/prepare_tf_dataset.py b/benchmarks/imaging_shower_ML/deprecated/prepare_tf_dataset.py
similarity index 100%
rename from benchmarks/imaging_shower_ML/scripts/prepare_tf_dataset.py
rename to benchmarks/imaging_shower_ML/deprecated/prepare_tf_dataset.py
diff --git a/benchmarks/imaging_shower_ML/sim_rec_tag.py b/benchmarks/imaging_shower_ML/deprecated/sim_rec_tag.py
similarity index 100%
rename from benchmarks/imaging_shower_ML/sim_rec_tag.py
rename to benchmarks/imaging_shower_ML/deprecated/sim_rec_tag.py
diff --git a/benchmarks/imaging_shower_ML/options/imaging_ml_data.py b/benchmarks/imaging_shower_ML/options/imaging_ml_data.py
index 096f68da5e6269b0e44a5c66b78fa7be14de372f..15a9d5b793815a965ab82e30c54b93bc78bf1a2d 100644
--- a/benchmarks/imaging_shower_ML/options/imaging_ml_data.py
+++ b/benchmarks/imaging_shower_ML/options/imaging_ml_data.py
@@ -9,9 +9,9 @@ from GaudiKernel.SystemOfUnits import MeV, GeV, mm, cm, mrad
 from Configurables import Jug__Digi__CalorimeterHitDigi as CalHitDigi
 from Configurables import Jug__Reco__CalorimeterHitReco as CalHitReco
 from Configurables import Jug__Reco__CalorimeterHitsMerger as CalHitsMerger
-from Configurables import Jug__Reco__ImagingPixelMerger as MLDataMerger
-from Configurables import Jug__Reco__ImagingPixelDataSorter as MLDataSorter
-from Configurables import Jug__Reco__ImagingPixelDataCombiner as MLDataCombiner
+# from Configurables import Jug__Reco__ImagingPixelMerger as MLDataMerger
+# from Configurables import Jug__Reco__ImagingPixelDataSorter as MLDataSorter
+# from Configurables import Jug__Reco__ImagingPixelDataCombiner as MLDataCombiner
 
 
 # input arguments through environment variables
@@ -20,9 +20,6 @@ kwargs['input'] = os.environ.get('JUGGLER_SIM_FILE', '')
 kwargs['output'] = os.environ.get('JUGGLER_REC_FILE', '')
 kwargs['compact'] = os.environ.get('JUGGLER_COMPACT_PATH', '')
 kwargs['nev'] = int(os.environ.get('JUGGLER_N_EVENTS', -1))
-kwargs['combine'] = os.environ.get('IMCAL_ML_COMBINE', 'concatenate')
-kwargs['img_nlayers'] = int(os.environ.get('IMCAL_ML_IMG_NLAYERS', 9))
-kwargs['nhits'] = int(os.environ.get('IMCAL_ML_NHITS', 20))
 
 if kwargs['nev'] < 1:
     f = ROOT.TFile(kwargs['input'])
@@ -34,56 +31,66 @@ sf = float(os.environ.get('JUGGLER_SAMP_FRAC', '1.0'))
 geo_service  = GeoSvc('GeoSvc', detectors=[f.strip() for f in kwargs['compact'].split(',')])
 podev = EICDataSvc('EventDataSvc', inputs=[f.strip() for f in kwargs['input'].split(',')])
 
-podin = PodioInput('PodioReader', collections=['MCParticles', 'EcalBarrelImagingHits', 'EcalBarrelImagingHitsContributions', 'EcalBarrelScFiHits', 'EcalBarrelScFiHitsContributions'])
+podin = PodioInput('PodioReader', collections=[
+    'MCParticles',
+    'EcalBarrelImagingHits',
+    'EcalBarrelImagingHitsContributions',
+    'EcalBarrelScFiHits',
+    'EcalBarrelScFiHitsContributions'
+    ])
 podout = PodioOutput('out', filename=kwargs['output'])
 
-# Central Barrel Ecal (Imaging Cal.)
+# Barrel Ecal AstroPix
 becal_img_daq = dict(
         dynamicRangeADC=3*MeV,
-        capacityADC=8192,
+        capacityADC=2**13,
         pedestalMean=400,
-        pedestalSigma=20)   # about 6 keV
+        pedestalSigma=20
+        )   # about 6 keV pedestal
 
-becal_img_digi = CalHitDigi('becal_img_digi',
+becal_img_digi = CalHitDigi(
+        'becal_img_digi',
         inputHitCollection='EcalBarrelImagingHits',
         outputHitCollection='EcalBarrelImagingHitsDigi',
         energyResolutions=[0., 0.02, 0.],   # 2% flat resolution
-        **becal_img_daq)
+        **becal_img_daq
+        )
 
-becal_img_reco = CalHitReco('becal_img_reco',
+becal_img_reco = CalHitReco(
+        'becal_img_reco',
         inputHitCollection=becal_img_digi.outputHitCollection,
         outputHitCollection='EcalBarrelImagingHitsReco',
         thresholdFactor=3,  # about 20 keV
         readoutClass='EcalBarrelImagingHits',  # readout class
         layerField='layer',             # field to get layer id
         sectorField='module',           # field to get sector id
-        **becal_img_daq)
+        **becal_img_daq
+        )
 
-becal_img_merger = MLDataMerger('becal_img_merger',
+"""
+becal_img_merger = MLDataMerger(
+        'becal_img_merger',
         inputHits=becal_img_reco.outputHitCollection,
         outputHits='EcalBarrelImagingHitsSeg',
         etaSize=0.001,
-        phiSize=0.001)
+        phiSize=0.001
+        )
 
 becal_img_sorter = MLDataSorter('becal_img_sorter',
         inputHitCollection=becal_img_merger.outputHits,
         outputHitCollection='EcalBarrelImagingHitsML',
-        numberOfLayers=kwargs['img_nlayers'],
-        numberOfHits=kwargs['nhits'])
+        numberOfLayers=6,
+        numberOfHits=20
+        )
+"""
 
-
-#Central ECAL SciFi
+# Barrel ECAL ScFi
 # use the same daq_setting for digi/reco pair
 becal_scfi_daq = dict(
         dynamicRangeADC=50.*MeV,
-        capacityADC=32768,
+        capacityADC=2**15,
         pedestalMean=400,
         pedestalSigma=10)
-# becal_scfi_daq = dict(
-#         dynamicRangeADC=50.*MeV,
-#         capacityADC=2**16,
-#         pedestalMean=100,
-#         pedestalSigma=0)
 
 becal_scfi_digi = CalHitDigi('becal_scfi_digi',
         inputHitCollection='EcalBarrelScFiHits',
@@ -105,25 +112,25 @@ becal_scfi_merger = CalHitsMerger('becal_scfi_merger',
         # OutputLevel=DEBUG,
         inputHitCollection=becal_scfi_reco.outputHitCollection,
         outputHitCollection='EcalBarrelScFiGridReco',
-        fields=['fiber'],
-        fieldRefNumbers=[1],
+        fields=['fiber', 'z'],
+        fieldRefNumbers=[1, 1],
         readoutClass='EcalBarrelScFiHits')
 
+"""
 becal_scfi_sorter = MLDataSorter('becal_scfi_sorter',
         inputHitCollection=becal_scfi_merger.outputHitCollection,
         outputHitCollection='EcalBarrelScFiHitsML',
         numberOfLayers=20,
-        numberOfHits=kwargs['nhits'])
+        numberOfHits=20)
 
 # combine layers
 becal_combiner = MLDataCombiner('becal_combiner',
         inputHits1=becal_img_sorter.outputHitCollection,
         inputHits2=becal_scfi_sorter.outputHitCollection,
-        outputHits='EcalBarrelImagingHitsCombinedML',
+        outputHits='EcalBarrelCombinedHitsML',
         layerIncrement=100,
-        rule=kwargs['combine'])
-
-        
+        rule='concatenate')
+"""
 
 podout.outputCommands = [
 #        'keep *',
@@ -135,11 +142,15 @@ podout.outputCommands = [
 ]
 
 ApplicationMgr(
-    TopAlg=[podin,
-            becal_img_digi, becal_img_reco, becal_img_merger, becal_img_sorter,
-            becal_scfi_digi, becal_scfi_reco, becal_scfi_merger, becal_scfi_sorter,
-            becal_combiner,
-            podout],
+    TopAlg=[
+        podin,
+        becal_img_digi, becal_img_reco,
+        # becal_img_merger, becal_img_sorter,
+        becal_scfi_digi, becal_scfi_reco, becal_scfi_merger,
+        # becal_scfi_sorter,
+        # becal_combiner,
+        podout
+        ],
     EvtSel='NONE',
     EvtMax=kwargs['nev'],
     ExtSvc=[podev],
diff --git a/benchmarks/imaging_shower_ML/requirements.txt b/benchmarks/imaging_shower_ML/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4088c9a02f807b1d8433fc29813c14f0519d7601
--- /dev/null
+++ b/benchmarks/imaging_shower_ML/requirements.txt
@@ -0,0 +1,2 @@
+tensorflow >= 2.10.0
+tables >= 3.7.0
diff --git a/benchmarks/imaging_shower_ML/run_benchmark.py b/benchmarks/imaging_shower_ML/run_benchmark.py
new file mode 100755
index 0000000000000000000000000000000000000000..2b2ee476290dd5c6c52d54beb02899f454eaabd6
--- /dev/null
+++ b/benchmarks/imaging_shower_ML/run_benchmark.py
@@ -0,0 +1,267 @@
+#!/usr/bin/env python3
+'''
+    A Python script to facilitate the ML benchmarks for e/pi separation with the imaging calorimeter (single particles).
+    This process follows the steps below:
+    1. Simulation to generate training samples
+    2. Study and apply E/p cut to reduce the training samples
+    3. Train and test ML models with the "cleaned" (after E/p cut) samples
+    4. Benchmark the performance
+
+    Author: Chao Peng (ANL)
+    Date: 11/11/2022
+'''
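+# A usage sketch, mirroring the CI job in config.yml (paths/values are examples):
+#   python3 benchmarks/imaging_shower_ML/run_benchmark.py \
+#       -t imcal_epi -n 1000 --p-min 1.8 --p-max 2.2 --no-epcut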
+import os
+import sys
+import subprocess
+import argparse
+
+
+SDIR = os.path.dirname(os.path.realpath(__file__))
+# {var} is from args
+FILE_NAMES = dict(
+    gen_script = os.path.join(SDIR, 'scripts', 'gen_particles.py'),
+    rec_script = os.path.join(SDIR, 'options', 'imaging_ml_data.py'),
+    epscan_script = os.path.join(SDIR, 'scripts', 'epcut_scan.py'),
+    prep_script = os.path.join(SDIR, 'scripts', 'ml_data_preprocess.py'),
+    ml_script = os.path.join(SDIR, 'scripts', 'ml_training.py'),
+
+    sim_dir = os.path.join('{outdir}', 'sim_data'),
+    epscan_dir = os.path.join('{outdir}', 'epcut'),
+    ml_dir = os.path.join('{outdir}', 'ml_result'),
+
+    gen_file = os.path.join('{outdir}', 'sim_data', '{ntag}_gen.hepmc'),
+    sim_file = os.path.join('{outdir}', 'sim_data', '{ntag}_sim.edm4hep.root'),
+    rec_file = os.path.join('{outdir}', 'sim_data', '{ntag}_rec.root'),
+    ml_file = os.path.join('{outdir}', 'sim_data', '{ntag}_data.h5'),
+)
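+# e.g., with the defaults '-o sim_output -t imcal_ml', rec_file expands to
+# 'sim_output/sim_data/imcal_ml_rec.root' once the argument values are filled in below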
+# default values for argument parser
+DEFAULT_COMPACT = os.path.join(
+        os.environ.get('DETECTOR_PATH', ''),
+        '{}.xml'.format(os.environ.get('DETECTOR_CONFIG', ''))
+        )
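+# e.g., '$DETECTOR_PATH/epic_brycecanyon.xml' if DETECTOR_CONFIG=epic_brycecanyon (hypothetical value)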
+# defined steps
+SCRIPT_STEPS = (
+    'sim',      # step 1: simulation to generate samples
+    'epscan',   # step 2: e/p cut scan to find the optimized cut
+    'prep',     # step 3: prepare tagging data (apply e/p cut)
+    'train',    # step 4: train ML model and benchmark it
+    # 'bmk',      # step 5: benchmarking
+)
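+# steps can be selected via --steps, e.g., --steps 'epscan,prep,train' skips the (slow) simulation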
+
+
+# simulation and reconstruction
+def gen_sim_rec(**kwargs):
+    # generate particles
+    gen_cmd = [
+        'python {gen_script} {gen_file}',
+        '-n {nev}',
+        '-s {seed}',
+        '--angmin {angmin} --angmax {angmax}',
+        '--pmin {pmin} --pmax {pmax}',
+        '--particles {particles}',
+        ]
+    gen_cmd = ' '.join(gen_cmd).format(**kwargs).split(' ')
+    subprocess.run(gen_cmd)
+
+    # simulation
+    sim_cmd = [
+        'ddsim --runType batch --part.minimalKineticEnergy 1*TeV --filter.tracker edep0',
+        '-v WARNING',
+        '--numberOfEvents {nev}',
+        # '--physics.list {physics_list}',
+        '--inputFiles {gen_file}',
+        '--outputFile {sim_file}',
+        '--compact {compact}',
+        ]
+    if 'seed' in kwargs and kwargs['seed'] > 0:
+        sim_cmd += ['--random.seed {seed}']
+    sim_cmd = ' '.join(sim_cmd).format(**kwargs).split(' ')
+    return_code = subprocess.run(sim_cmd).returncode
+    print(return_code)
+    if return_code is not None and return_code < 0:
+        print("ERROR running simulation!")
+        exit(return_code)
+    subprocess.run(['rootls', '-t', kwargs['sim_file']])
+
+    # reconstruction with juggler
+    # export to environment variables (used to pass arguments to the option file)
+    run_env = os.environ.copy()
+    juggler_vars = [
+        'JUGGLER_SIM_FILE {sim_file}',
+        'JUGGLER_REC_FILE {rec_file}',
+        'JUGGLER_COMPACT_PATH {compact}',
+        'JUGGLER_N_EVENTS {nev}',
+        ]
+    lst = ' '.join(juggler_vars).format(**kwargs).split(' ')
+    run_env.update({lst[i]: lst[i + 1] for i in range(0, len(lst), 2)})
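+    # run_env now carries, e.g., JUGGLER_N_EVENTS=<nev>, which the option file
+    # (imaging_ml_data.py) reads back via os.environ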
+
+    rec_cmd = 'gaudirun.py {rec_script}'.format(**kwargs).split(' ')
+    print(rec_cmd)
+    return_code = subprocess.run(rec_cmd, env=run_env).returncode
+    print(return_code)
+    if return_code is not None and return_code < 0:
+        print("ERROR running juggler (reconstruction)!")
+        exit(return_code)
+    process = subprocess.run(['rootls', '-t', kwargs['rec_file']])
+
+
+
+if __name__ == '__main__':
+
+    # argument parser
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+            '-n', '--n-events', type=int,
+            dest='nev',
+            default=100,
+            help='number of events.'
+            )
+    parser.add_argument(
+            '-o', '--outdir', type=str,
+            dest='outdir',
+            default='sim_output',
+            help='output directory.'
+            )
+    parser.add_argument(
+            '-t', '--name-tag', type=str,
+            dest='ntag',
+            default='imcal_ml',
+            help='name tag for output files.'
+            )
+    parser.add_argument(
+            '-c', '--compact', type=str,
+            dest='compact',
+            default=DEFAULT_COMPACT,
+            help='path to detector compact file.'
+            )
+    parser.add_argument(
+            '-s', '--seed', type=int,
+            default=-1,
+            help='random seed passed to child scripts (only used if > 0).'
+            )
+    parser.add_argument(
+            '--batch-size', type=int,
+            dest='batch',
+            default=100000,
+            help='batch size to process data.'
+            )
+    parser.add_argument(
+            '--p-min', type=float,
+            dest='pmin',
+            default=1.5,
+            help='minimum momentum of particles.'
+            )
+    parser.add_argument(
+            '--p-max', type=float,
+            dest='pmax',
+            default=2.5,
+            help='maximum momentum of particles.'
+            )
+    parser.add_argument(
+            '--angle-min', type=float,
+            dest='angmin',
+            default=75,
+            help='minimum scattering angle of particles.'
+            )
+    parser.add_argument(
+            '--angle-max', type=float,
+            dest='angmax',
+            default=105,
+            help='maximum scattering angle of particles.'
+            )
+    parser.add_argument(
+            '--weight-pion', type=float,
+            dest='wpi',
+            default=10.,
+            help='weights for pions.'
+            )
+    parser.add_argument(
+            '--particles', type=str,
+            default='electron,pion-',
+            help='particle names, separated by \",\".'
+            )
+    parser.add_argument(
+            '--test-size', type=float,
+            default=0.2,
+            help='relative size of test samples (between 0-1).'
+            )
+    parser.add_argument(
+            '--no-epcut', action='store_true',
+            default=False,
+            help='do not apply the E/p cut to ML samples; useful for test runs with small samples.'
+            )
+    parser.add_argument(
+            '--steps', type=str,
+            default=', '.join(SCRIPT_STEPS),
+            help='FOR DEV: choose the steps to be executed ({}).'.format(', '.join(SCRIPT_STEPS))
+            )
+
+    args = parser.parse_args()
+    kwargs = vars(args)
+
+    # prepare
+    steps = [p.strip() for p in args.steps.split(',')]
+
+    # make dirs, add paths to kwargs
+    FILE_NAMES.update({key: val.format(**kwargs) for key, val in FILE_NAMES.items()})
+    for key, val in FILE_NAMES.items():
+        if key.endswith('_dir'):
+            os.makedirs(val, exist_ok=True)
+    kwargs.update(FILE_NAMES)
+
+    # simulation for benchmark samples
+    if SCRIPT_STEPS[0] in steps:
+        gen_sim_rec(**kwargs)
+
+    # ep cut scan
+    if SCRIPT_STEPS[1] in steps:
+        cmd = [
+            'python {epscan_script} {rec_file}',
+            '-o {epscan_dir} --name-tag {ntag}',
+            '--batch-size {batch}',
+            '-s {seed}',
+            '-e 0.97'
+            ]
+        cmd = ' '.join(cmd).format(**kwargs).split(' ')
+        return_code = subprocess.run(cmd).returncode
+        if return_code is not None and return_code < 0:
+            print("ERROR running epcut scan script!")
+            exit(return_code)
+
+    # prepare ML data
+    if SCRIPT_STEPS[2] in steps:
+        cmd = [
+            'python {prep_script} {rec_file}',
+            '-o {ml_file}',
+            '--batch-size {batch}',
+            '-s {seed}',
+            '--nhits 50',
+            ]
+        if not args.no_epcut:
+            cmd.append('--epcut {epscan_dir}/{ntag}_result.json')
+        cmd = ' '.join(cmd).format(**kwargs).split(' ')
+        return_code = subprocess.run(cmd).returncode
+        if return_code is not None and return_code < 0:
+            print("ERROR running data preprocessing script!")
+            exit(return_code)
+
+    # train ML model and benchmarking
+    if SCRIPT_STEPS[3] in steps:
+        cmd = [
+            'python {ml_script} {ml_file}',
+            '-o {ml_dir} --name-tag {ntag}',
+            '--weight-pion-minus {wpi}',
+            '-s {seed}',
+            '--batch-size 256',
+            '--epochs 20',
+            '--test-size {test_size}',
+            ]
+        cmd = ' '.join(cmd).format(**kwargs).split(' ')
+        return_code = subprocess.run(cmd).returncode
+        if return_code is not None and return_code < 0:
+            print("ERROR running ML training script!")
+            exit(return_code)
+
+    # TODO: collect the two-step results (e/p cut and ML)
+
diff --git a/benchmarks/imaging_shower_ML/scripts/check_edep_dists.py b/benchmarks/imaging_shower_ML/scripts/check_edep_dists.py
deleted file mode 100644
index 8e4932cdbb060fa169e41a6599476e14ef677b45..0000000000000000000000000000000000000000
--- a/benchmarks/imaging_shower_ML/scripts/check_edep_dists.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import ROOT
-import os
-import gc
-import numpy as np
-import pandas as pd
-import argparse
-import sys
-import matplotlib.pyplot as plt
-
-
-# read from RDataFrame and flatten a given collection, return pandas dataframe
-def flatten_collection(rdf, collection, cols=None, event_colname='event'):
-    if not cols:
-        cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
-    else:
-        cols = ['{}.{}'.format(collection, c) for c in cols]
-    if not cols:
-        print('cannot find any branch under collection {}'.format(collection))
-        return pd.DataFrame()
-
-    data = rdf.AsNumpy(cols)
-    # flatten the data, add an event id to identify clusters from different events
-    evns = []
-    for i, vec in enumerate(data[cols[0]]):
-        evns += [i]*vec.size()
-    for n, vals in data.items():
-        # make sure ints are not converted to floats
-        typename = vals[0].__class__.__name__.lower()
-        dtype = np.int64 if 'int' in typename or 'long' in typename else np.float32
-        # type safe creation
-        data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
-    # build data frame
-    dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
-    dfp.loc[:, event_colname] = evns
-    return dfp
-
-
-# load root macros, input is an argument string
-def load_root_macros(arg_macros):
-    for path in arg_macros.split(','):
-        path = path.strip()
-        if os.path.exists(path):
-            ROOT.gROOT.Macro(path)
-        else:
-            print('\"{}\" does not exist, skip loading it.'.format(path))
-
-
-def cartesian_to_polar(x, y, z):
-    r = np.sqrt(x**2 + y**2 + z**2)
-    rc = np.sqrt(x**2 + y**2)
-    theta = np.arccos(z / r)
-    phi = np.arctan2(y, x)
-    eta = -np.log(np.tan(theta / 2.))
-    return r, theta, phi, rc, eta
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    parser.add_argument('file', type=str, help='path to root file')
-    parser.add_argument('--sim', action='store_true', help='flag switch between sim and rec data (default rec)')
-    parser.add_argument('-o', type=str, default='.', dest='outdir', help='output directory')
-    # parser.add_argument('--compact', type=str, default='', dest='compact', help='compact file')
-    parser.add_argument('-m', '--macros', type=str, default='rootlogon.C', dest='macros',
-                         help='root macros to load (accept multiple paths separated by \",\")')
-    parser.add_argument('--branch', type=str, default='EcalBarrelImagingHitsReco', help='name of data branch (edm4eic::CalorimeterHitCollection)')
-    parser.add_argument('--truth-branch', type=str, default='MCParticles', help='name of truth mc branch')
-    parser.add_argument('--edep-max', type=float, default=0., help='maximum edep (GeV) to plot')
-    parser.add_argument('--edep-nbins', type=int, default=200, help='number of bins')
-    parser.add_argument('--name-tag', type=str, default='test', help='name tag to save the file')
-    parser.add_argument('--samp-frac', type=float, default=1.0, help='sampling fraction')
-    args = parser.parse_args()
-
-    os.makedirs(args.outdir, exist_ok=True)
-    load_root_macros(args.macros)
-
-    # read data and MCParticles
-    rdf = ROOT.RDataFrame("events", args.file)
-
-    mc_branch = args.truth_branch
-    dfm = flatten_collection(rdf, mc_branch, ['generatorStatus', 'PDG', 'momentum.x', 'momentum.y', 'momentum.z', 'mass'])
-    dfm.rename(columns={c: c.replace(mc_branch + '.', '') for c in dfm.columns}, inplace=True)
-    # selete incident particles
-    dfm = dfm[dfm['generatorStatus'].isin([0, 1])]
-    # NOTE: assumed single particles
-    dfm = dfm.groupby('event').first()
-    # p, theta, phi, pT, eta = cartesian_to_polar(*dfm[['momentum.x', 'momentum.y', 'momentum.z']].values.T)
-
-    if args.sim:
-        df = flatten_collection(rdf, args.branch, ['energyDeposit'])
-        df.rename(columns={c: c.replace(args.branch + '.', '') for c in df.columns}, inplace=True)
-        df.rename(columns={'energyDeposit': 'energy'}, inplace=True)
-    else:
-        df = flatten_collection(rdf, args.branch, ['layer', 'energy', 'position.x', 'position.y', 'position.z'])
-        df.rename(columns={c: c.replace(args.branch + '.', '') for c in df.columns}, inplace=True)
-
-    dfe = df.groupby('event')['energy'].sum().reset_index()
-    # determine histrogram bins
-    if args.edep_max <= 0.:
-        args.edep_max = dfe['energy'].quantile(0.99)*1.2
-    bins = np.linspace(0., args.edep_max, args.edep_nbins + 1)
-    bincenters = (bins[1:] + bins[:-1])/2.
-
-    # get particle types
-    fig, ax = plt.subplots(figsize=(16, 9), dpi=120, gridspec_kw={'left': 0.15, 'right': 0.95})
-    ax.set_xlabel('Energy Deposit / {:.2f} (GeV)'.format(args.samp_frac), fontsize=24)
-    ax.set_ylabel('Normalized Counts', fontsize=24)
-    ax.set_yscale('log')
-    ax.grid(linestyle=':')
-    ax.tick_params(labelsize=24)
-    ax.set_axisbelow(True)
-
-    hist_vals, hist_cols = [], []
-    pdgbase = ROOT.TDatabasePDG()
-    for pdgid in dfm['PDG'].unique():
-        particle = pdgbase.GetParticle(int(pdgid))
-        if not particle:
-            print("Unknown pdgcode {}, they are ignored".format(int(pdgid)))
-            continue
-        events_indices = dfm[dfm.loc[:, 'PDG'] == pdgid].index.unique()
-        print("{} entries of particle {}".format(len(events_indices), particle.GetName()))
-
-        dfe_part = dfe.loc[dfe['event'].isin(events_indices)]
-
-        edep_vals, _, _ = ax.hist(dfe_part['energy'] / args.samp_frac,
-                                  weights = [1. / dfe_part.shape[0]]*dfe_part.shape[0],
-                                  histtype='step', bins=bins, label=particle.GetName())
-        hist_vals.append(edep_vals)
-        hist_cols.append(particle.GetName())
-
-    pd.DataFrame(index=bincenters, data=np.vstack(hist_vals).T, columns=hist_cols)\
-      .to_csv('{}.csv'.format(args.name_tag))
-    ax.legend(fontsize=24)
-    fig.savefig('{}.png'.format(args.name_tag))
-
-
diff --git a/benchmarks/imaging_shower_ML/scripts/epcut_scan.py b/benchmarks/imaging_shower_ML/scripts/epcut_scan.py
new file mode 100644
index 0000000000000000000000000000000000000000..f153d70751991e8348eff6b5078c7c19055c91bf
--- /dev/null
+++ b/benchmarks/imaging_shower_ML/scripts/epcut_scan.py
@@ -0,0 +1,229 @@
+'''
+    A script to scan for the optimal cut on layer and E/p.
+    It scans all possible ScFi layers (20 in the EPIC brycecanyon configuration).
+    The results give the best cut (highest pion rejection) on [layer, E/p] for a targeted electron efficiency.
+
+    Chao Peng (ANL)
+    2022/11/13
+'''
+import os
+import gc
+import sys
+import json
+import ROOT
+import argparse
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import matplotlib.backends.backend_pdf as mpdf
+from collections import OrderedDict
+from utils import flatten_collection, imcal_info
+
+
+# default color cycle of matplotlib
+prop_cycle = plt.rcParams['axes.prop_cycle']
+colors = prop_cycle.by_key()['color']
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+            'file', type=str,
+            help='path to root file.'
+            )
+    parser.add_argument(
+            '-o', type=str,
+            default='epcuts',
+            dest='outdir',
+            help='output directory.'
+            )
+    parser.add_argument(
+            '--batch-size', type=int,
+            default=100000,
+            help='batch size to process data.'
+            )
+    parser.add_argument(
+            '--name-tag', type=str,
+            default='imcal',
+            dest='ntag',
+            help='nametag for output file.'
+            )
+    # parser.add_argument('--compact', type=str, default='', dest='compact', help='compact file')
+    parser.add_argument(
+            '-b', '--branch', type=str,
+            default='EcalBarrelScFiGridReco',
+            help='name of data branch (edm4eic::CalorimeterHitCollection).'
+            )
+    parser.add_argument(
+            '-b2', '--truth-branch', type=str,
+            default='MCParticles',
+            help='name of truth mc branch.'
+            )
+    parser.add_argument(
+            '-s', '--seed', type=int,
+            default=-1,
+            help='random seed.'
+            )
+    # flat resolution for now; may need to add P-dependent terms
+    parser.add_argument(
+            '-r', '--tracking-resolution', type=float,
+            dest='res',
+            default=5e-3,
+            help='relative resolution of tracking (used to smear truth momentum).'
+            )
+    parser.add_argument(
+            '-e', '--efficiency', type=float,
+            dest='eff',
+            default=0.97,
+            help='targeted efficiency from the cuts.'
+            )
+    parser.add_argument(
+            '--sampling-fractions', type=str,
+            default='0,0.11,2200',
+            help='sampling fractions range (min,max,nbins).'
+            )
+    args = parser.parse_args()
+
+    os.makedirs(args.outdir, exist_ok=True)
+
+    if args.seed > 0:
+        np.random.seed(args.seed)
+
+    # read data and MCParticles
+    rdf = ROOT.RDataFrame('events', args.file)
+    # event range
+    ntotal = rdf.Count().GetValue()
+    ev_bins = np.append(np.arange(0, ntotal, args.batch_size), [ntotal])
+
+    # data container
+    nlayers = int(imcal_info.nlayers_scfi)
+    samps = args.sampling_fractions.split(',')
+    samp_range = (float(samps[0]), float(samps[1]))
+    samp_nbins = int(samps[2])
+    # print(nlayers)
+    ep_bins = np.linspace(*samp_range, samp_nbins + 1)
+    ep_centers = (ep_bins[:-1] + ep_bins[1:])/2.
+    el_hist = np.zeros(shape=(nlayers, samp_nbins))
+    pi_hist = np.zeros(shape=(nlayers, samp_nbins))
+
+    # process events
+    # avoid insane amount of memory use with a large dataset
+    for ev_begin, ev_end in zip(ev_bins[:-1], ev_bins[1:]):
+        gc.collect()
+        print('E/p cut: processing events {:d} - {:d}'.format(ev_begin, ev_end))
+
+        dfm = flatten_collection(rdf, args.truth_branch, (int(ev_begin), int(ev_end)), cols=[
+                'generatorStatus',
+                'PDG',
+                'momentum.x', 'momentum.y', 'momentum.z',
+                'mass'
+                ])
+
+        # select incident particles
+        dfm = dfm[dfm['generatorStatus'].isin([0, 1])]
+        # NOTE: assumed single particles
+        dfm = dfm.groupby('event').first().reset_index()
+        # True momentum
+        dfm.loc[:, 'P'] = np.sqrt(dfm['momentum.x']**2 + dfm['momentum.y']**2 + dfm['momentum.z']**2)
+        # Smear with tracking resolution (args.res, default 0.5%)
+        dfm.loc[:, 'Ptrack'] = dfm['P']*np.random.normal(1.0, args.res, len(dfm))
+        # print(dfm[['PDG', 'P', 'Ptrack']])
+        # print(dfm['PDG'].value_counts())
+
+        # reconstructed simulation hits
+        df = flatten_collection(rdf, args.branch, (int(ev_begin), int(ev_end)), cols=[
+                'layer',
+                'energy',
+                ])
+        # cumulative energy sum for layer
+        dfe = df.groupby(['event', 'layer'])['energy'].sum().groupby(level=0).cumsum()
+        # fill missing layer (no hits in that layer) edep values with the previous value
+        dfe = dfe.reset_index(level='event').groupby('event', as_index=False)\
+                 .apply(lambda grp: grp.reindex(np.arange(nlayers) + 1, method='ffill'))\
+                 .reset_index().drop('level_0', axis=1)
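+        # e.g., if an event only has hits in layers 1 and 3, layer 2 inherits the
+        # cumulative edep of layer 1, so every event has entries for all layers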
+        # print(dfe)
+        dfe = dfe.reset_index().merge(dfm[['event', 'Ptrack', 'PDG']], on='event')
+        dfe.loc[:, 'eoverp'] = dfe['energy']/dfe['Ptrack']
+
+        # NOTE: assumed only e- and pi-
+        el_mask = dfe['PDG'] == 11
+        dfel = dfe[el_mask]
+        dfpi = dfe[~el_mask]
+
+        # fill data layer by layer
+        for hist, dftmp in zip([el_hist, pi_hist], [dfel, dfpi]):
+            for i in np.arange(nlayers):
+                vals = dftmp[dftmp['layer'] == i + 1]['eoverp'].values
+                hvals, _ = np.histogram(vals, bins=ep_bins)
+                hist[i] += hvals
+
+    # save binned data
+    for hist, label in zip([el_hist, pi_hist], ['el', 'pi']):
+        dfr = pd.DataFrame(
+                columns=['bin_left', 'bin_right'] + ['layer_{:d}'.format(i+1) for i in np.arange(nlayers)],
+                data=np.vstack([ep_bins[:-1], ep_bins[1:], hist]).T
+                )
+        dfr.to_csv(os.path.join(args.outdir, '{}_eop_{}.csv'.format(args.ntag, label)), index=False)
+        # print(label, np.sum(hist, axis=1))
+        # print(dfr)
+
+
+    # study the epcut performance with binned data
+    best = {
+        'layer': int(nlayers + 1),
+        'ep_cut': 0.,
+        'el_eff': 0.,
+        'pi_rej': 0.,
+        }
+    ep_dict = OrderedDict([
+        ('info', {
+            'nsamples': int(ntotal),
+            'targeted_efficiency': args.eff,
+            'tracking_resolution': args.res
+            }),
+        ('best', best),
+        ])
+    pdf = mpdf.PdfPages(os.path.join(args.outdir, '{}_layers.pdf'.format(args.ntag)))
+    for i in np.arange(nlayers):
+        elvals, pivals = el_hist[i], pi_hist[i]
+        # cut position
+        # NOTE: larger error with wider bins
+        perc = np.cumsum(elvals)/np.sum(elvals)
+        idx = len(perc[perc < 1. - args.eff])
+        ep_cut = ep_centers[idx]
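+        # e.g., with eff=0.97, idx marks the bin below which ~3% of electrons fall,
+        # so requiring E/p >= ep_cut keeps roughly 97% of electrons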
+        # efficiency
+        eff = np.sum(elvals[idx:])/np.sum(elvals)
+        # rejection power
+        rej = np.sum(pivals[:idx])/np.sum(pivals)
+        ep_dict['layer_{:d}'.format(i + 1)] = {'ep_cut': ep_cut, 'el_eff': eff, 'pi_rej': rej}
+        # greater or equal here because we always favor a cut using more layers
+        if rej >= best['pi_rej']:
+            best.update({
+                'layer': int(i + 1),
+                'ep_cut': ep_cut,
+                'el_eff': eff,
+                'pi_rej': rej,
+                })
+        # plot showing the results
+        fig, ax = plt.subplots(figsize=(8, 8))
+        ax.hist(ep_centers, weights=pivals, bins=50, label=r'$\pi^-$', color=colors[0], ec=colors[0], alpha=0.5)
+        ax.hist(ep_centers, weights=elvals, bins=50, label=r'$e^-$', color=colors[1], ec=colors[1], alpha=0.5)
+        ax.legend(fontsize=20, ncol=2, loc='upper center', bbox_to_anchor=(0.5, 1.12),)
+        ax.tick_params(labelsize=20)
+        ax.set_xlabel('$E/p$', fontsize=20)
+        ax.set_ylabel('Counts', fontsize=20)
+        ax.axvline(x=ep_cut, color='k', ls='--', lw=2)
+        props = dict(boxstyle='round', facecolor='white', alpha=0.5)
+        ax.text(0.5, 0.97,
+                'Layer $\\leq${:d}\n$\\epsilon_e={:.2f}$%\n$R_{{\\pi}}={:.2f}$%'.format(i + 1, eff*100., rej*100.),
+                transform=ax.transAxes, fontsize=20, va='top', ha='center', bbox=props)
+        pdf.savefig(fig)
+    pdf.close()
+
+    # save cut position and performance
+    ep_dict.update({'best': best})
+    ep_json = json.dumps(ep_dict, indent=4)
+    with open(os.path.join(args.outdir, '{}_result.json'.format(args.ntag)), 'w') as outfile:
+        outfile.write(ep_json)
+
diff --git a/benchmarks/imaging_shower_ML/scripts/ml_data_preprocess.py b/benchmarks/imaging_shower_ML/scripts/ml_data_preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..88fdf6427e0f0d3aa74eb672f7152237f31d2fa8
--- /dev/null
+++ b/benchmarks/imaging_shower_ML/scripts/ml_data_preprocess.py
@@ -0,0 +1,288 @@
+'''
+    A script to prepare datasets for ML training
+'''
+import ROOT
+import os
+import gc
+import sys
+import json
+import numpy as np
+import pandas as pd
+import argparse
+from utils import flatten_collection, cartesian_to_polar, imcal_info
+
+
+pd.set_option('display.max_rows', 500)
+
+# normalizing range for features
+# NOTE: assumes the barrel ecal; r0 values are normalized over the range 0.5 - 2 meters
+WIN_ETA = (-0.2, 0.2) # unitless
+WIN_PHI = (-0.4, 0.4) # rad
+WIN_R0 = (500, 2000) # mm
+WIN_EH = (0, 0.05) # GeV
+
+
+# only positive weights are returned
+# basis defines the minimum contribution (4.6 is about 1%)
+def log_weights(val, basis=4.6):
+    w = np.log(val) + basis
+    return np.clip(w, 0., None)
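+# e.g., an energy ratio of 1% gives ln(0.01) + 4.6 ~= 0, so hits contributing
+# less than about 1% of the event energy are effectively zero-weighted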
+
+
+def read_epcut(path):
+    if not path or not os.path.exists(path):
+        print('Prepare ML data: not applying E/p cut because E/p file is not provided or missing.')
+        return None
+    with open(path, 'r') as f:
+        data = json.load(f)
+        epcut = data.get('best', {})
+        epcut.update(data.get('info', {}))
+        return epcut
+
+
+def lin_norm(vals, window, val_name='', var_name='', warn_win_size=True):
+    # statistically meaningful
+    if warn_win_size and len(vals) > 1000:
+        perc = (np.percentile(vals, 5), np.percentile(vals, 95))
+        if perc[0] < window[0] or perc[1] > window[1]:
+            warnstr = 'WARNING = Prepare ML data: normalization window {} does not fit {} values {}.'\
+                      .format(window, val_name, perc)
+            if var_name:
+                warnstr += ' Try adjusting {}'.format(var_name)
+            print(warnstr)
+    return np.clip((vals - window[0])/(window[1] - window[0]), 0., 1.)
+
+
+# prepare ML datasets from dataframe
+def format_ml_data(dfh, dft, nlayers=20, nhits=50,
+                   win_eta=WIN_ETA, win_phi=WIN_PHI, win_r0=WIN_R0, win_eh=WIN_EH):
+    # sanity check
+    dfh = dfh[dfh['energy'] > 0.]
+
+    event_group = dfh.groupby('event')
+    # convert eta, phi, edep so they are within [0, 1]
+    dfh.loc[:, 'etotal'] = event_group['energy'].transform('sum')
+    # get the hits energy ratio
+    dfh.loc[:, 'eratio'] = dfh['energy']/dfh['etotal']
+
+    # calculate weighted center of the event, in the future this can be replaced by tracking input
+    # NOTE: assumed single particle events
+    # log weighting (basis 4.6; see log_weights above)
+    dfh.loc[:, 'logw'] = log_weights(dfh['eratio'].values)
+    dfh.loc[:, 'wtotal'] = event_group['logw'].transform('sum')
+    # sanity check
+    dfh = dfh[dfh['wtotal'] > 0.]
+    # regroup because of mutation
+    event_group = dfh.groupby('event')
+
+    dfh.loc[:, 'logw'] = dfh['logw']/dfh['wtotal']
+    dfh.loc[:, 'xw'] = dfh['position.x']*dfh['logw']
+    dfh.loc[:, 'yw'] = dfh['position.y']*dfh['logw']
+    dfh.loc[:, 'zw'] = dfh['position.z']*dfh['logw']
+
+    # position center
+    dfh.loc[:, 'xc'] = event_group['xw'].transform('sum')
+    dfh.loc[:, 'yc'] = event_group['yw'].transform('sum')
+    dfh.loc[:, 'zc'] = event_group['zw'].transform('sum')
+
+    # single hits features
+    r, theta, phi, r0, eta = cartesian_to_polar(*dfh[['position.x', 'position.y', 'position.z']].values.T)
+    # center
+    rc, thetac, phic, r0c, etac = cartesian_to_polar(*dfh[['xc', 'yc', 'zc']].values.T)
+
+    # data for ML
+    dfml = dfh[['event', 'layer']].copy()
+    # NOTE: not much of a difference observed between eratio and normalized E within a narrow momentum range (e.g., 1.5-2.5 GeV/c)
+    dfml.loc[:, 'eh'] = dfh['eratio'].values
+    # dfml.loc[:, 'eh'] = lin_norm(dfh['energy'].values, win_eh, "Ehit")
+    dfml.loc[:, 'r0'] = lin_norm(r0, win_r0, "r0")
+    dfml.loc[:, 'eta'] = lin_norm(eta - etac, win_eta, "eta")
+    dfml.loc[:, 'phi'] = lin_norm(phi - phic, win_phi, "phi")
+
+    # sort by energy
+    dfml = dfml.sort_values(['event', 'layer', 'eh'], ascending=[True, True, False])
+    dfml.loc[:, 'hit'] = dfml.groupby(['event', 'layer']).cumcount() + 1
+    dfml.set_index(['event', 'layer', 'hit'], inplace=True)
+
+    # shape data (nevents, nlayers, nhits)
+    # padding with zeros
+    # NOTE: if a layer has more than nhits, the least energetic hits are dropped because of the previous energy sorting
+    dfml = dfml.reindex(
+                pd.MultiIndex.from_product([dfml.index.levels[0], np.arange(nlayers) + 1, np.arange(nhits) + 1],
+                names=['event', 'layer', 'hit']),
+                fill_value=0
+                )
+
+    return dfml, list(event_group.groups.keys())
+
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Prepare datasets for ML training from reconstruction results')
+    parser.add_argument(
+            'file', type=str,
+            help='path to the input file.'
+            )
+    parser.add_argument(
+            '-o', type=str,
+            default='.',
+            dest='outpath',
+            help='path to output data file (hdf5).'
+            )
+    parser.add_argument(
+            '--batch-size', type=int,
+            default=100000,
+            help='batch size to process data.'
+            )
+    parser.add_argument(
+            '-s', '--seed', type=int,
+            default=-1,
+            help='random seed.'
+            )
+    parser.add_argument(
+            '--nhits', type=int,
+            default=50,
+            dest='nhits',
+            help='number of hits per layer.'
+            )
+    parser.add_argument(
+            '-b', '--img-branch', type=str,
+            default='EcalBarrelImagingHitsReco',
+            help='name of data branch (edm4eic::CalorimeterHitCollection).'
+            )
+    parser.add_argument(
+            '-b2', '--scfi-branch', type=str,
+            default='EcalBarrelScFiGridReco',
+            help='name of data branch (edm4eic::CalorimeterHitCollection).'
+            )
+    parser.add_argument(
+            '-b3', '--truth-branch', type=str,
+            default='MCParticles',
+            help='name of truth mc branch.'
+            )
+    parser.add_argument(
+            '--epcut', type=str,
+            default=None,
+            help='A json file contains epcut information (best will be used).'
+            )
+    parser.add_argument(
+            '--append-data', action='store_true',
+            default=False,
+            help='Append data to pre-existing data file.'
+            )
+    args = parser.parse_args()
+
+    data_store = pd.HDFStore(args.outpath)
+    # print(data_store.keys())
+    # clear data store
+    if not args.append_data:
+        for dkey in [imcal_info.ml_data_key, imcal_info.truth_data_key]:
+            if dkey in data_store.keys():
+                data_store.remove(dkey)
+                print('Prepare ML data: removed pre-existing dataset {}'.format(dkey))
+
+    if args.seed > 0:
+        np.random.seed(args.seed)
+
+    # read data and MCParticles
+    rdf = ROOT.RDataFrame("events", args.file)
+    # event range
+    ntotal = rdf.Count().GetValue()
+    ev_bins = np.append(np.arange(0, ntotal, args.batch_size), [ntotal])
+
+    # epcut info
+    epcut = read_epcut(args.epcut)
+    # print(epcut)
+
+    # process events
+    # avoid insane amount of memory use with a large dataset
+    for ev_begin, ev_end in zip(ev_bins[:-1], ev_bins[1:]):
+        gc.collect()
+        print('Prepare ML data: processing events {:d} - {:d}'.format(ev_begin, ev_end))
+
+        dfm = flatten_collection(rdf, args.truth_branch, (int(ev_begin), int(ev_end)), cols=[
+                'generatorStatus',
+                'PDG',
+                'momentum.x', 'momentum.y', 'momentum.z',
+                'mass'
+                ])
+
+        # select incident particles
+        dfm = dfm[dfm['generatorStatus'].isin([0, 1])]
+        # NOTE: assumed single particles
+        dfm = dfm.groupby('event').first()
+        # True momentum
+        dfm.loc[:, 'P'] = np.sqrt(dfm['momentum.x']**2 + dfm['momentum.y']**2 + dfm['momentum.z']**2)
+        # print(dfm)
+
+        # scfi information
+        dfs = flatten_collection(rdf, args.scfi_branch, (int(ev_begin), int(ev_end)), cols=[
+                'layer',
+                'energy',
+                'position.x', 'position.y', 'position.z',
+                ])
+
+        # imaging layer information
+        dfi = flatten_collection(rdf, args.img_branch, (int(ev_begin), int(ev_end)), cols=[
+                'layer',
+                'energy',
+                'position.x', 'position.y', 'position.z',
+                ])
+
+        # apply ep_cut
+        if epcut:
+            # cumulative energy sum for layer
+            dfe = dfs.groupby(['event', 'layer'])['energy'].sum().groupby(level=0).cumsum()
+            # fill missing layer (no hits in that layer) edep values with the previous value
+            dfe = dfe.reset_index(level='event').groupby('event', as_index=False)\
+                     .apply(lambda grp: grp.reindex(np.arange(imcal_info.nlayers_scfi) + 1, method='ffill'))\
+                     .reset_index().drop('level_0', axis=1)
+            # unstack layer edeps
+            dfe = dfe.set_index(['event', 'layer'])\
+                     .stack().unstack(level='layer')
+            dfe.columns = ['layer_{}'.format(i) for i in dfe.columns]
+            dfe.columns.name = None
+            dfe = dfe.reset_index().drop('level_1', axis=1).set_index('event', drop=True)
+            # print(dfe)
+
+            p_smear = dfm['P']*np.random.normal(1.0, epcut['tracking_resolution'], len(dfm))
+            ep_mask = (dfe['layer_{layer}'.format(**epcut)]/p_smear) >= epcut['ep_cut']
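+            # keep only events whose cumulative edep up to the best layer,
+            # divided by the smeared momentum, passes the scanned E/p cut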
+            # print(np.sum(ep_mask))
+            dfm = dfm[ep_mask]
+            dfs = dfs[dfs['event'].isin(dfm.index)]
+            dfi = dfi[dfi['event'].isin(dfm.index)]
+
+        # reformat data for ML
+        dfi, ievs = format_ml_data(dfi, dfm, nlayers=imcal_info.nlayers_img, nhits=args.nhits)
+        dfs, sevs = format_ml_data(dfs, dfm, nlayers=imcal_info.nlayers_scfi, nhits=args.nhits)
+        if ievs != sevs:
+            common = np.intersect1d(ievs, sevs)
+            dfi = dfi[dfi.index.isin(common, level='event')]
+            dfs = dfs[dfs.index.isin(common, level='event')]
+            dfm = dfm[dfm.index.isin(common)].sort_index()
+        else:
+            dfm = dfm[dfm.index.isin(ievs)].sort_index()
+
+        # assign layer type to different dataset
+        dfi.loc[:, 'ltype'] = 'img'
+        dfi.loc[:, 'lval'] = 0
+        dfs.loc[:, 'ltype'] = 'scfi'
+        dfs.loc[:, 'lval'] = 1
+
+        # null eta value for ScFi layer
+        # NOTE: add it back in the future once a timing-based z resolution is available
+        dfs['eta'] = 0.
+
+        # combine two datasets
+        df = pd.concat([dfi.reset_index(), dfs.reset_index()], ignore_index=True)\
+               .set_index(['event', 'ltype', 'layer', 'hit']).sort_index()
+        # print(df.head(100))
+        data_store.append(imcal_info.ml_data_key, df, format='t', data_columns=True)
+        data_store.append(imcal_info.truth_data_key, dfm[['PDG', 'P', 'mass']], format='t', data_columns=True)
+
+    data_store.close()
+
+
diff --git a/benchmarks/imaging_shower_ML/scripts/ml_training.py b/benchmarks/imaging_shower_ML/scripts/ml_training.py
index 82517e3b9b53a65a04115ef464d07abffa78e2c1..730eccf6408edf6874178008424b4845afd140ad 100644
--- a/benchmarks/imaging_shower_ML/scripts/ml_training.py
+++ b/benchmarks/imaging_shower_ML/scripts/ml_training.py
@@ -1,283 +1,263 @@
+'''
+    Script for supervised training of a simple ML model with TensorFlow 2,
+    used for e/pi separation with the imaging barrel ecal.
+
+    Chao Peng (ANL)
+    2022/11/11
+'''
 import os
+import sys
+import json
+import argparse
 import pandas as pd
 import numpy as np
+import ROOT
 from collections import OrderedDict
-from scipy.interpolate import CubicSpline
-import uproot as uproot
+
+import matplotlib as mpl
 import matplotlib.pyplot as plt
-import logging
-import sys
-import subprocess
-import argparse
+from matplotlib.ticker import MultipleLocator, FixedLocator, MaxNLocator
+from matplotlib.colors import LogNorm
 
 import tensorflow as tf
 from tensorflow import keras
 from tensorflow.keras import layers
-import matplotlib as mpl
-from matplotlib.ticker import MultipleLocator, FixedLocator, MaxNLocator
-from matplotlib.colors import LogNorm
-from particle import Particle
-
-print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
-
-# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
-# 5 layers 0-1-2-6-9
-# 6 layers 0-1-2-3-4-6-9
-
-
-def get_layer_cum_edeps(edep):
-    return np.tile(np.arange(edep.shape[1]), (edep.shape[0], 1)).flatten(), np.cumsum(edep, axis=1).flatten()
-
-
-def draw_layer_edep(edep, label=''):
-    layer_num, edep_val = get_layer_cum_edeps(edep)
-    figure, axis = plt.subplots(figsize=(16, 9), dpi=120)
-    axis.hist2d(layer_num, edep_val,
-                bins=(np.arange(0.5, edep.shape[1] + 0.5, step=1.0), 100), norm=mpl.colors.LogNorm())
-    axis.grid(linestyle=':')
-    axis.set_axisbelow(True)
-    axis.tick_params(labelsize=24, direction='in', which='both')
-    axis.set_ylabel('Cumulative Edep (MeV)', fontsize=24)
-    axis.set_xlabel('Layer number', fontsize=24)
-    axis.xaxis.set_major_locator(MaxNLocator(edep.shape[1], integer=True))
-    if label:
-        axis.set_title(label, fontsize=26)
-    return figure, axis
-
-parser = argparse.ArgumentParser()
-parser.add_argument('-t', '--nametag', type=str, default='IMCAL_ML', help='Name tag for output files.')
-parser.add_argument('--pmin', type=float, default=0.5, help='Minimum momentum of particles.')
-parser.add_argument('--pmax', type=float, default=10, help='Maximum momentum of particles.')
-args = parser.parse_args()
-kwargs = vars(args)
-
-train_model = True
-do_epcut = False
-#nametag = 'imcal'
-#partag = 'epimuphka'
-#prange = (0.5, 10.0)
-#etag = '{:.1f}_{:.1f}_combined'.format(*prange)
-epoch = 20
-data_shape = (29, 20, 5)
-#data_dir = r'sim_output/tag_data/{name}_{part}_{etag}'.format(name=nametag, part=partag, etag=etag)
-#out_dir = r'sim_output/tag_data/output/{name}_{part}_{etag}'.format(name=nametag, part=partag, etag=etag)
-data_dir = os.path.join('sim_output/tag_data/', '{nametag}_{pmin}_{pmax}_combined'.format(**kwargs))
-out_dir = os.path.join('sim_output/tag_data/output/', '{nametag}_{pmin}_{pmax}_combined'.format(**kwargs))
-raw_data = np.load(os.path.join(data_dir, 'data.npy'))
-raw_tags = np.load(os.path.join(data_dir, 'tag.npy'))
-edeps = np.load(os.path.join(data_dir, 'edep.npy'))
-
-# for interlayered data, null the last few layers
-astro_nlayers = 9
-astro_nlayers_used = [0, 1, 2, 3, 4]
-astro_nlayers_not_used = [2*i for i in np.arange(astro_nlayers) if i not in astro_nlayers_used]
-if len(astro_nlayers_used) and len(astro_nlayers_not_used):
-    raw_data[:, astro_nlayers_not_used] = 0
-
-os.makedirs(out_dir, exist_ok=True)
-# label converter
-labels = OrderedDict([
-    (pid, [i, Particle.from_pdgid(pid).name, ''])
-     for i, pid in enumerate(sorted(np.unique(raw_tags.T[0].astype(int)), key=lambda x: np.abs(x)))
-])
-
-# add some labels in latex format
-name_dict = {'gamma': r'$\gamma$', '-': '$^-$', '+': '$^+$', 'pi': r'$\pi$', 'mu': r'$\mu$'}
-for pid, (i, name, _) in labels.items():
-    labelstr = name
-    for str1, str2 in name_dict.items():
-        labelstr = labelstr.replace(str1, str2)
-    labels[pid][2] = labelstr
-
-part_weights = {pid: 1.0 for pid, _ in labels.items()}
-part_weights.update({
-    11: 1.0,
-    -211: 10.0,
-})
-
-begin_layer, end_layer = 0, 9
-epcut = 0.085
-if do_epcut:
-    epi_ids = (11, -211)    # (11, -211)
-    # Eoverp cut in tags data
-    eovp = np.sum(edeps[:, begin_layer:end_layer], axis=1)*1e-3/raw_tags.T[1]
-    emask = raw_tags.T[0] == epi_ids[0]
-    pimask = raw_tags.T[0] == epi_ids[1]
-
-    epmask = tuple([eovp > epcut])
-    ep_eff = sum(raw_tags[epmask].T[0] == 11) / sum(emask)
-    ep_rej = 1. - sum(raw_tags[epmask].T[0] == -211) / sum(pimask)
-
-    bins = np.linspace(0, 0.15, 151)
-    fig, ax = plt.subplots(figsize=(12, 9), dpi=120)
-    ax.hist(eovp[emask], histtype='step', density=True, bins=bins, label=labels[epi_ids[0]][2])
-    ax.hist(eovp[pimask], histtype='step', density=True, bins=bins, label=labels[epi_ids[1]][2])
-    ax.axvline(epcut, color='k')
-    ax.set_xlabel('Edep / p', fontsize=24)
-    ax.set_ylabel('Normalized Counts', fontsize=24)
-    ax.tick_params(labelsize=24)
-    ax.legend(fontsize=24)
-    ax.text(0.2, 0.9, '$e$ eff. = {:.2f}%\n$\pi$ rej. = {:.2f}%'.format(ep_eff*100., ep_rej*100.),
-            fontsize=24, transform=ax.transAxes, ha='left', va='top')
-    ax.set_title('$E/p > {:.2f}$% @ {:d} - {:d} X$_0$'.format(epcut*100., begin_layer, end_layer), fontsize=24)
-    fig.savefig(os.path.join('results/ml/models/emcal/', 'pre_epcut.png'))
-
-    fig, _ = draw_layer_edep(edeps[emask], 'Cumulative Edep over layer for electrons')
-    fig.savefig(os.path.join('results/ml/models/emcal/', 'cum_edep_el.png'))
-    fig, _ = draw_layer_edep(edeps[pimask], 'Cumulative Edep over layer for pions')
-    fig.savefig(os.path.join('results/ml/models/emcal/', 'cum_edep_pi.png'))
-
-    data = raw_data[epmask]
-    tags = raw_tags[epmask]
-
-else:
-    ep_eff = 1.0
-    ep_rej = 0.
-    data = raw_data
-    tags = raw_tags
-
-pid = np.vectorize(lambda x: labels[x][0])(tags.T[0])
-weights = np.vectorize(lambda x: part_weights[x])(tags.T[0])
-
-if train_model:
-    indices = np.arange(len(data))
-    np.random.shuffle(indices)
-    id_train = indices[:int(len(data)*0.8)]
-    id_valid = indices[int(len(data)*0.8):]
-    x_train = data[id_train]
-    y_train = pid[id_train]
-    w_train = weights[id_train]
-
-    x_valid = data[id_valid]
-    y_valid = pid[id_valid]
-    w_valid = weights[id_valid]
-
-    model = keras.Sequential([
-        keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=data_shape),
-        # keras.layers.MaxPooling2D((2, 2), strides=2),
-        keras.layers.Dropout(0.25),
-        keras.layers.Conv2D(128, (2, 2), padding='same', activation='relu'),
-        # keras.layers.MaxPooling2D((2, 2), strides=2),
-        keras.layers.Dropout(0.3),
-        keras.layers.Conv2D(64, (2, 2), padding='same', activation='relu'),
-        # keras.layers.MaxPooling2D((2, 2), strides=2),
-
-        keras.layers.Flatten(),
-        keras.layers.Dense(128, activation='relu'),
-        keras.layers.Dropout(0.25),
-        keras.layers.Dense(32, activation='relu'),
-        keras.layers.Dense(len(labels), activation='softmax')
+
+from utils import imcal_info
+
+
+# default color cycle of matplotlib
+prop_cycle = plt.rcParams['axes.prop_cycle']
+colors = prop_cycle.by_key()['color']
+
+
+# full name, short name, latex str
+PARTICLE_DICT = OrderedDict([
+    (11, ('electron', 'electron', '$e^-$')),
+    (-211, ('pion-minus', 'pion-', r'$\pi^-$')),
     ])
 
-    model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-4),
-                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
-                  metrics=['accuracy'])
-    history = model.fit(x_train, y_train,
-                        epochs=epoch, sample_weight=w_train, validation_data=(x_valid, y_valid, w_valid))
-    model.save(os.path.join('results/ml/models/emcal/', 'model_epch{:d}'.format(epoch)))
-    
-    converter = tf.lite.TFLiteConverter.from_keras_model(model)
-    tflite_model = converter.convert()
-    
-    with open(os.path.join('results/ml/models/emcal/', 'lite_model_{nametag}.tflite'.format(**kwargs)), 'wb') as f:
-        f.write(tflite_model)
-else:
-    model = keras.models.load_model(os.path.join('results/ml/models/emcal/', 'model_epch{:d}'.format(epoch)))
-# ---- general ----
-# x_test = data
-# y_test = pid
-# prediction = model.predict(x_test)
-# result = prediction.argmax(axis=1)
-# pid_probs = []
-# for _, part in labels.items():
-#     mask = (y_test == part[0])
-#     res = result[mask]
-#     pid_probs.append(np.bincount(res, minlength=len(labels))/sum(mask))
-# pid_probs = np.asarray(pid_probs)
-
-# predictions
-x_test = data
-y_test = pid
-prediction = model.predict(x_test)
-
-# two labels (pion - electron)
-if len(labels) == 2:
-    # --- pion - electron ----
-    pion_prob_cut = 0.5
+
+
+def count_pid_labels(labels, pid_label_dict):
+    result = OrderedDict()
+    result['total'] = len(labels)
+    counts = np.bincount(labels)
+    label_pid_dict = {i: pid for pid, i in pid_label_dict.items()}
+    for i, cnt in enumerate(counts):
+        # keep zero-count entries only for defined label indices
+        if cnt == 0 and i not in label_pid_dict:
+        pid = label_pid_dict.get(i)
+        particle = PARTICLE_DICT.get(pid, (pid, pid, pid))
+        # print(i, pid, cnt, particle)
+        result[str(particle[1])] = '{:d}'.format(cnt)
+    return result
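+
+# Example (hypothetical input): count_pid_labels(np.array([0, 0, 1]), {11: 0, -211: 1})
+# returns OrderedDict([('total', 3), ('electron', '2'), ('pion-', '1')]);
+# the per-particle counts are formatted as strings, keyed by PARTICLE_DICT short names.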
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+            'data_store', type=str,
+            help='path to data store.'
+            )
+    parser.add_argument(
+            '-o', type=str,
+            dest='outdir',
+            default='ml_result',
+            help='output directory.'
+            )
+    parser.add_argument(
+            '-s', '--seed', type=int,
+            default=-1,
+            help='random seed.'
+            )
+    parser.add_argument(
+            '-t', '--name-tag', type=str,
+            dest='ntag',
+            default='imcal_ml',
+            help='name tag for output files/models.'
+            )
+    parser.add_argument(
+            '--test-size', type=float,
+            default=0.2,
+            help='relative size of the test sample (between 0 and 1).'
+            )
+    parser.add_argument(
+            '--epochs', type=int,
+            default=20,
+            help='epochs for ML training.'
+            )
+    parser.add_argument(
+            '--batch-size', type=int,
+            default=512,
+            help='batch size for ML training.'
+            )
+    parser.add_argument(
+            '-e', '--efficiency', type=float,
+            dest='eff',
+            default=0.98,
+            help='targeted efficiency from the cuts.'
+            )
+    parser.add_argument(
+            '--read-model', action='store_true',
+            default=False,
+            help='read a trained model'
+            )
+    parser.add_argument(
+            '--read-model-path', type=str,
+            default='',
+            help='only used when --read-model is enabled; if not specified, the model is read from the default saving path.'
+            )
+    # weights
+    for key, (name, sname, latex) in PARTICLE_DICT.items():
+        parser.add_argument(
+            '--weight-{}'.format(name), type=float,
+            default=1.0,
+            help='estimator weight for {} particles.'.format(name)
+            )
+    args = parser.parse_args()
+    kwargs = vars(args)
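+    # example invocation (script and file names are illustrative):
+    #   python3 ml_training.py sample.h5 -o ml_result -t imcal_epi --epochs 20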
+
+    os.makedirs(args.outdir, exist_ok=True)
+
+    if args.seed > 0:
+        np.random.seed(args.seed)
+        tf.random.set_seed(args.seed)
+
+    df = pd.read_hdf(args.data_store, key=imcal_info.ml_data_key)
+    # NOTE: the truth table is assumed to share exactly the same event index as df
+    dft = pd.read_hdf(args.data_store, key=imcal_info.truth_data_key)
+    print(dft['PDG'].value_counts())
+
+    # fill dictionary if not pre-defined
+    data_pid = dft['PDG'].unique()
+    for pid in data_pid:
+        if pid not in PARTICLE_DICT.keys():
+            print('WARNING: ML training: particle ({}) not found in the dictionary, update the script to include it.'\
+                    .format(pid))
+            PARTICLE_DICT[pid] = ('particle_{:d}'.format(pid), 'PDG_{:d}'.format(pid), 'PDG_{:d}'.format(pid))
+
+    # build labels and weights for particles
+    # NOTE: pid follows the dictionary orders
+    pid_labels = OrderedDict()
+    pid_weights = OrderedDict()
+    for pid, (pname, _, _) in PARTICLE_DICT.items():
+        if pid not in data_pid:
+            continue
+        pid_weights[pid] = kwargs.get('weight_{}'.format(pname.replace('-', '_')), 1.0)
+        pid_labels[pid] = len(pid_labels)
+    print(pid_labels)
+    print(pid_weights)
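+    # for an electron/pion- sample this yields, following the PARTICLE_DICT order,
+    # pid_labels = {11: 0, -211: 1} and (by default) pid_weights = {11: 1.0, -211: 1.0}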
+
+    # data shape
+    # NOTE: number of layers for combined data
+    nlayers = imcal_info.nlayers_img + imcal_info.nlayers_scfi
+    # NOTE: index levels should be [event, layer_type, layer, hit], change the code if data structure is changed
+    nhits = df.index.levels[-1].max()
+
+    # training datasets
+    xdata = df.values.reshape(len(dft), nlayers, nhits, len(df.columns))
+    ydata = dft['PDG'].map(pid_labels).values
+    wdata = dft['PDG'].map(pid_weights).values
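+    # this reshapes the flat hit table into one "image" per event, i.e.
+    # (n_events, nlayers, nhits, n_features), with nlayers = 6 + 20 = 26
+    # given the values in utils.imcal_info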
+
+    # separate train and test data
+    indices = np.arange(len(xdata))
+    np.random.shuffle(indices)
+    id_train = indices[int(len(indices)*args.test_size):]
+    id_test = indices[:int(len(indices)*args.test_size)]
+    # test data will be used for benchmarking, no need for sampling weights
+    xtrain, ytrain, wtrain = xdata[id_train], ydata[id_train], wdata[id_train]
+    xtest, ytest = xdata[id_test], ydata[id_test]
+
+    # try to use GPUs
+    # tf.debugging.set_log_device_placement(True)
+    gpus = tf.config.list_logical_devices('GPU')
+    print("Number of GPUs Available: ", len(gpus))
+    # MirroredStrategy expects device-name strings; fall back to the default
+    # strategy when no GPU is available
+    if gpus:
+        strategy = tf.distribute.MirroredStrategy([gpu.name for gpu in gpus])
+    else:
+        strategy = tf.distribute.get_strategy()
+    if args.read_model:
+        model_path = os.path.join(args.outdir, '{}_model'.format(args.ntag))
+        if args.read_model_path:
+            model_path = args.read_model_path
+        model = keras.models.load_model(model_path)
+    else:
+        with strategy.scope():
+            train_dataset = tf.data.Dataset.from_tensor_slices((xtrain, ytrain, wtrain))
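+            # CNN classifier: padded convolution blocks with dropout, flattened
+            # into a small dense head ending in a softmax over the labels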
+            model = keras.Sequential([
+                keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=xdata.shape[1:]),
+                # keras.layers.MaxPooling2D((2, 2), strides=2),
+                keras.layers.Dropout(0.25),
+                keras.layers.Conv2D(128, (2, 2), padding='same', activation='relu'),
+                # keras.layers.MaxPooling2D((2, 2), strides=2),
+                keras.layers.Dropout(0.3),
+                keras.layers.Conv2D(64, (2, 2), padding='same', activation='relu'),
+                # keras.layers.MaxPooling2D((2, 2), strides=2),
+
+                keras.layers.Flatten(),
+                keras.layers.Dense(128, activation='relu'),
+                keras.layers.Dropout(0.25),
+                keras.layers.Dense(32, activation='relu'),
+                keras.layers.Dense(len(pid_labels), activation='softmax')
+            ])
+
+            model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-4),
+                          loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
+                          metrics=['accuracy'])
+            history = model.fit(train_dataset.batch(args.batch_size), epochs=args.epochs)
+            model.save(os.path.join(args.outdir, '{}_model'.format(args.ntag)))
+
+    # benchmark
+    prediction = model.predict(xtest)
+
+    # cut on the label probability
+    # NOTE: assumes two labels (electron, pion), ordered as in PARTICLE_DICT
     pid_probs = np.zeros(shape=(2, 2))
-    fig, ax = plt.subplots(1, 1, figsize=(8, 6), dpi=160)
+    fig, ax = plt.subplots(1, 1, figsize=(12, 9), dpi=160)
+
+    # cut position (try to keep the targeted efficiency)
+    epi_probs = prediction[ytest == 0].T[1]
+    pion_prob_cut = np.percentile(epi_probs, args.eff*100.)
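+    # the cut sits at the (eff * 100)-th percentile of the true electrons' pion
+    # probability, so about eff of them fall below it and are tagged as electrons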
+    eff_string = ''
+    for pid, ilbl in pid_labels.items():
+        mask = (ytest == ilbl)
+        probs = prediction[mask]
+        label = PARTICLE_DICT.get(pid, [''])[-1]
+
+        ax.hist(probs.T[1], bins=np.linspace(0, 1, 101), label=label, color=colors[ilbl], ec=colors[ilbl], alpha=0.5)
+
+        # column 0: fraction below the cut (tagged as electron), column 1: above it
+        pid_probs[ilbl][0] = np.sum(probs.T[1] < pion_prob_cut) / float(len(probs))
+        pid_probs[ilbl][1] = 1. - pid_probs[ilbl][0]
+        if abs(pid) == 211:
+            eff_string += '{} rej. = {:.2f}%'.format(label, pid_probs[ilbl][1]*100.)
+        else:
+            eff_string += '{} eff. = {:.2f}%\n'.format(label, pid_probs[ilbl][0]*100.)
+    ax.axvline(x=pion_prob_cut, lw=2, color='k', ls='--')
     ax.set_yscale('log')
     ax.set_ylabel('Counts', fontsize=24)
     ax.set_xlabel(r'$P_{\pi}$', fontsize=24)
     ax.tick_params(direction='in', which='both', labelsize=24)
-    for i, (_, part) in enumerate(labels.items()):
-        mask = (y_test == part[0])
-        probs = prediction[mask]
-        ax.hist(probs.T[1], histtype='step', bins=np.linspace(0, 1, 101), label=part[1])
-        pid_probs[i][0] = sum(probs.T[1] < pion_prob_cut) / float(len(probs))
-        pid_probs[i][1] = 1. - pid_probs[i][0]
-    ax.axvline(x=pion_prob_cut, lw=2, color='k', ls='--')
-    ax.text(0.55, 0.9, '$e$ eff. = {:.2f}%\n$\pi$ rej. = {:.2f}%'.format(pid_probs[0][0]*100., pid_probs[1][1]*100.),
-            fontsize=24, transform=ax.transAxes, ha='left', va='top')
-    ax.legend(fontsize=24, loc='lower left')
-    fig.savefig(os.path.join('results/ml/models/emcal/', 'pid_tag_hist.png'))
-    fig.savefig(os.path.join('results/ml/models/emcal/', 'pid_tag_hist.pdf'))
-    # --- pion - electron ----
-    result_text = open(os.path.join(out_dir, 'rejection_result.txt'), 'w')
-    lines = [
-        'E/p cut position: {:.4f}, layer {:d} to {:d}, E/p efficiency: {:.4f}%, rejection power: {:.4f}'\
-        .format(epcut, begin_layer, end_layer, ep_eff*100., 1./(1. - ep_rej)),
-    ] if do_epcut else []
-    lines += [
-        'ML training weight: {}, pion cut position: {:.4f}, ML efficiency: {:.4f}%, rejection power: {:.4f}'\
-        .format(list(part_weights.values()), pion_prob_cut, pid_probs[0][0]*100., 1./(1. - pid_probs[1][1])),
-        'combined efficiency: {:.2f}%, rejection power: {:.1f}'\
-        .format(pid_probs[0][0]*ep_eff*100., 1./((1. - ep_rej)*(1. - pid_probs[1][1]))),
-    ]
-    result_text.write('\n'.join(lines))
-    result_text.close()
-    print('\n'.join(lines))
-
-else:
-    # --- multi-particles ---
-    pid_probs = np.zeros(shape=(len(labels), len(labels)))
-    fig, ax = plt.subplots(1, 1, figsize=(12, 8), dpi=160, gridspec_kw={'left': 0.15, 'right': 0.95})
-    ax.set_yscale('log')
-    ax.set_ylabel('Counts', fontsize=26)
-    ax.set_xlabel(r'Likelihood of the Corresponding Label', fontsize=26)
-    ax.tick_params(direction='in', which='both', labelsize=26)
-    # label with maximum likelihood
-    for _, (i, part, label) in labels.items():
-        mask = (y_test == i)
-        probs = prediction[mask]
-        pred_labels = probs.argmax(axis=1)
-        ax.hist(probs.T[i], histtype='step', bins=np.linspace(0, 1, 101), label=label)
-        for _, (j, _, _) in labels.items():
-            pid_probs[i][j] = np.sum(pred_labels == j)/float(np.sum(mask))
-
-    ax.legend(fontsize=26, loc='upper center', ncol=3, handletextpad=0.3, columnspacing=0.6)
-    fig.savefig(os.path.join('results/ml/models/emcal/', 'pid_tag_hist.png'))
-    fig.savefig(os.path.join('results/ml/models/emcal/', 'pid_tag_hist.pdf'))
-
-    # --- multi-particles ---
-fig, ax = plt.subplots(1, 1, figsize=(8, 8), dpi=160, gridspec_kw={'left': 0.15, 'right': 0.95})
-ax.imshow(pid_probs, cmap='PuBu', norm=LogNorm(vmin=0.01, vmax=1))
-ax.xaxis.set_major_locator(FixedLocator(np.arange(0, pid_probs.shape[0], step=1.0)))
-ax.yaxis.set_major_locator(FixedLocator(np.arange(0, pid_probs.shape[1], step=1.0)))
-ax.xaxis.set_minor_locator(FixedLocator(np.arange(0.5, pid_probs.shape[0], step=1.0)))
-ax.yaxis.set_minor_locator(FixedLocator(np.arange(0.5, pid_probs.shape[1], step=1.0)))
-ax.set_xticklabels([x[2] for _, x in labels.items()], fontsize=26, va='bottom')
-ax.set_yticklabels([x[2] for _, x in labels.items()], fontsize=26, ha='left')
-ax.tick_params(bottom=False, left=False, which='both')
-ax.tick_params(pad=30, axis='x')
-ax.tick_params(pad=40, axis='y')
-ax.set_ylabel('Truth', fontsize=26)
-ax.set_xlabel('Classification', fontsize=26)
-for i in range(pid_probs.shape[0]):
-    for j in range(pid_probs.shape[1]):
-        color = 'k' if pid_probs[i, j] < 0.15 else 'w'
-        text = ax.text(j, i, '{:.2f}%'.format(pid_probs[i, j]*100.), ha='center', va='center',
-                       color=color, fontsize=28 - len(labels)*2)
-ax.grid('-', color='k', which='minor')
-fig.savefig(os.path.join('results/ml/models/emcal/', 'pid_tag.png'))
-fig.savefig(os.path.join('results/ml/models/emcal/', 'pid_tag.pdf'))
+    ax.text(0.55, 0.9, eff_string, fontsize=24, transform=ax.transAxes, ha='left', va='top')
+    ax.legend(fontsize=24, ncol=4, loc='upper center', bbox_to_anchor=(0.5, 1.12),)
+    fig.savefig(os.path.join(args.outdir, '{}_pid_hist.pdf'.format(args.ntag)))
+
+    # save result
+    result = OrderedDict(
+            training=OrderedDict(
+                epochs=args.epochs,
+                batch_size=args.batch_size,
+                sample_size=count_pid_labels(ytrain, pid_labels),
+                sample_weights={PARTICLE_DICT.get(pid)[1]: weight for pid, weight in pid_weights.items()},
+                targeted_efficiency=args.eff,
+                pion_prob_cut=pion_prob_cut,
+                ),
+
+            test=OrderedDict(
+                sample_size=count_pid_labels(ytest, pid_labels),
+                el_eff=pid_probs[0][0],
+                pi_rej=pid_probs[1][1],
+                )
+            )
+    res_json = json.dumps(result, indent=4)
+    with open(os.path.join(args.outdir, '{}_result.json'.format(args.ntag)), 'w') as outfile:
+        outfile.write(res_json)
+
diff --git a/benchmarks/imaging_shower_ML/scripts/utils.py b/benchmarks/imaging_shower_ML/scripts/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ddbc61608d0f494ee3d8209f7362f5669418897
--- /dev/null
+++ b/benchmarks/imaging_shower_ML/scripts/utils.py
@@ -0,0 +1,63 @@
+import ROOT
+import pandas as pd
+import numpy as np
+
+
+class dotdict(dict):
+    """dot.notation access to dictionary attributes"""
+    __getattr__ = dict.get
+    __setattr__ = dict.__setitem__
+    __delattr__ = dict.__delitem__
+
+
+# basic information shared among all scripts
+imcal_info = dotdict(
+        nlayers_img=6,
+        nlayers_scfi=20,
+        ml_data_key='/ml_data',
+        truth_data_key='/truth',
+    )
+
+
+# read from an RDataFrame and flatten a given collection; returns a pandas DataFrame
+def flatten_collection(rdf, collection, ev_range=None, cols=None, event_colname='event'):
+    if not cols:
+        cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
+    else:
+        cols = ['{}.{}'.format(collection, c) for c in cols]
+    if not cols:
+        print('cannot find any branch under collection {}'.format(collection))
+        return pd.DataFrame()
+    # take data
+    if not ev_range:
+        data = rdf.AsNumpy(cols)
+    else:
+        data = rdf.Range(*ev_range).AsNumpy(cols)
+    # flatten the data, add an event id to identify clusters from different events
+    evns = []
+    for i, vec in enumerate(data[cols[0]]):
+        evns += [i]*vec.size()
+    for n, vals in data.items():
+        # make sure ints are not converted to floats
+        typename = vals[0].__class__.__name__.lower()
+        dtype = np.int64 if 'int' in typename or 'long' in typename else np.float32
+        # type safe creation
+        data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
+    # build data frame
+    dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
+    evns = np.asarray(evns)
+    if ev_range is not None:
+        evns += ev_range[0]
+    dfp.loc[:, event_colname] = evns
+    dfp.rename(columns={c: c.replace(collection + '.', '') for c in dfp.columns}, inplace=True)
+    return dfp
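+
+# Example usage (file and branch names are illustrative):
+#   rdf = ROOT.RDataFrame('events', 'rec_file.root')
+#   df = flatten_collection(rdf, 'EcalBarrelHits', cols=['energyDeposit'])
+# yields one row per hit, with the collection prefix stripped from the column
+# names and an added 'event' column.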
+
+
+def cartesian_to_polar(x, y, z):
+    """Convert cartesian coordinates to spherical (r, theta, phi), also returning
+    the cylindrical radius rc and the pseudorapidity eta."""
+    r = np.sqrt(x**2 + y**2 + z**2)
+    rc = np.sqrt(x**2 + y**2)
+    theta = np.arccos(z / r)
+    phi = np.arctan2(y, x)
+    eta = -np.log(np.tan(theta / 2.))
+    return r, theta, phi, rc, eta