Commit 277f38d4 authored by Chao Peng

update run_benchmark script

parent f642bc8d
Pipeline #47374 passed with warnings
CI configuration (ml_shower benchmark jobs):

@@ -10,16 +10,14 @@ ml_shower:tagging_e :
   when: manual
   stage: benchmarks1
   script:
-    - python3 ECal/run_benchmark.py -t imcal_e -n 100000
-      --particles "electron" --pmin 0.5 --pmax 10
+    - python3 ECal/run_benchmark.py -t imcal_e -n 100000 --particles "electron" --pmin 0.5 --pmax 10

 ml_shower:tagging_pi :
   extends: .rec_benchmark
   when: manual
   stage: benchmarks1
   script:
-    - python3 ECal/run_benchmark.py -t imcal_pi -n 100000
-      --particles "pion-" --pmin 0.5 --pmax 10
+    - python3 ECal/run_benchmark.py -t imcal_pi -n 100000 --particles "pion-" --pmin 0.5 --pmax 10

 ml_shower:training:
   extends: .rec_benchmark
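The same simulation step can be driven outside CI through the script's command-line interface. The snippet below is only an illustration: the event count, output directory, and run type are example values, and the flag spellings follow the argparse definitions in the updated script shown next.

# illustrative local run of the e/pi benchmark simulation step
import subprocess

subprocess.run([
    'python3', 'ECal/run_benchmark.py',
    '-t', 'imcal_e',                    # name tag for output files
    '-n', '1000',                       # smaller sample than the CI jobs
    '--particles', 'electron',
    '--p-min', '0.5', '--p-max', '10',
    '-o', 'sim_output', '-r', 'ecal',   # default output directory and run type
], check=True)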
run_benchmark.py (updated):

#! /usr/local/bin/python3
'''
    A python script to facilitate the ML benchmarks for e/pi separation with the imaging calorimeter (single particles).
    This process follows the steps below:
    1. Simulation to generate training samples
    2. Study and apply E/p cut to reduce the training samples
    3. Train and test ML models with the "cleaned" (after E/p cut) samples
    4. Benchmark the performance

    Author: Chao Peng (ANL)
    Date: 11/11/2022
'''
import os
import sys
import json
import subprocess
import argparse


SDIR = os.path.dirname(os.path.realpath(__file__))
# {var} is from args
FILE_NAMES = dict(
    gen_script = os.path.join(SDIR, 'scripts', 'gen_particles.py'),
    rec_script = os.path.join(SDIR, 'options', 'faec.py'),

    sim_dir = os.path.join('{outdir}', '{run_type}', 'sim_data'),

    gen_file = os.path.join('{outdir}', '{run_type}', 'sim_data', '{ntag}_gen.hepmc'),
    sim_file = os.path.join('{outdir}', '{run_type}', 'sim_data', '{ntag}_sim.edm4hep.root'),
    rec_file = os.path.join('{outdir}', '{run_type}', 'sim_data', '{ntag}_rec.root'),
)
# default values for argument parser
DEFAULT_COMPACT = os.path.join(
    os.environ.get('DETECTOR_PATH', ''),
    '{}.xml'.format(os.environ.get('DETECTOR_CONFIG', ''))
)
# defined steps
SCRIPT_STEPS = (
    'sim',      # step 1; simulation to generate samples
)


# simulation and reconstruction
def gen_sim_rec(**kwargs):
    # generate particles
    gen_cmd = [
        'python {gen_script} {gen_file}',
        '-n {nev}',
        '-s {seed}',
        '--angmin {angmin} --angmax {angmax}',
        '--pmin {pmin} --pmax {pmax}',
        '--particles {particles}',
    ]
    gen_cmd = ' '.join(gen_cmd).format(**kwargs).split(' ')
    subprocess.run(gen_cmd)

    # simulation
    sim_cmd = [
        'ddsim --runType batch --part.minimalKineticEnergy 1*TeV --filter.tracker edep0',
        '-v WARNING',
        '--numberOfEvents {nev}',
        # '--physics.list {physics_list}',
        '--inputFiles {gen_file}',
        '--outputFile {sim_file}',
        '--compact {compact}',
    ]
    if 'seed' in kwargs and kwargs['seed'] > 0:
        sim_cmd += ['--random.seed {seed}']
    sim_cmd = ' '.join(sim_cmd).format(**kwargs).split(' ')
    return_code = subprocess.run(sim_cmd).returncode
    print(return_code)
    if return_code is not None and return_code < 0:
        print("ERROR running simulation!")
        exit(return_code)
    subprocess.run(['rootls', '-t', kwargs['sim_file']])

    # reconstruction with juggler
    # export to environment variables (used to pass arguments to the option file)
    run_env = os.environ.copy()
    juggler_vars = [
        'JUGGLER_SIM_FILE {sim_file}',
        'JUGGLER_REC_FILE {rec_file}',
        'JUGGLER_COMPACT_PATH {compact}',
        'JUGGLER_N_EVENTS {nev}',
    ]
    lst = ' '.join(juggler_vars).format(**kwargs).split(' ')
    run_env.update({lst[i]: lst[i + 1] for i in range(0, len(lst), 2)})

    rec_cmd = 'gaudirun.py {rec_script}'.format(**kwargs).split(' ')
    print(rec_cmd)
    return_code = subprocess.run(rec_cmd, env=run_env).returncode
    print(return_code)
    if return_code is not None and return_code < 0:
        print("ERROR running juggler (reconstruction)!")
        exit(return_code)
    process = subprocess.run(['rootls', '-t', kwargs['rec_file']])
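
# Note on the environment hand-off above: gaudirun.py is given only the option
# file (options/faec.py), so that option file is expected to read the exported
# JUGGLER_* variables itself via os.environ. An illustrative sketch of that
# pattern (an assumption, not part of this commit):
#
#     import os
#     sim_file = os.environ.get('JUGGLER_SIM_FILE', '')
#     rec_file = os.environ.get('JUGGLER_REC_FILE', '')
#     n_events = int(os.environ.get('JUGGLER_N_EVENTS', '100'))
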
if __name__ == '__main__':
    # argument parser
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-n', '--n-events', type=int,
        dest='nev',
        default=100,
        help='number of events.'
    )
    parser.add_argument(
        '-o', '--outdir', type=str,
        dest='outdir',
        default='sim_output',
        help='output directory.'
    )
    parser.add_argument(
        '-r', '--run-type', type=str,
        dest='run_type',
        default='ecal',
        help='a name to specify the run type.'
    )
    parser.add_argument(
        '-t', '--name-tag', type=str,
        dest='ntag',
        default='solid',
        help='a name tag for output files.'
    )
    parser.add_argument(
        '-c', '--compact', type=str,
        dest='compact',
        default=DEFAULT_COMPACT,
        help='path to detector compact file.'
    )
    parser.add_argument(
        '-s', '--seed', type=int,
        default=-1,
        help='random seed for child scripts (only passed if > 0).'
    )
    parser.add_argument(
        '--batch-size', type=int,
        dest='batch',
        default=100000,
        help='batch size to process data.'
    )
    parser.add_argument(
        '--p-min', type=float,
        dest='pmin',
        default=5.0,
        help='minimum momentum of particles.'
    )
    parser.add_argument(
        '--p-max', type=float,
        dest='pmax',
        default=5.0,
        help='maximum momentum of particles.'
    )
    parser.add_argument(
        '--angle-min', type=float,
        dest='angmin',
        default=5,
        help='minimum scattering angle of particles.'
    )
    parser.add_argument(
        '--angle-max', type=float,
        dest='angmax',
        default=25,
        help='maximum scattering angle of particles.'
    )
    parser.add_argument(
        '--particles', type=str,
        default='electron',
        help='particle names, separated by ",".'
    )
    parser.add_argument(
        '--steps', type=str,
        default=', '.join(SCRIPT_STEPS),
        help='FOR DEV: choose the steps to be executed ({}).'.format(', '.join(SCRIPT_STEPS))
    )

    args = parser.parse_args()
    kwargs = vars(args)

    # prepare
    steps = [p.strip() for p in args.steps.split(',')]

    # make dirs, add paths to kwargs
    FILE_NAMES.update({key: val.format(**kwargs) for key, val in FILE_NAMES.items()})
    for key, val in FILE_NAMES.items():
        if key.endswith('_dir'):
            os.makedirs(val, exist_ok=True)
    kwargs.update(FILE_NAMES)

    # simulation for benchmark samples
    if SCRIPT_STEPS[0] in steps:
        gen_sim_rec(**kwargs)

    # save run information, combine runs with the same run_type
    run_data = {args.run_type: {args.ntag: kwargs}}
    try:
        with open(os.path.join(args.outdir, 'result.json'), 'r') as f:
            run_data = json.load(f)
            run_info = run_data.get(args.run_type, {})
            run_info.update({args.ntag: kwargs})
            run_data[args.run_type] = run_info
    except (FileNotFoundError, json.decoder.JSONDecodeError):
        pass
    with open(os.path.join(args.outdir, 'result.json'), 'w') as f:
        f.write(json.dumps(run_data, sort_keys=True, indent=4))
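
The run record written at the end is what ties this simulation stage to the later E/p-cut, training, and benchmarking stages listed in the script's docstring. A downstream step could read it back as sketched below; the 'sim_output' directory and 'ecal' run type are the script defaults, the 'imcal_e' name tag comes from the CI job above, and the later stages themselves are not part of this commit.

# sketch: load the per-run settings that run_benchmark.py stores in
# result.json as {run_type: {name_tag: kwargs}}
import json
import os

outdir, run_type, ntag = 'sim_output', 'ecal', 'imcal_e'

with open(os.path.join(outdir, 'result.json')) as f:
    run_data = json.load(f)

run_kwargs = run_data[run_type][ntag]
# e.g. pick up the reconstructed file and generation settings for the next stage
print(run_kwargs['rec_file'], run_kwargs['nev'], run_kwargs['pmin'], run_kwargs['pmax'])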