Skip to content
Snippets Groups Projects
Commit 97d30e8c authored by Chao Peng's avatar Chao Peng
Browse files

Add ML benchmark for imaging calorimetry

parent 5fa57743
No related branches found
No related tags found
1 merge request: !115 "Add ML benchmark for imaging calorimetry"
@@ -61,6 +61,7 @@ include:
   - local: 'benchmarks/clustering/config.yml'
   - local: 'benchmarks/rich/config.yml'
   - local: 'benchmarks/imaging_ecal/config.yml'
+  - local: 'benchmarks/imaging_shower_ML/config.yml'
 final_report:
...
# Simulate, reconstruct and tag a mixed electron/pion sample for ML training.
# NOTE(review): "pion-" is listed twice in --particles (doubling the pi-
# fraction relative to electrons) -- confirm whether "pion+" was intended.
ml_shower:tagging_epi:
  extends: .rec_benchmark
  when: manual
  stage: benchmarks1
  script:
    - python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_epi -n 100000
      --particles "electron,pion-,pion-" --pmin 0.5 --pmax 10
# Simulate, reconstruct and tag a pure electron sample for ML training.
ml_shower:tagging_e:
  extends: .rec_benchmark
  when: manual
  stage: benchmarks1
  script:
    - python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_e -n 100000
      --particles "electron" --pmin 0.5 --pmax 10
# Simulate, reconstruct and tag a pure pi- sample for ML training.
ml_shower:tagging_pi:
  extends: .rec_benchmark
  when: manual
  stage: benchmarks1
  script:
    - python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_pi -n 100000
      --particles "pion-" --pmin 0.5 --pmax 10
# Train the tagging model on all three tagged datasets (placeholder script).
ml_shower:training:
  extends: .rec_benchmark
  when: manual
  stage: process
  needs: ["ml_shower:tagging_epi", "ml_shower:tagging_e", "ml_shower:tagging_pi"]
  script:
    - ls -lrth
    # TODO
# Evaluate the trained model (placeholder script).
ml_shower:test:
  extends: .rec_benchmark
  when: manual
  stage: collect
  needs: ["ml_shower:training"]
  script:
    - ls -lrth
    # TODO
import os
import ROOT
from Gaudi.Configuration import *
from GaudiKernel import SystemOfUnits as units
from GaudiKernel.DataObjectHandleBase import DataObjectHandleBase
from Configurables import ApplicationMgr, EICDataSvc, PodioOutput, GeoSvc
from Configurables import PodioInput
from Configurables import Jug__Base__InputCopier_dd4pod__Geant4ParticleCollection_dd4pod__Geant4ParticleCollection_ as MCCopier
from Configurables import Jug__Digi__CalorimeterHitDigi as CalorimeterHitDigi
from Configurables import Jug__Reco__ImagingPixelReco as ImagingPixelReco
from Configurables import Jug__Reco__ImagingPixelMerger as ImagingPixelMerger
# ------------------------------------------------------------------
# Juggler/Gaudi options: digitize imaging-ECal hits, reconstruct the
# pixels, and merge them into an (eta, phi) grid collection for ML.
# All runtime parameters come in through environment variables.
# ------------------------------------------------------------------

# input arguments through environment variables
kwargs = dict()
kwargs['input'] = os.environ.get('JUGGLER_SIM_FILE', '')
kwargs['output'] = os.environ.get('JUGGLER_REC_FILE', '')
kwargs['compact'] = os.environ.get('JUGGLER_COMPACT_PATH', '')
kwargs['nev'] = int(os.environ.get('JUGGLER_N_EVENTS', -1))
kwargs['nlayers'] = int(os.environ.get('IMCAL_ML_N_LAYERS', 20))
kwargs['nhits'] = int(os.environ.get('IMCAL_ML_N_HITS', 20))

# nev < 1 means "process all events": read the entry count from the input file
if kwargs['nev'] < 1:
    f = ROOT.TFile(kwargs['input'])
    kwargs['nev'] = f.events.GetEntries()

print(kwargs)

# get sampling fraction from system environment variable, 1.0 by default
# NOTE(review): sf is not used anywhere below in this file -- confirm it
# is intentional (perhaps kept for consistency with sibling option files).
sf = float(os.environ.get('JUGGLER_SAMP_FRAC', '1.0'))

# geometry and event-data services (compact/input accept comma-separated lists;
# the comprehension variable f shadows the TFile above but is local to it)
geo_service = GeoSvc('GeoSvc', detectors=[f.strip() for f in kwargs['compact'].split(',')])
podev = EICDataSvc('EventDataSvc', inputs=[f.strip() for f in kwargs['input'].split(',')])

podin = PodioInput('PodioReader', collections=['mcparticles', 'EcalBarrelHits'])
podout = PodioOutput('out', filename=kwargs['output'])

# copy MC particles so they are available in the output tree
copier = MCCopier('MCCopier',
                  OutputLevel=DEBUG,
                  inputCollection='mcparticles',
                  outputCollection='mcparticles2')

# digitization: smear energies and emulate the ADC
imcaldigi = CalorimeterHitDigi('imcal_digi',
                               inputHitCollection='EcalBarrelHits',
                               outputHitCollection='EcalBarrelHitsDigi',
                               energyResolutions=[0., 0.02, 0.],
                               dynamicRangeADC=3*units.MeV,
                               pedestalSigma=40)

# reconstruction: ADC back to energy, with a threshold cut;
# readout/layer/sector fields identify the cells in the geometry
imcalreco = ImagingPixelReco('imcal_reco',
                             inputHitCollection='EcalBarrelHitsDigi',
                             outputHitCollection='EcalBarrelHitsReco',
                             dynamicRangeADC=3.*units.MeV,
                             pedestalSigma=40,
                             thresholdFactor=3.5,
                             readoutClass='EcalBarrelHits',
                             layerField='layer',
                             sectorField='module')

# merge reconstructed pixels into a fixed-size layers x hits grid for ML
imcaldata = ImagingPixelMerger('imcal_merger',
                               # OutputLevel=DEBUG,
                               inputHitCollection='EcalBarrelHitsReco',
                               outputHitCollection='EcalBarrelHitsML',
                               etaSize=0.001,
                               phiSize=0.001,
                               numberOfHits=kwargs['nhits'],
                               numberOfLayers=kwargs['nlayers'])

podout.outputCommands = ['keep *']

ApplicationMgr(
    TopAlg=[podin, copier, imcaldigi, imcalreco, imcaldata, podout],
    EvtSel='NONE',
    EvtMax=kwargs['nev'],
    ExtSvc=[podev],
    OutputLevel=DEBUG
)
import os
from pyHepMC3 import HepMC3 as hm
import numpy as np
import argparse
# Supported particle species for generation: name -> (PDG id, mass in GeV/c^2)
PARTICLES = {
    "pion0": (111, 0.1349766),       # pi0
    "pion+": (211, 0.13957018),      # pi+
    "pion-": (-211, 0.13957018),     # pi-
    "kaon0": (311, 0.497648),        # K0
    "kaon+": (321, 0.493677),        # K+
    "kaon-": (-321, 0.493677),       # K-
    "proton": (2212, 0.938272),      # proton
    "neutron": (2112, 0.939565),     # neutron
    "electron": (11, 0.51099895e-3), # electron
    "positron": (-11, 0.51099895e-3),# positron
    "photon": (22, 0),               # photon
}
def gen_event(p, theta, phi, pid, mass):
    """Build a single HepMC3 event: an e-p beam pair plus one final-state
    particle of the given PDG id and mass, with momentum magnitude p (GeV)
    along the direction (theta, phi)."""
    evt = hm.GenEvent(momentum_unit=hm.Units.MomentumUnit.GEV, length_unit=hm.Units.LengthUnit.MM)
    # HepMC status 1 = undecayed final-state particle
    final_state = 1
    e0 = np.sqrt(p*p + mass*mass)
    # unit direction vector from the spherical angles
    nx = np.cos(phi)*np.sin(theta)
    ny = np.sin(phi)*np.sin(theta)
    nz = np.cos(theta)
    # beam particles (status 4); the proton beam is recorded at rest here.
    # NOTE(review): the electron beam pz is set to the *outgoing* particle's
    # energy e0 -- looks like bookkeeping only, but confirm it is intended.
    pbeam = hm.GenParticle(hm.FourVector(0, 0, 0, 0.938272), 2212, 4)
    ebeam = hm.GenParticle(hm.FourVector(0, 0, e0, np.sqrt(e0*e0 + 0.511e-3*0.511e-3)), 11, 4)
    # the generated outgoing particle
    hout = hm.GenParticle(hm.FourVector(nx*p, ny*p, nz*p, e0), pid, final_state)
    # one vertex connects the beams to the final state
    vert = hm.GenVertex()
    vert.add_particle_in(ebeam)
    vert.add_particle_in(pbeam)
    vert.add_particle_out(hout)
    evt.add_vertex(vert)
    return evt
if __name__ == "__main__":
    # sys is only needed on the error path below; it was previously used
    # without being imported, which raised NameError instead of exiting.
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument('output', help='path to the output file')
    parser.add_argument('-n', type=int, default=1000, dest='nev', help='number of events to generate')
    parser.add_argument('-s', type=int, default=-1, dest='seed', help='seed for random generator')
    parser.add_argument('--parray', type=str, default="", dest='parray',
                        help='an array of momenta in GeV, separated by \",\"')
    parser.add_argument('--pmin', type=float, default=8.0, dest='pmin', help='minimum momentum in GeV')
    parser.add_argument('--pmax', type=float, default=100.0, dest='pmax', help='maximum momentum in GeV')
    parser.add_argument('--angmin', type=float, default=0.0, dest='angmin', help='minimum angle in degree')
    parser.add_argument('--angmax', type=float, default=20.0, dest='angmax', help='maximum angle in degree')
    parser.add_argument('--phmin', type=float, default=0.0, dest='phmin', help='minimum angle in degree')
    parser.add_argument('--phmax', type=float, default=360.0, dest='phmax', help='maximum angle in degree')
    parser.add_argument('--particles', type=str, default='electron', dest='particles',
                        help='particle names, support {}'.format(list(PARTICLES.keys())))
    args = parser.parse_args()

    # random seed (< 0 will get it from environment variable 'SEED', or a
    # system random number). int() is required because environment values
    # are strings, and np.random.seed rejects strings.
    if args.seed < 0:
        args.seed = int(os.environ.get('SEED', int.from_bytes(os.urandom(4), byteorder='big', signed=False)))
    print("Random seed is {}".format(args.seed))
    np.random.seed(args.seed)

    output = hm.WriterAscii(args.output)
    if output.failed():
        print("Cannot open file \"{}\"".format(args.output))
        sys.exit(2)

    # build particle info (pid here is a *name* string, so no {:d} format spec)
    parts = []
    for pid in args.particles.split(','):
        pid = pid.strip()
        if pid not in PARTICLES.keys():
            print('pid {} not found in dictionary, ignored.'.format(pid))
            continue
        parts.append(PARTICLES[pid])

    # momenta: uniform in [pmin, pmax], or sampled from a user-supplied list
    pvals = np.random.uniform(args.pmin, args.pmax, args.nev) if not args.parray else \
            np.random.choice([float(p.strip()) for p in args.parray.split(',')], args.nev)
    # polar/azimuthal angles, degrees -> radians
    thvals = np.random.uniform(args.angmin, args.angmax, args.nev)/180.*np.pi
    phivals = np.random.uniform(args.phmin, args.phmax, args.nev)/180.*np.pi
    # species drawn uniformly from the requested list
    partvals = [parts[i] for i in np.random.choice(len(parts), args.nev)]

    count = 0
    for p, theta, phi, (pid, mass) in zip(pvals, thvals, phivals, partvals):
        if (count % 1000 == 0):
            print("Generated {} events".format(count), end='\r')
        evt = gen_event(p, theta, phi, pid, mass)
        output.write_event(evt)
        evt.clear()
        count += 1
    print("Generated {} events".format(args.nev))
    output.close()
import ROOT
import os
import numpy as np
import pandas as pd
import argparse
import sys
# read hits data from root file
# read hits data from root file
def get_ml_data(path, evnums=None, shape=(20, 20, 3), branch='EcalBarrelHitsML'):
    """Read merged imaging-cal hits from a ROOT file into a numpy array.

    path    -- path to the ROOT file (must contain an 'events' tree)
    evnums  -- event numbers to read (None = all, int = single event)
    shape   -- per-event (layer, hit, feature) shape of the output buffer
    branch  -- name of the hit branch to read

    Returns an array of shape (len(evnums), *shape) holding
    (edep, eta, phi) per (layerID, hitID) cell.
    """
    f = ROOT.TFile(path)
    events = f.events
    if evnums is None:
        evnums = np.arange(events.GetEntries())
    elif isinstance(evnums, int):
        evnums = [evnums]
    dbuf = np.zeros(shape=np.hstack([len(evnums), shape]))
    idb = 0
    for iev in evnums:
        if iev >= events.GetEntries():
            print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1))
            continue
        events.GetEntry(iev)
        for hit in getattr(events, branch):
            # fill by the running buffer index, not the event number:
            # dbuf has only len(evnums) rows, so indexing with iev was
            # wrong (and out of bounds) whenever evnums is a subset.
            dbuf[idb][hit.layerID][hit.hitID] = (hit.edep, hit.eta, hit.polar.phi)
        idb += 1
    return dbuf
# execute this script
# read mc particles from root file
# read mc particles from root file
def get_mcp_simple(path, evnums=None, branch='mcparticles2'):
    """Read a simple per-event MC-particle summary from a ROOT file.

    Returns a DataFrame with columns
    ['event', 'px', 'py', 'pz', 'pid', 'status'], one row per valid event.
    """
    rfile = ROOT.TFile(path)
    tree = rfile.events
    nentries = tree.GetEntries()
    if evnums is None:
        evnums = np.arange(nentries)
    elif isinstance(evnums, int):
        evnums = [evnums]
    rows = np.zeros(shape=(len(evnums), 6))
    nfilled = 0
    for iev in evnums:
        if iev >= nentries:
            print('Error: event {:d} is out of range (0 - {:d})'.format(iev, nentries - 1))
            continue
        tree.GetEntry(iev)
        # extract full mc particle data
        # NOTE(review): index [2] presumably picks the generated final-state
        # particle after the two beam particles -- confirm against the
        # generator's particle ordering.
        part = getattr(tree, branch)[2]
        rows[nfilled] = (iev, part.psx, part.psy, part.psz, part.pdgID, part.status)
        nfilled += 1
    return pd.DataFrame(data=rows[:nfilled], columns=['event', 'px', 'py', 'pz', 'pid', 'status'])
# load root macros, input is an argument string
# load root macros, input is an argument string
def load_root_macros(arg_macros):
    """Load a comma-separated list of ROOT macro files, skipping (with a
    warning) any path that does not exist."""
    for raw in arg_macros.split(','):
        macro = raw.strip()
        if not os.path.exists(macro):
            print('\"{}\" does not exist, skip loading it.'.format(macro))
            continue
        ROOT.gROOT.Macro(macro)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Visualize the cluster (layer-wise) from analysis')
    parser.add_argument('file', type=str, help='path to root file')
    parser.add_argument('-o', type=str, default='.', dest='outdir', help='output directory')
    # parser.add_argument('--compact', type=str, default='', dest='compact', help='compact file')
    parser.add_argument('-m', '--macros', type=str, default='rootlogon.C', dest='macros',
                        help='root macros to load (accept multiple paths separated by \",\")')
    parser.add_argument('--topo-size', type=float, default=2.0, dest='topo_size',
                        help='bin size for projection plot (mrad)')
    parser.add_argument('--topo-range', type=float, default=200.0, dest='topo_range',
                        help='half range for projection plot (mrad)')
    parser.add_argument('--shape', type=str, default="20, 20, 3", dest='shape',
                        help='shape of data (3D), default (20, 20, 3)')
    args = parser.parse_args()

    os.makedirs(args.outdir, exist_ok=True)
    load_root_macros(args.macros)

    # hit grid: (nevents, nlayers, nhits, 3) with features (edep, eta, phi)
    data = get_ml_data(args.file, shape=[int(i.strip()) for i in args.shape.split(',')])
    # per-event truth particle
    dfmcp = get_mcp_simple(args.file, branch='mcparticles2')
    # dfmcp = dfmcp[dfmcp['status'] == 24578]
    # derive momentum magnitude and angular truth quantities
    dfmcp.loc[:, 'p'] = np.linalg.norm(dfmcp[['px', 'py', 'pz']].values, axis=1)
    dfmcp['phi'] = np.arctan2(dfmcp['py'].values, dfmcp['px'].values)
    dfmcp['theta'] = np.arccos(dfmcp['pz'].values/dfmcp['p'].values)
    dfmcp['eta'] = -np.log(np.tan(dfmcp['theta'].values/2.))

    # per-event tag: (pid, eta, phi, p, mean hit eta, mean hit phi, half-range)
    tag = np.zeros(shape=(data.shape[0], 7))
    trg = args.topo_range/1000.
    for iev in np.arange(data.shape[0]):
        # NOTE(review): transpose(2,0,1) makes the array non-contiguous, so
        # reshape returns a COPY -- the in-place normalization of ev below
        # does NOT write back into `data`, and the saved data.npy therefore
        # stays un-normalized. Confirm whether that is intended.
        ev = data[iev].transpose(2,0,1).reshape(3,-1)
        truth = dfmcp[dfmcp['event'] == iev]
        if not truth.shape[0]:
            continue
        # mask of cells with deposited energy
        mask = ev[0] > 0.
        if not sum(mask):
            continue
        eta = ev[1][ev[0] > 0.]
        phi = ev[2][ev[0] > 0.]
        # normalize
        # print(ev.T.reshape(20, 20, 3) == data[iev])
        ev[0] /= 10.
        # ev[1][mask] = (ev[1][mask] - eta.mean() + trg)/(2.*trg)
        # ev[2][mask] = (ev[2][mask] - phi.mean() + trg)/(2.*trg)
        # center hit angles on the truth particle and scale into [0, 1]
        ev[1][mask] = (ev[1][mask] - truth['eta'].values + trg)/(2.*trg)
        ev[2][mask] = (ev[2][mask] - truth['phi'].values + trg)/(2.*trg)
        # ev[1][mask] = (ev[1][mask] + 1.0)/2.
        # ev[2][mask] = (ev[2][mask] + np.pi)/2./np.pi
        # zero out cells that fall outside the normalized window
        out_range = (ev[1] > 1.) | (ev[1] < 0.) | (ev[2] > 1.) | (ev[2] < 0.)
        ev.T[out_range] = (0., 0., 0.)
        # NOTE(review): `test` is never used below -- leftover debug?
        test = data[iev].transpose(2,0,1).reshape(3,-1)
        tag[iev] = np.hstack([truth[['pid', 'eta', 'phi', 'p']].values[0], (eta.mean(), phi.mean(), trg)])
        # print(ev[1])
        # print((eta.shape, phi.shape), (eta.mean(), phi.mean()), truth[['eta', 'phi']].values)

    # keep only events with a positive truth momentum (i.e. actually tagged)
    mask = tag.T[3] > 0.
    print('training data shape: {}'.format(data[mask].shape))
    print('tagging data shape: {}'.format(tag[mask].shape))
    np.save(os.path.join(args.outdir, 'data.npy'), data[mask])
    np.save(os.path.join(args.outdir, 'tag.npy'), tag[mask])
#! /usr/local/bin/python3
'''
A python script to facilitate the ML training benchmark for imaging Calorimeter:
simulation -> reconstruction -> prepare tagging data for tensorflow
Author: Chao Peng (ANL)
Date: 06/20/2021
'''
import os
import sys
import subprocess
import argparse
# Default detector description: $DETECTOR_PATH/$JUGGLER_DETECTOR.xml
default_compact = os.path.join(os.environ.get('DETECTOR_PATH', ''),
                               '{}.xml'.format(os.environ.get('JUGGLER_DETECTOR', '')))

parser = argparse.ArgumentParser()
parser.add_argument('-n', '--numberOfEvents', dest='nev', type=int, default=100, help='Number of events to process.')
parser.add_argument('-t', '--nametag', type=str, default='IMCAL_ML', help='Name tag for output files.')
parser.add_argument('--seed', type=int, default=-1, help='Random seed to scripts.')
parser.add_argument('--process', type=str, default='sim, rec, tag', help='Processes to be executed (sim, rec, tag).')
parser.add_argument('--numberOfLayers', dest='nlayer', type=int, default=20, help='number of layers in ML data.')
parser.add_argument('--numberOfHits', dest='nhit', type=int, default=20, help='number of hits in ML data.')
parser.add_argument('--particles', type=str, default='electron', help='Partcile names, separated by \",\".')
parser.add_argument('--pmin', type=float, default=0.5, help='Minimum momentum of particles.')
parser.add_argument('--pmax', type=float, default=10, help='Maximum momentum of particles.')
parser.add_argument('--compact', type=str, default=default_compact, help='Path to detector compact file.')
args = parser.parse_args()
kwargs = vars(args)

# one working directory per pipeline stage
for mdir in ['gen_data', 'sim_data', 'rec_data', 'tag_data']:
    os.makedirs(mdir, exist_ok=True)

# stage outputs share the <nametag>_<pmin>_<pmax> naming scheme
gen_file = os.path.join('gen_data', '{nametag}_{pmin}_{pmax}.hepmc'.format(**kwargs))
sim_file = os.path.join('sim_data', '{nametag}_{pmin}_{pmax}.root'.format(**kwargs))
rec_file = os.path.join('rec_data', '{nametag}_{pmin}_{pmax}.root'.format(**kwargs))
tag_dir = os.path.join('tag_data', '{nametag}_{pmin}_{pmax}'.format(**kwargs))
os.makedirs(tag_dir, exist_ok=True)

# which pipeline stages to run
procs = [p.strip() for p in args.process.split(',')]
# directory of this script, used to locate the helper scripts/options
sdir = os.path.dirname(os.path.realpath(__file__))
# stage 1: particle generation + Geant4 simulation
if 'sim' in procs:
    # generate particles
    gen_cmd = ['python', os.path.join(sdir, 'scripts', 'gen_particles.py'), gen_file,
               '-n', '{}'.format(args.nev),
               '-s', '{}'.format(args.seed),
               '--angmin', '50', '--angmax', '130',
               '--pmin', '{}'.format(args.pmin), '--pmax', '{}'.format(args.pmax),
               '--particles', args.particles]
    subprocess.run(gen_cmd)

    # simulation
    sim_cmd = ['npsim',
               '--part.minimalKineticEnergy', '1*TeV',
               '--numberOfEvents', '{}'.format(args.nev),
               '--runType', 'batch',
               '--inputFiles', gen_file,
               '--outputFile', sim_file,
               '--compact', args.compact,
               '-v', 'WARNING']
    if args.seed > 0:
        # subprocess arguments must be strings; passing the raw int
        # raised TypeError before
        sim_cmd += ['--random.seed', str(args.seed)]
    return_code = subprocess.run(sim_cmd).returncode
    print(return_code)
    # NOTE(review): this only aborts on signal deaths (returncode < 0);
    # a plain nonzero exit from npsim is not treated as an error -- confirm.
    if return_code is not None and return_code < 0:
        print("ERROR running simulation!")
        exit(return_code)
    # quick sanity listing of the produced trees
    subprocess.run(['rootls', '-t', sim_file])
# stage 2: reconstruction with Juggler (gaudirun.py)
if 'rec' in procs:
    # export to environment variables (used to pass arguments to the option file)
    os.environ['JUGGLER_SIM_FILE'] = sim_file
    os.environ['JUGGLER_REC_FILE'] = rec_file
    os.environ['JUGGLER_COMPACT_PATH'] = args.compact
    os.environ['JUGGLER_N_EVENTS'] = '{}'.format(args.nev)
    os.environ['IMCAL_ML_N_LAYERS'] = '{}'.format(args.nlayer)
    os.environ['IMCAL_ML_N_HITS'] = '{}'.format(args.nhit)
    # NOTE(review): 'JUGGLER_INTALL_PREFIX' looks like a typo for
    # 'INSTALL', but it may be the actual variable name set by the CI
    # environment -- confirm before renaming.
    juggler_xenv = os.path.join(os.environ.get('JUGGLER_INTALL_PREFIX', '../local'), 'Juggler.xenv')
    rec_cmd = ['xenv', '-x', juggler_xenv, 'gaudirun.py', os.path.join(sdir, 'options', 'imaging_ml_data.py')]
    return_code = subprocess.run(rec_cmd).returncode
    print(return_code)
    # only catches signal deaths (returncode < 0), same as the sim stage
    if return_code is not None and return_code < 0:
        print("ERROR running juggler (reconstruction)!")
        exit(return_code)
    process = subprocess.run(['rootls', '-t', rec_file])
# stage 3: convert reconstructed hits into tensorflow-ready training data
if 'tag' in procs:
    tag_cmd = ['python', os.path.join(sdir, 'scripts', 'prepare_tf_dataset.py'), rec_file,
               '-o', tag_dir,
               '--shape', '{nlayer},{nhit},3'.format(**kwargs)]
    return_code = subprocess.run(tag_cmd).returncode
    print(return_code)
    # only catches signal deaths (returncode < 0), same as the other stages
    if return_code is not None and return_code < 0:
        print("ERROR running ML data tagging!")
        exit(return_code)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment