Skip to content
Snippets Groups Projects
Commit 3842b0d4 authored by Chao Peng's avatar Chao Peng
Browse files

update for latest benchmarks

parent c528fa60
No related tags found
1 merge request!201Update ECal Barrel ML: combine Imaging and ScFi layers
......@@ -8,6 +8,7 @@ from GaudiKernel.SystemOfUnits import MeV, GeV, mm, cm, mrad
from Configurables import Jug__Base__InputCopier_dd4pod__Geant4ParticleCollection_dd4pod__Geant4ParticleCollection_ as MCCopier
from Configurables import Jug__Digi__CalorimeterHitDigi as CalHitDigi
from Configurables import Jug__Digi__CalorimeterBirksCorr as CalBirksCorr
from Configurables import Jug__Reco__CalorimeterHitReco as CalHitReco
from Configurables import Jug__Reco__CalorimeterHitsMerger as CalHitsMerger
from Configurables import Jug__Reco__ImagingPixelMerger as MLDataMerger
......@@ -20,8 +21,10 @@ kwargs = dict()
kwargs['input'] = os.environ.get('JUGGLER_SIM_FILE', '')
kwargs['output'] = os.environ.get('JUGGLER_REC_FILE', '')
kwargs['compact'] = os.environ.get('JUGGLER_COMPACT_PATH', '')
kwargs['combine'] = os.environ.get('IMCAL_ML_COMBINE', 'concatenate')
kwargs['nev'] = int(os.environ.get('JUGGLER_N_EVENTS', -1))
kwargs['combine'] = os.environ.get('IMCAL_ML_COMBINE', 'concatenate')
kwargs['img_nlayers'] = int(os.environ.get('IMCAL_ML_IMG_NLAYERS', 9))
kwargs['nhits'] = int(os.environ.get('IMCAL_ML_NHITS', 20))
if kwargs['nev'] < 1:
f = ROOT.TFile(kwargs['input'])
......@@ -72,8 +75,8 @@ becal_img_merger = MLDataMerger('becal_img_merger',
becal_img_sorter = MLDataSorter('becal_img_sorter',
inputHitCollection=becal_img_merger.outputHitCollection,
outputHitCollection='EcalBarrelImagingHitsML',
numberOfLayers=9,
numberOfHits=50)
numberOfLayers=kwargs['img_nlayers'],
numberOfHits=kwargs['nhits'])
#Central ECAL SciFi
......@@ -83,9 +86,20 @@ becal_scfi_daq = dict(
capacityADC=32768,
pedestalMean=400,
pedestalSigma=10)
# becal_scfi_daq = dict(
# dynamicRangeADC=50.*MeV,
# capacityADC=2**16,
# pedestalMean=100,
# pedestalSigma=0)
becal_scfi_digi = CalHitDigi('becal_scfi_digi',
becal_scfi_birks = CalBirksCorr('becal_scfi_birks',
inputHitCollection='EcalBarrelScFiHits',
outputHitCollection='EcalBarrelScFiHitsCorr',
birksConstant=0.126*mm/MeV)
becal_scfi_digi = CalHitDigi('becal_scfi_digi',
inputHitCollection=becal_scfi_birks.outputHitCollection,
# inputHitCollection='EcalBarrelScFiHits',
outputHitCollection='EcalBarrelScFiHitsDigi',
**becal_scfi_daq)
......@@ -111,8 +125,8 @@ becal_scfi_merger = CalHitsMerger('becal_scfi_merger',
becal_scfi_sorter = MLDataSorter('becal_scfi_sorter',
inputHitCollection=becal_scfi_merger.outputHitCollection,
outputHitCollection='EcalBarrelScFiHitsML',
numberOfLayers=9,
numberOfHits=50)
numberOfLayers=20,
numberOfHits=kwargs['nhits'])
# combine layers
becal_combiner = MLDataCombiner('becal_combiner',
......@@ -123,13 +137,17 @@ becal_combiner = MLDataCombiner('becal_combiner',
rule=kwargs['combine'])
podout.outputCommands = [
# 'keep *',
'drop *',
'keep mcparticles2',
'keep EcalBarrel*ML']
'keep EcalBarrel*ML',
'keep *Corr',
]
ApplicationMgr(
TopAlg=[podin, copier,
becal_img_digi, becal_img_reco, becal_img_merger, becal_img_sorter,
becal_scfi_birks,
becal_scfi_digi, becal_scfi_reco, becal_scfi_merger, becal_scfi_sorter,
becal_combiner,
podout],
......
import ROOT
import os
import gc
import numpy as np
import pandas as pd
import argparse
......@@ -97,8 +98,10 @@ if __name__ == '__main__':
dfm.loc[:, 'pT'] = pT
dfm.loc[:, 'eta'] = eta
# energy unit is in GeV, this put 1.0 = 5 MeV, which is enough for imaging pixels
df.loc[:, 'energy'] = df['energy']*200
# energy unit is in GeV, converting to MeV first
df.loc[:, 'eMeV'] = df['energy']*1000
# scaled by 5 MeV to have most of them in [0, 1]
df.loc[:, 'energy'] = df['eMeV']/5.
# convert eta, phi, edep so they are within [0, 1]
df.loc[:, 'etotal'] = df.groupby('event')['energy'].transform('sum')
df = df[df['etotal'] > 0.]
......@@ -134,6 +137,7 @@ if __name__ == '__main__':
for col in nullcols:
df.loc[:, col] = 0.
gc.collect()
# features and data-shape
featcols = [f.strip() for f in args.features.split(',')]
dshape = [int(i.strip()) for i in args.shape.split(',')]
......@@ -142,7 +146,10 @@ if __name__ == '__main__':
event_ids = df['event'].unique()
data = df.set_index('event')[featcols].clip(0, 1).values.reshape([len(event_ids)] + dshape)
tags = dfm.loc[event_ids, ['pdgID', 'p', 'pT', 'eta', 'phi', 'mass']]
tags.loc[:, 'Emeas'] = event_group['energy'].sum()
# also save Edep per layers
# merge the sandwich layers
df['layer'] = df['layer'] % 100
edeps = df.groupby(['event', 'layer'])['eMeV'].sum().unstack()
""" TEST
from matplotlib import pyplot as plt
......@@ -155,7 +162,9 @@ if __name__ == '__main__':
"""
print('Training data set shape = {}'.format(data.shape))
print('Tagging data set shape = {}'.format(tags.shape))
print('Edep/layer data set shape = {}'.format(edeps.shape))
np.save(os.path.join(args.outdir, 'data.npy'), data)
np.save(os.path.join(args.outdir, 'tag.npy'), tags.values)
np.save(os.path.join(args.outdir, 'edep.npy'), edeps.values)
import ROOT
import os
import gc
import numpy as np
import pandas as pd
import argparse
import sys
import matplotlib.pyplot as plt
# read from RDataFrame and flatten a given collection, return pandas dataframe
def flatten_collection(rdf, collection, cols=None, event_colname='event'):
    """Flatten a vector-valued collection from an RDataFrame into a pandas DataFrame.

    Each per-event vector branch is exploded into one row per element, and an
    integer event id column (``event_colname``) is attached so hits from
    different events can be told apart.

    :param rdf: ROOT RDataFrame (or any object providing ``GetColumnNames``
        and ``AsNumpy``).
    :param collection: branch prefix, e.g. ``'EcalBarrelHits'``.
    :param cols: optional list of sub-branch names; when empty, every branch
        starting with ``'<collection>.'`` is discovered automatically.
    :param event_colname: name of the generated event-id column.
    :return: flattened ``pandas.DataFrame`` (empty when no branch matches).
    """
    prefix = '{}.'.format(collection)
    if cols:
        branches = ['{}{}'.format(prefix, c) for c in cols]
    else:
        branches = [str(c) for c in rdf.GetColumnNames() if str(c).startswith(prefix)]
    if not branches:
        print('cannot find any branch under collection {}'.format(collection))
        return pd.DataFrame()

    data = rdf.AsNumpy(branches)
    # one event id per vector element, so rows from different events stay distinguishable
    event_ids = []
    for ievt, vec in enumerate(data[branches[0]]):
        event_ids.extend([ievt] * vec.size())
    for name, vecs in data.items():
        # keep integer branches as integers instead of letting numpy promote them
        # to floats; the vector's class name (e.g. RVec<int>) encodes the element type
        kind = type(vecs[0]).__name__.lower()
        dtype = np.int64 if ('int' in kind or 'long' in kind) else np.float32
        # type-safe creation of the flattened column
        data[name] = np.asarray([val for vec in vecs for val in vec], dtype=dtype)

    # assemble the data frame and attach the event ids
    flat = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
    flat.loc[:, event_colname] = event_ids
    return flat
# load root macros, input is an argument string
def load_root_macros(arg_macros):
    """Load ROOT macros given as a comma-separated string of file paths.

    Paths that do not exist on disk are reported and skipped instead of
    being passed to ROOT.
    """
    for path in (p.strip() for p in arg_macros.split(',')):
        if not os.path.exists(path):
            print('\"{}\" does not exist, skip loading it.'.format(path))
            continue
        ROOT.gROOT.Macro(path)
def cartesian_to_polar(x, y, z):
    """Convert Cartesian coordinates to spherical/cylindrical quantities.

    :return: tuple ``(r, theta, phi, rc, eta)`` — spherical radius, polar
        angle, azimuthal angle, cylindrical (transverse) radius, and
        pseudorapidity. Works element-wise on numpy arrays as well as scalars.
    """
    # transverse radius squared is shared by r and rc
    rho2 = x**2 + y**2
    r = np.sqrt(rho2 + z**2)
    rc = np.sqrt(rho2)
    theta = np.arccos(z / r)
    phi = np.arctan2(y, x)
    # pseudorapidity; diverges as theta approaches 0 or pi
    eta = -np.log(np.tan(theta / 2.))
    return r, theta, phi, rc, eta
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Visualize the cluster (layer-wise) from analysis')
    parser.add_argument('file', type=str, help='path to root file')
    parser.add_argument('--sim', action='store_true', help='flag switch between sim and rec data (default rec)')
    parser.add_argument('-o', type=str, default='.', dest='outdir', help='output directory')
    # parser.add_argument('--compact', type=str, default='', dest='compact', help='compact file')
    parser.add_argument('-m', '--macros', type=str, default='rootlogon.C', dest='macros',
                        help='root macros to load (accept multiple paths separated by \",\")')
    parser.add_argument('--topo-range', type=float, default=1000.0, dest='topo_range',
                        help='half range for projection plot (mrad)')
    parser.add_argument('--shape', type=str, default='20, 20', dest='shape',
                        help='shape of data (Nlayer, Nhits), default (20, 20)')
    # FIX: help text used to list the features in a different order than the actual default
    parser.add_argument('--features', type=str, default='energy, eta, phi',
                        help='features of hit data, default (energy, eta, phi)')
    parser.add_argument('--branch', type=str, default='EcalBarrelImagingHitsML', help='name of data branch')
    parser.add_argument('--edep-max', type=float, default=0., help='maximum edep (GeV) to plot')
    parser.add_argument('--edep-nbins', type=int, default=200, help='number of bins')
    parser.add_argument('--null-columns', type=str, default='', help='make the columns zeros')
    parser.add_argument('--name-tag', type=str, default='test', help='name tag to save the file')
    parser.add_argument('--samp', type=float, default=0.103, help='sampling fraction')
    args = parser.parse_args()

    os.makedirs(args.outdir, exist_ok=True)
    load_root_macros(args.macros)

    # read data and mcparticles
    rdf = ROOT.RDataFrame("events", args.file)

    # FIX: was `'mcparticles2' if args.sim else 'mcparticles2'` -- both branches
    # identical, so the ternary was dead code (behavior unchanged here).
    # TODO(review): confirm whether sim files are supposed to use a different branch name.
    mc_branch = 'mcparticles2'
    dfm = flatten_collection(rdf, mc_branch, ['genStatus', 'pdgID', 'ps.x', 'ps.y', 'ps.z', 'mass'])
    dfm.rename(columns={c: c.replace(mc_branch + '.', '') for c in dfm.columns}, inplace=True)

    # select incident particles
    dfm = dfm[dfm['genStatus'].isin([0, 1])]
    # NOTE: assumed single particles
    dfm = dfm.groupby('event').first()
    # p, theta, phi, pT, eta = cartesian_to_polar(*dfm[['ps.x', 'ps.y', 'ps.z']].values.T)

    # split event ids by incident-particle species (pi- / e-)
    pions_events = dfm[dfm.loc[:, 'pdgID'] == -211].index.unique()
    els_events = dfm[dfm.loc[:, 'pdgID'] == 11].index.unique()
    print(len(pions_events), len(els_events))

    # hit-level data: sim files store raw energyDeposit, rec files store reco hits
    if args.sim:
        df = flatten_collection(rdf, args.branch, ['energyDeposit'])
        df.rename(columns={c: c.replace(args.branch + '.', '') for c in df.columns}, inplace=True)
        df.rename(columns={'energyDeposit': 'energy'}, inplace=True)
    else:
        df = flatten_collection(rdf, args.branch, ['layer', 'energy', 'position.x', 'position.y', 'position.z'])
        df.rename(columns={c: c.replace(args.branch + '.', '') for c in df.columns}, inplace=True)

    # total deposited energy per event, split by species
    dfe = df.groupby('event')['energy'].sum().reset_index()
    dfe_pions = dfe.loc[dfe['event'].isin(pions_events)]
    dfe_els = dfe.loc[dfe['event'].isin(els_events)]

    fig, ax = plt.subplots(figsize=(16, 9), dpi=120)
    if args.edep_max <= 0.:
        # auto-range: slightly beyond the 99th percentile of all events
        args.edep_max = dfe['energy'].quantile(0.99)*1.2
    bins = np.linspace(0., args.edep_max, args.edep_nbins + 1)
    # correct for the calorimeter sampling fraction
    scale = 1. / args.samp

    def _unit_weights(nrows):
        # per-entry weight 1/N normalizes each histogram to unit total;
        # FIX: guards against ZeroDivisionError when a species has no events
        # (the old `[1./df.shape[0]]*df.shape[0]` evaluated 1./0 first)
        return [1. / nrows] * nrows if nrows else []

    pion_vals, _, _ = ax.hist(dfe_pions['energy']*scale,
                              weights=_unit_weights(dfe_pions.shape[0]),
                              # density=True,
                              histtype='step', bins=bins, label='pions')
    el_vals, _, _ = ax.hist(dfe_els['energy']*scale,
                            weights=_unit_weights(dfe_els.shape[0]),
                            # density=True,
                            histtype='step', bins=bins, label='electrons')
    bincenters = (bins[1:] + bins[:-1])/2.
    # NOTE(review): outputs go to the current directory, not args.outdir -- confirm intended
    pd.DataFrame(index=bincenters, data=np.vstack([pion_vals, el_vals]).T, columns=['pions', 'electrons'])\
      .to_csv('{}.csv'.format(args.name_tag))

    ax.set_xlabel('Energy Deposit (GeV)', fontsize=24)
    ax.set_ylabel('Normalized Counts', fontsize=24)
    ax.set_yscale('log')
    ax.grid(linestyle=':')
    ax.legend(fontsize=24)
    ax.tick_params(labelsize=24)
    fig.savefig('{}.png'.format(args.name_tag))
......@@ -18,13 +18,14 @@ parser.add_argument('-n', '--numberOfEvents', dest='nev', type=int, default=100,
parser.add_argument('-t', '--nametag', type=str, default='IMCAL_ML', help='Name tag for output files.')
parser.add_argument('--seed', type=int, default=-1, help='Random seed to scripts.')
parser.add_argument('--process', type=str, default='sim, rec, tag', help='Processes to be executed (sim, rec, tag).')
parser.add_argument('--numberOfLayers', dest='nlayer', type=int, default=9, help='number of layers in ML data.')
parser.add_argument('--numberOfHits', dest='nhit', type=int, default=50, help='number of hits in ML data.')
parser.add_argument('--nlayers', dest='nlayers', type=int, default=9, help='number of layers in ML data.')
parser.add_argument('--nhits', dest='nhits', type=int, default=20, help='number of hits in ML data.')
parser.add_argument('--particles', type=str, default='electron', help='Partcile names, separated by \",\".')
parser.add_argument('--pmin', type=float, default=0.5, help='Minimum momentum of particles.')
parser.add_argument('--pmax', type=float, default=10, help='Maximum momentum of particles.')
parser.add_argument('--compact', type=str, default=default_compact, help='Path to detector compact file.')
parser.add_argument('--combine-method', type=str, default='interlayer', help='Path to detector compact file.')
parser.add_argument('--physics-list', type=str, default='FTFP_BERT', help='Path to detector compact file.')
args = parser.parse_args()
kwargs = vars(args)
......@@ -46,7 +47,7 @@ if 'sim' in procs:
gen_cmd = ['python', os.path.join(sdir, 'scripts', 'gen_particles.py'), gen_file,
'-n', '{}'.format(args.nev),
'-s', '{}'.format(args.seed),
'--angmin', '50', '--angmax', '130',
'--angmin', '45', '--angmax', '135',
'--pmin', '{}'.format(args.pmin), '--pmax', '{}'.format(args.pmax),
'--particles', args.particles]
subprocess.run(gen_cmd)
......@@ -55,6 +56,7 @@ if 'sim' in procs:
'--part.minimalKineticEnergy', '1*TeV',
'--numberOfEvents', '{}'.format(args.nev),
'--runType', 'batch',
# '--physics.list', args.physics_list,
'--inputFiles', gen_file,
'--outputFile', sim_file,
'--compact', args.compact,
......@@ -71,20 +73,23 @@ if 'sim' in procs:
if 'rec' in procs:
# export to environment variables (used to pass arguments to the option file)
os.environ['JUGGLER_SIM_FILE'] = sim_file
os.environ['JUGGLER_REC_FILE'] = rec_file
os.environ['JUGGLER_COMPACT_PATH'] = args.compact
os.environ['JUGGLER_N_EVENTS'] = '{}'.format(args.nev)
os.environ['IMCAL_ML_N_LAYERS'] = '{}'.format(args.nlayer)
os.environ['IMCAL_ML_N_HITS'] = '{}'.format(args.nhit)
os.environ['IMCAL_ML_COMBINE'] = '{}'.format(args.combine_method)
run_env = os.environ.copy()
run_env.update({
'JUGGLER_SIM_FILE': sim_file,
'JUGGLER_REC_FILE': rec_file,
'JUGGLER_COMPACT_PATH': args.compact,
'JUGGLER_N_EVENTS': str(args.nev),
'IMCAL_ML_IMG_NLAYERS': str(args.nlayers),
'IMCAL_ML_NHITS': str(args.nhits),
'IMCAL_ML_COMBINE': str(args.combine_method),
})
juggler_xenv = os.path.join(os.environ.get('JUGGLER_INTALL_PREFIX', '../local'), 'Juggler.xenv')
rec_cmd = [
'xenv', '-x', juggler_xenv, # v35+ do not need xenv anymore
'gaudirun.py', os.path.join(sdir, 'options', 'imaging_ml_data.py')
]
return_code = subprocess.run(rec_cmd).returncode
return_code = subprocess.run(rec_cmd, env=run_env).returncode
print(return_code)
if return_code is not None and return_code < 0:
print("ERROR running juggler (reconstruction)!")
......@@ -98,7 +103,7 @@ if 'tag' in procs:
tag_cmd = ['python', os.path.join(sdir, 'scripts', 'prepare_tf_dataset.py'), rec_file,
'-o', tag_dir,
'--branch', 'EcalBarrelImagingHitsML',
'--shape', '{nlayer}, {nhit}'.format(**kwargs),
'--shape', '{nlayers}, {nhits}'.format(**kwargs),
'--features', 'energy, rc, eta, phi',
]
return_code = subprocess.run(tag_cmd).returncode
......@@ -111,8 +116,8 @@ if 'tag' in procs:
tag_cmd = ['python', os.path.join(sdir, 'scripts', 'prepare_tf_dataset.py'), rec_file,
'-o', tag_dir + '_scfi',
'--branch', 'EcalBarrelScFiHitsML',
'--shape', '{nlayer}, {nhit}'.format(**kwargs),
# '--shape', '{}, {}'.format(20 - kwargs['nlayer'], kwargs['nhit']),
'--shape', '{nlayers}, {nhits}'.format(**kwargs),
# '--shape', '{}, {}'.format(20 - kwargs['nlayers'], kwargs['nhits']),
'--features', 'energy, rc, eta, phi',
'--null-columns', 'eta',
]
......@@ -126,7 +131,7 @@ if 'tag' in procs:
tag_cmd = ['python', os.path.join(sdir, 'scripts', 'prepare_tf_dataset.py'), rec_file,
'-o', tag_dir + '_combined',
'--branch', 'EcalBarrelHitsCombinedML',
'--shape', '{}, {}'.format(kwargs['nlayer']*2, kwargs['nhit']),
'--shape', '{}, {}'.format(20 + kwargs['nlayers'], kwargs['nhits']),
'--features', 'layer_type, energy, rc, eta, phi',
]
return_code = subprocess.run(tag_cmd).returncode
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment