add benchmark for pion0

Merged Chao Peng requested to merge add_pi0_benchmark into master
'''
A simple analysis script to extract basic information from Monte-Carlo hits
'''
import os
import ROOT
import pandas as pd
import numpy as np
import argparse
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
from lxml import etree as ET


class AthenaDecoder:
    def __init__(self, compact, readout):
        self.readouts = self.getReadouts(compact)
        self.changeReadout(readout)

    def changeReadout(self, readout):
        self.fieldsmap = self.decomposeIDs(self.readouts[readout])

    def get(self, idvals, field):
        start, width = self.fieldsmap[field]
        if width >= 0:
            return np.bitwise_and(np.right_shift(idvals, start), (1 << width) - 1)
        else:
            # a negative width marks a signed field: its most significant bit is the sign bit
            width = abs(width) - 1
            vals = np.bitwise_and(np.right_shift(idvals, start), (1 << width) - 1)
            return np.where(np.bitwise_and(np.right_shift(idvals, start + width), 1),
                            vals - (1 << width), vals)
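
    # Worked example of the signed decoding in get() (values are illustrative, not
    # taken from any actual readout spec): for a field declared with width -8, the
    # magnitude uses 7 bits and the 8th bit is the sign bit, so a raw 8-bit field
    # value of 0b11111111 decodes to 127 - (1 << 7) = -1.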

    def decode(self, idvals):
        return {field: self.get(idvals, field) for field, _ in self.fieldsmap.items()}

    @staticmethod
    def getReadouts(path):
        res = dict()
        AthenaDecoder.__getReadoutsRecur(path, res)
        return res

    @staticmethod
    def __getReadoutsRecur(path, res):
        if not os.path.exists(path):
            print('XML file {} does not exist! Ignoring it.'.format(path))
            return
        lccdd = ET.parse(path).getroot()
        readouts = lccdd.find('readouts')
        if readouts is not None:
            for readout in readouts:
                ids = readout.find('id')
                if ids is not None:
                    res[readout.attrib['name']] = ids.text
        for child in lccdd:
            if child.tag == 'include':
                root_dir = os.path.dirname(os.path.realpath(path))
                AthenaDecoder.__getReadoutsRecur(os.path.join(root_dir, child.attrib['ref']), res)

    @staticmethod
    def decomposeIDs(id_str):
        res = dict()
        curr_bit = 0
        for field_bits in id_str.split(','):
            elements = field_bits.split(':')
            field_name = elements[0]
            bit_width = int(elements[-1])
            if len(elements) == 3:
                curr_bit = int(elements[1])
            res[field_name] = (curr_bit, bit_width)
            curr_bit += abs(bit_width)
        return res
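
# Illustrative example of the <id> string parsed by decomposeIDs; the field names
# and widths here are hypothetical, not taken from the actual compact files:
#   "system:8,layer:12,x:-12,y:-12"  ->  {'system': (0, 8), 'layer': (8, 12),
#                                         'x': (20, -12), 'y': (32, -12)}
# i.e. each field maps to (starting bit, bit width), with a negative width marking
# a signed field handled by AthenaDecoder.get above.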


# read from RDataFrame and flatten a given collection, return pandas dataframe
def flatten_collection(rdf, collection, cols=None):
    if not cols:
        cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
    else:
        cols = ['{}.{}'.format(collection, c) for c in cols]
    if not cols:
        print('cannot find any branch under collection {}'.format(collection))
        return pd.DataFrame()

    data = rdf.AsNumpy(cols)
    # flatten the data, add an event id to identify clusters from different events
    evns = []
    for i, vec in enumerate(data[cols[0]]):
        evns += [i]*vec.size()
    for n, vals in data.items():
        # make sure ints are not converted to floats
        typename = vals[0].__class__.__name__.lower()
        dtype = np.int64 if 'int' in typename or 'long' in typename else np.float64
        # type-safe creation
        data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
    # build data frame
    dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
    dfp.loc[:, 'event'] = evns
    return dfp
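
# Minimal usage sketch for flatten_collection; the file and collection names below
# are placeholders for illustration, not names guaranteed to exist in the output:
#   rdf = ROOT.RDataFrame('events', 'rec_file.root')
#   hits = flatten_collection(rdf, 'EcalBarrelHits', cols=['cellID', 'energy'])
#   # -> one row per hit with columns 'EcalBarrelHits.cellID', 'EcalBarrelHits.energy'
#   #    plus an 'event' column identifying which event each hit came from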


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('rec_file', help='Path to reconstruction output file.')
    parser.add_argument('-o', dest='outdir', default='.', help='Output directory.')
    parser.add_argument('-c', '--compact', dest='compact', required=True,
                        help='Top-level xml file of the detector description')
    parser.add_argument('--collection', dest='coll', required=True,
                        help='Hits collection name in the reconstruction file')
    parser.add_argument('--readout', dest='readout', required=True,
                        help='Readout name for the hits collection')
    args = parser.parse_args()

    # build the cell ID decoder from the compact detector description
    decoder = AthenaDecoder(args.compact, args.readout)

    # get hits and strip the collection prefix from the column names
    rdf_rec = ROOT.RDataFrame('events', args.rec_file)
    df = flatten_collection(rdf_rec, args.coll)
    df.rename(columns={c: c.replace(args.coll + '.', '') for c in df.columns}, inplace=True)

    # alternative: initialize the dd4hep detector and use its decoder
    # import DDG4
    # kernel = DDG4.Kernel()
    # description = kernel.detectorDescription()
    # kernel.loadGeometry("file:{}".format(args.compact))
    # dd4hep_decoder = description.readout(args.readout).idSpec().decoder()
    # lindex = dd4hep_decoder.index('x')
    # get_layer_id = np.vectorize(lambda cid: dd4hep_decoder.get(cid, lindex))
    # df.loc[:, 'layerID'] = get_layer_id(df['cellID'].astype(int).values)
    # always terminate dd4hep kernel
    # kernel.terminate()

    # faster way to get the layer IDs: decode the cell IDs directly with numpy
    df.loc[:, 'layerID'] = decoder.get(df['cellID'].values, 'layer')
    df.loc[:, 'xID'] = decoder.get(df['cellID'].values, 'x')
    print(df[['cellID', 'layerID', 'xID', 'position.x', 'position.y', 'position.z', 'energy']])
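
# Example invocation (script and file names are placeholders; adjust the compact
# file, collection, and readout to the actual detector setup):
#   python analysis_script.py rec_pi0.root -c athena.xml \
#       --collection EcalBarrelHits --readout EcalBarrelHits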