Commit 43c01cc4 authored by Chao Peng

Analysis script for checking merged hits for imaging barrel ecal

parent 34060d9f
Merge request !279: Analysis script for checking merged hits for imaging barrel ecal
@@ -160,8 +160,8 @@ if has_ecal_barrel_scfi:
         #OutputLevel=DEBUG,
         inputHitCollection=scfi_barrel_reco.outputHitCollection,
         outputHitCollection="EcalBarrelScFiGridReco",
-        fields=["fiber"],
-        fieldRefNumbers=[1],
+        fields=["fiber", "z"],
+        fieldRefNumbers=[1, 1],
         readoutClass="EcalBarrelScFiHits")
     algorithms.append(scfi_barrel_merger)
...
'''
A simple analysis script to check raw hits against merged hits
* The ReadoutDecoder class depends only on the Python XML parser and is thus much faster, but it may
  fail on features that the dd4hep parser supports and the plain XML parser does not
** The dd4hep decoder can be used instead (more reliable but much slower); see get_layer_ids.py for the code
Chao Peng (ANL)
'''
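# A hypothetical invocation for reference (the script name, file paths, and the raw
# collection name are placeholders; the merged collection, readout, and fields match
# the benchmark options change above):
#   python check_merged_hits.py rec.root -c athena.xml -o results \
#       --raw-coll EcalBarrelScFiHitsReco --merged-coll EcalBarrelScFiGridReco \
#       --readout EcalBarrelScFiHits --merged-fields fiber,z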
import os
import ROOT
import pandas as pd
import numpy as np
import argparse
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
from utils import ReadoutDecoder, flatten_collection
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('rec_file', help='Path to reconstruction output file.')
    parser.add_argument('-o', dest='outdir', default='.', help='Output directory.')
    parser.add_argument('-c', '--compact', dest='compact', required=True,
                        help='Top-level xml file of the detector description')
    parser.add_argument('--raw-coll', dest='rcoll', required=True,
                        help='Raw hits collection name in the reconstruction file')
    parser.add_argument('--merged-coll', dest='mcoll', required=True,
                        help='Merged hits collection name in the reconstruction file')
    parser.add_argument('--readout', dest='readout', required=True,
                        help='Readout name for the hits collection')
    parser.add_argument('--merged-fields', dest='mfields', required=True,
                        help='Merged fields (separated by \",\")')
    args = parser.parse_args()
    # make sure the output directory exists (the figures are saved there)
    os.makedirs(args.outdir, exist_ok=True)
    # decoder and merge mask
    decoder = ReadoutDecoder(args.compact, args.readout)
    mmask = np.uint64(0)
    for field in args.mfields.split(','):
        fmask = decoder.mask(field.strip())
        mmask = np.bitwise_or(mmask, fmask)
        print('{:20s} {:#066b}'.format('Field Mask - ' + field, fmask))
    mmask = np.bitwise_not(mmask)
    print('{:20s} {:#066b}'.format('Merging Mask', mmask))
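    # For illustration, take a hypothetical 8-bit ID spec "layer:0:4,fiber:4:4" with
    # merged field "fiber":
    #   mask('fiber')  -> 0b11110000
    #   merging mask   -> 0b00001111  (bitwise NOT of the OR of all field masks)
    # cellID & merging_mask then zeroes the merged fields, so two hits that differ
    # only in "fiber" end up with the same masked id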
    # build dataframes from reconstructed root files
    # get hits
    columns = ['cellID', 'position.x', 'position.y', 'position.z', 'energy']
    rdf_rec = ROOT.RDataFrame('events', args.rec_file)
    # raw hits
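    # (only the first 100 events are flattened here to keep things fast; the merges
    # on 'event' below implicitly restrict the merged hits to the same events)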
    dfr = flatten_collection(rdf_rec.Range(100), args.rcoll, columns)
    dfr.rename(columns={c: c.replace(args.rcoll + '.', '') for c in dfr.columns}, inplace=True)
    # merged hits
    dfm = flatten_collection(rdf_rec, args.mcoll, columns)
    dfm.rename(columns={c: c.replace(args.mcoll + '.', '') for c in dfm.columns}, inplace=True)
    dfr.loc[:, 'masked_id'] = np.bitwise_and(mmask, dfr['cellID'])
    dfm.loc[:, 'merged_id'] = np.bitwise_and(mmask, dfm['cellID'])
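    # cross-match the two collections: for each raw hit, count the merged hits that
    # share its masked id; for each merged hit, count the raw (fiber) hits it absorbed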
    dfr = dfr.merge(dfm.groupby('event')['merged_id'].apply(np.array), on='event')
    dfr.loc[:, 'raw_merge_num'] = dfr.apply(lambda r: np.count_nonzero(r.masked_id == r.merged_id), axis=1)
    dfm = dfm.merge(dfr.groupby('event')['masked_id'].apply(np.array), on='event')
    dfm.loc[:, 'merge_raw_num'] = dfm.apply(lambda r: np.count_nonzero(r.merged_id == r.masked_id), axis=1)
    print(dfr[['masked_id', 'merged_id', 'raw_merge_num']])
    print(dfm[['masked_id', 'merged_id', 'merge_raw_num']])
    # dfr.to_csv('raw_hits.csv')
    # dfm.to_csv('merged_hits.csv')
    # merged hits (grid)
    # dfm = pd.read_csv('merged_hits.csv', index_col=0)
    ev_grpm = dfm.groupby('event')
    # energy scaling to MeV (assume the mean energy sum per event corresponds to 5 GeV)
    escale = 5000./np.mean(ev_grpm['energy'].sum())
    dfm['energy'] = dfm['energy']*escale
    ev_ngrids = ev_grpm['energy'].size()
    nev = len(ev_ngrids)
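    # all histograms below use per-entry weights so that the bin contents sum to one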
    # number of grids per event
    fig, ax = plt.subplots(figsize=(12, 9), dpi=160)
    ax.hist(ev_ngrids, ec='k', bins=30, rwidth=1.0, weights=np.repeat(1./nev, nev))
    ax.tick_params(labelsize=24)
    ax.set_title('5.0 GeV $e^-$', fontsize=26)
    ax.set_xlabel('# of Grids per Event', fontsize=24)
    ax.set_ylabel('Normalized Counts', fontsize=24)
    fig.savefig(os.path.join(args.outdir, 'grid_per_event.png'))
    # energy deposit per grid
    fig, ax = plt.subplots(figsize=(12, 9), dpi=160)
    ax.hist(dfm['energy'], ec='k', bins=30, rwidth=1.0, weights=np.repeat(1./len(dfm), len(dfm)))
    ax.tick_params(labelsize=24)
    ax.set_title('5.0 GeV $e^-$', fontsize=26)
    ax.set_xlabel('Grid $E_{dep}$ (MeV)', fontsize=24)
    ax.set_ylabel('Normalized Counts', fontsize=24)
    fig.savefig(os.path.join(args.outdir, 'edep_per_grid.png'))
    # number of fired fibers per grid
    fig, ax = plt.subplots(figsize=(12, 9), dpi=160)
    ax.hist(dfm['merge_raw_num'], ec='k', bins=30, rwidth=1.0, weights=np.repeat(1./len(dfm), len(dfm)))
    ax.tick_params(labelsize=24)
    ax.set_title('5.0 GeV $e^-$', fontsize=26)
    ax.set_xlabel('# of Fiber-Signals per Grid', fontsize=24)
    ax.set_ylabel('Normalized Counts', fontsize=24)
    fig.savefig(os.path.join(args.outdir, 'signals_per_grid.png'))
    # raw hits (fiber)
    # dfr = pd.read_csv('raw_hits.csv', index_col=0)
    dfr['energy'] = dfr['energy']*escale
    ev_grpr = dfr.groupby('event')
    ev_nfibers = ev_grpr['energy'].size()
    nev = len(ev_nfibers)
    # number of fibers per event
    fig, ax = plt.subplots(figsize=(12, 9), dpi=160)
    ax.hist(ev_nfibers, ec='k', bins=30, rwidth=1.0, weights=np.repeat(1./nev, nev))
    ax.tick_params(labelsize=24)
    ax.set_title('5.0 GeV $e^-$', fontsize=26)
    ax.set_xlabel('# of Fibers per Event', fontsize=24)
    ax.set_ylabel('Normalized Counts', fontsize=24)
    fig.savefig(os.path.join(args.outdir, 'fiber_per_event.png'))
    # energy deposit per fiber
    fig, ax = plt.subplots(figsize=(12, 9), dpi=160)
    ax.hist(dfr['energy'], ec='k', bins=30, rwidth=1.0, weights=np.repeat(1./len(dfr), len(dfr)))
    ax.tick_params(labelsize=24)
    ax.set_title('5.0 GeV $e^-$', fontsize=26)
    ax.set_xlabel('Fiber $E_{dep}$ (MeV)', fontsize=24)
    ax.set_ylabel('Normalized Counts', fontsize=24)
    fig.savefig(os.path.join(args.outdir, 'edep_per_fiber.png'))
    # number of grids per fiber-signal
    fig, ax = plt.subplots(figsize=(12, 9), dpi=160)
    ax.hist(dfr['raw_merge_num'], ec='k', bins=np.arange(-0.5, 7.5, 1.0),
            rwidth=1.0, weights=np.repeat(1./len(dfr), len(dfr)))
    ax.tick_params(labelsize=24)
    ax.set_title('5.0 GeV $e^-$', fontsize=26)
    ax.set_xlabel('# of Grids per Fiber-Signal', fontsize=24)
    ax.set_ylabel('Normalized Counts', fontsize=24)
    fig.savefig(os.path.join(args.outdir, 'grids_per_fiber.png'))
    # relate the two datasets: sum the fiber energies within each grid cell
    dfe = dfr.groupby(['event', 'masked_id'])['energy'].sum().reset_index()
    print(dfe)
    dfe.rename(columns={'energy': 'energy_fibers'}, inplace=True)
    dfem = dfm[['event', 'merged_id', 'energy']].rename(columns={'merged_id': 'masked_id'})
    dfe = dfe.merge(dfem, on=['event', 'masked_id'])
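    # each point below is one grid cell: summed fiber energy vs. merged grid energy;
    # the points should line up on the diagonal if merging simply sums the hit energies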
    fig, ax = plt.subplots(figsize=(12, 9), dpi=160)
    ax.scatter(*dfe[['energy_fibers', 'energy']].values.T)
    ax.tick_params(labelsize=24)
    ax.set_title('5.0 GeV $e^-$ (100 events)', fontsize=26)
    ax.set_xlabel('Fiber Group Energy Sum (MeV)', fontsize=24)
    ax.set_ylabel('Grid Energy (MeV)', fontsize=24)
    fig.savefig(os.path.join(args.outdir, 'fiber_grid_energy.png'))
 '''
-A simple analysis script to extract some basic info of Monte-Carlo hits
+An example script showing how to get data from a reconstructed root file and decode the cellID column
+* The commented code uses the dd4hep decoder, which takes a significant amount of time to initialize
+** The ReadoutDecoder class depends only on the Python XML parser and is thus much faster, but it may
+   fail on features that the dd4hep parser supports and the plain XML parser does not
 Chao Peng (ANL)
 '''
 import os
 import ROOT
@@ -8,93 +12,7 @@ import numpy as np
 import argparse
 from matplotlib import pyplot as plt
 import matplotlib.ticker as ticker
-from lxml import etree as ET
+from utils import ReadoutDecoder, flatten_collection
-class AthenaDecoder:
-    def __init__(self, compact, readout):
-        self.readouts = self.getReadouts(compact)
-        self.changeReadout(readout)
-    def changeReadout(self, readout):
-        self.fieldsmap = self.decomposeIDs(self.readouts[readout])
-    def get(self, idvals, field):
-        start, width = self.fieldsmap[field]
-        if width >= 0:
-            return np.bitwise_and(np.right_shift(idvals, start), (1 << width) - 1)
-        # first bit is sign bit
-        else:
-            width = abs(width) - 1
-            vals = np.bitwise_and(np.right_shift(idvals, start), (1 << width) - 1)
-            return np.where(np.bitwise_and(np.right_shift(idvals, start + width), 1), vals - (1 << width), vals)
-    def decode(self, idvals):
-        return {field: self.get(idvals, field) for field, _ in self.fieldsmap.items()}
-    @staticmethod
-    def getReadouts(path):
-        res = dict()
-        AthenaDecoder.__getReadoutsRecur(path, res)
-        return res
-    @staticmethod
-    def __getReadoutsRecur(path, res):
-        if not os.path.exists(path):
-            print('Xml file {} not exist! Ignored it.'.format(path))
-            return
-        lccdd = ET.parse(path).getroot()
-        readouts = lccdd.find('readouts')
-        if readouts is not None:
-            for readout in readouts.getchildren():
-                ids = readout.find('id')
-                if ids is not None:
-                    res[readout.attrib['name']] = ids.text
-        for child in lccdd.getchildren():
-            if child.tag == 'include':
-                root_dir = os.path.dirname(os.path.realpath(path))
-                AthenaDecoder.__getReadoutsRecur(os.path.join(root_dir, child.attrib['ref']), res)
-    @staticmethod
-    def decomposeIDs(id_str):
-        res = dict()
-        curr_bit = 0
-        for field_bits in id_str.split(','):
-            elements = field_bits.split(':')
-            field_name = elements[0]
-            bit_width = int(elements[-1])
-            if len(elements) == 3:
-                curr_bit = int(elements[1])
-            res[field_name] = (curr_bit, bit_width)
-            curr_bit += abs(bit_width)
-        return res
-# read from RDataFrame and flatten a given collection, return pandas dataframe
-def flatten_collection(rdf, collection, cols=None):
-    if not cols:
-        cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
-    else:
-        cols = ['{}.{}'.format(collection, c) for c in cols]
-    if not cols:
-        print('cannot find any branch under collection {}'.format(collection))
-        return pd.DataFrame()
-    data = rdf.AsNumpy(cols)
-    # flatten the data, add an event id to identify clusters from different events
-    evns = []
-    for i, vec in enumerate(data[cols[0]]):
-        evns += [i]*vec.size()
-    for n, vals in data.items():
-        # make sure ints are not converted to floats
-        typename = vals[0].__class__.__name__.lower()
-        dtype = np.int64 if 'int' in typename or 'long' in typename else np.float64
-        # type safe creation
-        data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
-    # build data frame
-    dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
-    dfp.loc[:, 'event'] = evns
-    return dfp
 if __name__ == '__main__':
@@ -110,7 +28,7 @@ if __name__ == '__main__':
     args = parser.parse_args()
     # decoder
-    decoder = AthenaDecoder(args.compact, args.readout)
+    decoder = ReadoutDecoder(args.compact, args.readout)
     # get hits
     rdf_rec = ROOT.RDataFrame('events', args.rec_file)
@@ -131,6 +49,6 @@ if __name__ == '__main__':
     # faster way to get layerids
     df.loc[:, 'layerID'] = decoder.get(df['cellID'].values, 'layer')
-    df.loc[:, 'xID'] = decoder.get(df['cellID'].values, 'x')
-    print(df[['cellID', 'layerID', 'xID', 'position.x', 'position.y', 'position.z', 'energy']])
+    df.loc[:, 'zID'] = decoder.get(df['cellID'].values, 'z')
+    print(df[['cellID', 'layerID', 'zID', 'position.x', 'position.y', 'position.z', 'energy']])
@@ -16,8 +16,104 @@ import pandas as pd
 import matplotlib
 import DDG4
 from ROOT import gROOT, gInterpreter
+from lxml import etree as ET
+class ReadoutDecoder:
+    def __init__(self, compact, readout):
+        self.readouts = self.getReadouts(compact)
+        self.changeReadout(readout)
+    def changeReadout(self, readout):
+        self.fieldsmap = self.decomposeIDs(self.readouts[readout])
+    def get(self, idvals, field):
+        start, width = self.fieldsmap[field]
+        if width >= 0:
+            return np.bitwise_and(np.right_shift(idvals, start), (1 << width) - 1)
+        # first bit is sign bit
+        else:
+            width = abs(width) - 1
+            vals = np.bitwise_and(np.right_shift(idvals, start), (1 << width) - 1)
+            return np.where(np.bitwise_and(np.right_shift(idvals, start + width), 1), vals - (1 << width), vals)
+    def mask(self, field):
+        start, width = self.fieldsmap[field]
+        return np.uint64((2**abs(width) - 1) << start)
+    def decode(self, idvals):
+        return {field: self.get(idvals, field) for field, _ in self.fieldsmap.items()}
+    @staticmethod
+    def getReadouts(path):
+        res = dict()
+        ReadoutDecoder.__getReadoutsRecur(path, res)
+        return res
+    @staticmethod
+    def __getReadoutsRecur(path, res):
+        if not os.path.exists(path):
+            print('Xml file {} not exist! Ignored it.'.format(path))
+            return
+        lccdd = ET.parse(path).getroot()
+        readouts = lccdd.find('readouts')
+        if readouts is not None:
+            for readout in readouts.getchildren():
+                ids = readout.find('id')
+                if ids is not None:
+                    res[readout.attrib['name']] = ids.text
+        for child in lccdd.getchildren():
+            if child.tag == 'include':
+                root_dir = os.path.dirname(os.path.realpath(path))
+                ReadoutDecoder.__getReadoutsRecur(os.path.join(root_dir, child.attrib['ref']), res)
+    @staticmethod
+    def decomposeIDs(id_str):
+        res = dict()
+        curr_bit = 0
+        for field_bits in id_str.split(','):
+            elements = field_bits.split(':')
+            field_name = elements[0]
+            bit_width = int(elements[-1])
+            if len(elements) == 3:
+                curr_bit = int(elements[1])
+            res[field_name] = (curr_bit, bit_width)
+            curr_bit += abs(bit_width)
+        return res
+# read from RDataFrame and flatten a given collection, return pandas dataframe
+def flatten_collection(rdf, collection, cols=None):
+    if not cols:
+        cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
+    else:
+        cols = ['{}.{}'.format(collection, c) for c in cols]
+    if not cols:
+        print('cannot find any branch under collection {}'.format(collection))
+        return pd.DataFrame()
+    # print(rdf.GetColumnNames())
+    data = rdf.AsNumpy(cols)
+    # flatten the data, add an event id to identify clusters from different events
+    evns = []
+    for i, vec in enumerate(data[cols[0]]):
+        evns += [i]*vec.size()
+    for n, vals in data.items():
+        # make sure ints are not converted to floats
+        typename = vals[0].__class__.__name__.lower()
+        # default
+        dtype = np.float64
+        if 'unsigned int' in typename or 'unsigned long' in typename:
+            dtype = np.uint64
+        elif 'int' in typename or 'long' in typename:
+            dtype = np.int64
+        # print(n, typename, dtype)
+        # type safe creation
+        data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
+    # build data frame
+    dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
+    dfp.loc[:, 'event'] = evns
+    return dfp
 # helper function to truncate color map (for a better view from the rainbow colormap)
 def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100):
     new_cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
@@ -106,7 +202,7 @@ def get_all_mcp(path, evnums=None, branch='MCParticles'):
         for ptl in getattr(events, branch):
             dbuf[idb] = (iev, ptl.momentum.x, ptl.momentum.y, ptl.momentum.z, ptl.PDG, ptl.simulatorStatus, ptl.endpoint.x, ptl.endpoint.y, ptl.endpoint.z)
             idb += 1
     return pd.DataFrame(data=dbuf[:idb], columns=['event', 'px', 'py', 'pz', 'pid', 'status', 'vex', 'vey', 'vez'])
 # read hits data from root file
...