add benchmark for pion0

Merged Chao Peng requested to merge add_pi0_benchmark into master
'''
A simple analysis script to extract basic information from Monte-Carlo hits
'''
import os
import ROOT
import pandas as pd
import numpy as np
import argparse
from matplotlib import pyplot as plt
import matplotlib.ticker as ticker
from lxml import etree as ET


class AthenaDecoder:
    def __init__(self, compact, readout):
        self.readouts = self.getReadouts(compact)
        self.changeReadout(readout)

    def changeReadout(self, readout):
        self.fieldsmap = self.decomposeIDs(self.readouts[readout])

    def get(self, idvals, field):
        start, width = self.fieldsmap[field]
        if width >= 0:
            return np.bitwise_and(np.right_shift(idvals, start), (1 << width) - 1)
        else:
            # a negative width marks a signed field: its most significant bit is the sign bit
            width = abs(width) - 1
            vals = np.bitwise_and(np.right_shift(idvals, start), (1 << width) - 1)
            return np.where(np.bitwise_and(np.right_shift(idvals, start + width), 1),
                            vals - (1 << width), vals)
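
    # Worked example of the signed decoding in get() (values are illustrative, not
    # taken from any actual readout spec): for a field declared with width -8, the
    # magnitude uses 7 bits and the 8th bit is the sign bit, so a raw 8-bit field
    # value of 0b11111111 decodes to 127 - (1 << 7) = -1.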

    def decode(self, idvals):
        return {field: self.get(idvals, field) for field, _ in self.fieldsmap.items()}

    @staticmethod
    def getReadouts(path):
        res = dict()
        AthenaDecoder.__getReadoutsRecur(path, res)
        return res

    @staticmethod
    def __getReadoutsRecur(path, res):
        if not os.path.exists(path):
            print('XML file {} does not exist! Ignoring it.'.format(path))
            return
        lccdd = ET.parse(path).getroot()
        readouts = lccdd.find('readouts')
        if readouts is not None:
            for readout in readouts:
                ids = readout.find('id')
                if ids is not None:
                    res[readout.attrib['name']] = ids.text
        for child in lccdd:
            if child.tag == 'include':
                root_dir = os.path.dirname(os.path.realpath(path))
                AthenaDecoder.__getReadoutsRecur(os.path.join(root_dir, child.attrib['ref']), res)

    @staticmethod
    def decomposeIDs(id_str):
        res = dict()
        curr_bit = 0
        for field_bits in id_str.split(','):
            elements = field_bits.split(':')
            field_name = elements[0]
            bit_width = int(elements[-1])
            if len(elements) == 3:
                curr_bit = int(elements[1])
            res[field_name] = (curr_bit, bit_width)
            curr_bit += abs(bit_width)
        return res
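
# Illustrative example of the <id> string parsed by decomposeIDs; the field names
# and widths here are hypothetical, not taken from the actual compact files:
#   "system:8,layer:12,x:-12,y:-12"  ->  {'system': (0, 8), 'layer': (8, 12),
#                                         'x': (20, -12), 'y': (32, -12)}
# i.e. each field maps to (starting bit, bit width), with a negative width marking
# a signed field handled by AthenaDecoder.get above.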


# read from RDataFrame and flatten a given collection, return pandas dataframe
def flatten_collection(rdf, collection, cols=None):
    if not cols:
        cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
    else:
        cols = ['{}.{}'.format(collection, c) for c in cols]
    if not cols:
        print('cannot find any branch under collection {}'.format(collection))
        return pd.DataFrame()

    data = rdf.AsNumpy(cols)
    # flatten the data, add an event id to identify clusters from different events
    evns = []
    for i, vec in enumerate(data[cols[0]]):
        evns += [i]*vec.size()
    for n, vals in data.items():
        # make sure ints are not converted to floats
        typename = vals[0].__class__.__name__.lower()
        dtype = np.int64 if 'int' in typename or 'long' in typename else np.float64
        # type-safe creation
        data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
    # build data frame
    dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
    dfp.loc[:, 'event'] = evns
    return dfp
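
# Minimal usage sketch for flatten_collection; the file and collection names below
# are placeholders for illustration, not names guaranteed to exist in the output:
#   rdf = ROOT.RDataFrame('events', 'rec_file.root')
#   hits = flatten_collection(rdf, 'EcalBarrelHits', cols=['cellID', 'energy'])
#   # -> one row per hit with columns 'EcalBarrelHits.cellID', 'EcalBarrelHits.energy'
#   #    plus an 'event' column identifying which event each hit came from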


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('rec_file', help='Path to reconstruction output file.')
    parser.add_argument('-o', dest='outdir', default='.', help='Output directory.')
    parser.add_argument('-c', '--compact', dest='compact', required=True,
                        help='Top-level xml file of the detector description')
    parser.add_argument('--collection', dest='coll', required=True,
                        help='Hits collection name in the reconstruction file')
    parser.add_argument('--readout', dest='readout', required=True,
                        help='Readout name for the hits collection')
    args = parser.parse_args()

    # build the cell ID decoder from the compact detector description
    decoder = AthenaDecoder(args.compact, args.readout)

    # get hits and strip the collection prefix from the column names
    rdf_rec = ROOT.RDataFrame('events', args.rec_file)
    df = flatten_collection(rdf_rec, args.coll)
    df.rename(columns={c: c.replace(args.coll + '.', '') for c in df.columns}, inplace=True)

    # alternative: initialize the dd4hep detector and use its decoder
    # import DDG4
    # kernel = DDG4.Kernel()
    # description = kernel.detectorDescription()
    # kernel.loadGeometry("file:{}".format(args.compact))
    # dd4hep_decoder = description.readout(args.readout).idSpec().decoder()
    # lindex = dd4hep_decoder.index('x')
    # get_layer_id = np.vectorize(lambda cid: dd4hep_decoder.get(cid, lindex))
    # df.loc[:, 'layerID'] = get_layer_id(df['cellID'].astype(int).values)
    # always terminate dd4hep kernel
    # kernel.terminate()

    # faster way to get the layer IDs: decode the cell IDs directly with numpy
    df.loc[:, 'layerID'] = decoder.get(df['cellID'].values, 'layer')
    df.loc[:, 'xID'] = decoder.get(df['cellID'].values, 'x')
    print(df[['cellID', 'layerID', 'xID', 'position.x', 'position.y', 'position.z', 'energy']])
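
# Example invocation (script and file names are placeholders; adjust the compact
# file, collection, and readout to the actual detector setup):
#   python analysis_script.py rec_pi0.root -c athena.xml \
#       --collection EcalBarrelHits --readout EcalBarrelHits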