Skip to content
Snippets Groups Projects
Commit 62751498 authored by Chao Peng
Browse files

improve some functions in analysis scripts

parent a1b3797c
No related branches found
No related tags found
1 merge request: !128 "add benchmark for pion0"
This commit is part of merge request !128. Comments created here will be created in the context of that merge request.
......@@ -27,15 +27,23 @@ def flatten_collection(rdf, collection, cols=None):
cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
else:
cols = ['{}.{}'.format(collection, c) for c in cols]
if not cols:
print('cannot find any branch under collection {}'.format(collection))
return pd.DataFrame()
data = rdf.AsNumpy(cols)
# flatten the data, add an event id to identify clusters from different events
evns = []
for i, vec in enumerate(data[cols[0]]):
evns += [i]*vec.size()
for n, vals in data.items():
data[n] = np.asarray([v for vec in vals for v in vec])
# make sure ints are not converted to floats
typename = vals[0].__class__.__name__.lower()
dtype = np.int64 if 'int' in typename or 'long' in typename else np.float64
# type safe creation
data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
# build data frame
dfp = pd.DataFrame(columns=cols, data=np.vstack(list(data.values())).T)
dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
dfp.loc[:, 'event'] = evns
return dfp
......@@ -152,6 +160,7 @@ if __name__ == '__main__':
# calculate eta
if 'eta' not in df.columns:
df.loc[:, 'eta'] = -np.log(np.tan(df['polar.theta'].values/2.))
# print(df[['eta', 'polar.theta', 'position.x', 'position.y', 'position.z']])
fig, axs = plt.subplots(2, 2, figsize=(12, 8), dpi=160)
ncl = df.groupby('event')['clusterID'].nunique().values
axs[0][0].hist(ncl, weights=np.repeat(1./float(ncl.shape[0]), ncl.shape[0]),
......
......@@ -28,15 +28,23 @@ def flatten_collection(rdf, collection, cols=None):
cols = [str(c) for c in rdf.GetColumnNames() if str(c).startswith('{}.'.format(collection))]
else:
cols = ['{}.{}'.format(collection, c) for c in cols]
if not cols:
print('cannot find any branch under collection {}'.format(collection))
return pd.DataFrame()
data = rdf.AsNumpy(cols)
# flatten the data, add an event id to identify clusters from different events
evns = []
for i, vec in enumerate(data[cols[0]]):
evns += [i]*vec.size()
for n, vals in data.items():
data[n] = np.asarray([v for vec in vals for v in vec])
# make sure ints are not converted to floats
typename = vals[0].__class__.__name__.lower()
dtype = np.int64 if 'int' in typename or 'long' in typename else np.float64
# type safe creation
data[n] = np.asarray([v for vec in vals for v in vec], dtype=dtype)
# build data frame
dfp = pd.DataFrame(columns=cols, data=np.vstack(list(data.values())).T)
dfp = pd.DataFrame({c: pd.Series(v) for c, v in data.items()})
dfp.loc[:, 'event'] = evns
return dfp
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment