Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
R
reconstruction_benchmarks
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
EIC
benchmarks
reconstruction_benchmarks
Commits
1c377c8f
Commit
1c377c8f
authored
3 years ago
by
Chao Peng
Browse files
Options
Downloads
Patches
Plain Diff
add a script to draw edep
parent
193b5399
No related branches found
Branches containing commit
No related tags found
1 merge request
!232
add a script to draw edep
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
benchmarks/imaging_shower_ML/scripts/check_edep_dists.py
+135
-0
135 additions, 0 deletions
benchmarks/imaging_shower_ML/scripts/check_edep_dists.py
with
135 additions
and
0 deletions
benchmarks/imaging_shower_ML/scripts/check_edep_dists.py
0 → 100644
+
135
−
0
View file @
1c377c8f
import
ROOT
import
os
import
gc
import
numpy
as
np
import
pandas
as
pd
import
argparse
import
sys
import
matplotlib.pyplot
as
plt
# read from RDataFrame and flatten a given collection, return pandas dataframe
def
flatten_collection
(
rdf
,
collection
,
cols
=
None
,
event_colname
=
'
event
'
):
if
not
cols
:
cols
=
[
str
(
c
)
for
c
in
rdf
.
GetColumnNames
()
if
str
(
c
).
startswith
(
'
{}.
'
.
format
(
collection
))]
else
:
cols
=
[
'
{}.{}
'
.
format
(
collection
,
c
)
for
c
in
cols
]
if
not
cols
:
print
(
'
cannot find any branch under collection {}
'
.
format
(
collection
))
return
pd
.
DataFrame
()
data
=
rdf
.
AsNumpy
(
cols
)
# flatten the data, add an event id to identify clusters from different events
evns
=
[]
for
i
,
vec
in
enumerate
(
data
[
cols
[
0
]]):
evns
+=
[
i
]
*
vec
.
size
()
for
n
,
vals
in
data
.
items
():
# make sure ints are not converted to floats
typename
=
vals
[
0
].
__class__
.
__name__
.
lower
()
dtype
=
np
.
int64
if
'
int
'
in
typename
or
'
long
'
in
typename
else
np
.
float32
# type safe creation
data
[
n
]
=
np
.
asarray
([
v
for
vec
in
vals
for
v
in
vec
],
dtype
=
dtype
)
# build data frame
dfp
=
pd
.
DataFrame
({
c
:
pd
.
Series
(
v
)
for
c
,
v
in
data
.
items
()})
dfp
.
loc
[:,
event_colname
]
=
evns
return
dfp
# load root macros, input is an argument string
def
load_root_macros
(
arg_macros
):
for
path
in
arg_macros
.
split
(
'
,
'
):
path
=
path
.
strip
()
if
os
.
path
.
exists
(
path
):
ROOT
.
gROOT
.
Macro
(
path
)
else
:
print
(
'
\"
{}
\"
does not exist, skip loading it.
'
.
format
(
path
))
def
cartesian_to_polar
(
x
,
y
,
z
):
r
=
np
.
sqrt
(
x
**
2
+
y
**
2
+
z
**
2
)
rc
=
np
.
sqrt
(
x
**
2
+
y
**
2
)
theta
=
np
.
arccos
(
z
/
r
)
phi
=
np
.
arctan2
(
y
,
x
)
eta
=
-
np
.
log
(
np
.
tan
(
theta
/
2.
))
return
r
,
theta
,
phi
,
rc
,
eta
if
__name__
==
'
__main__
'
:
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
'
file
'
,
type
=
str
,
help
=
'
path to root file
'
)
parser
.
add_argument
(
'
--sim
'
,
action
=
'
store_true
'
,
help
=
'
flag switch between sim and rec data (default rec)
'
)
parser
.
add_argument
(
'
-o
'
,
type
=
str
,
default
=
'
.
'
,
dest
=
'
outdir
'
,
help
=
'
output directory
'
)
# parser.add_argument('--compact', type=str, default='', dest='compact', help='compact file')
parser
.
add_argument
(
'
-m
'
,
'
--macros
'
,
type
=
str
,
default
=
'
rootlogon.C
'
,
dest
=
'
macros
'
,
help
=
'
root macros to load (accept multiple paths separated by
\"
,
\"
)
'
)
parser
.
add_argument
(
'
--branch
'
,
type
=
str
,
default
=
'
EcalBarrelImagingHitsReco
'
,
help
=
'
name of data branch (eic::CalorimeterHitCollection)
'
)
parser
.
add_argument
(
'
--truth-branch
'
,
type
=
str
,
default
=
'
mcparticles
'
,
help
=
'
name of truth mc branch
'
)
parser
.
add_argument
(
'
--edep-max
'
,
type
=
float
,
default
=
0.
,
help
=
'
maximum edep (GeV) to plot
'
)
parser
.
add_argument
(
'
--edep-nbins
'
,
type
=
int
,
default
=
200
,
help
=
'
number of bins
'
)
parser
.
add_argument
(
'
--name-tag
'
,
type
=
str
,
default
=
'
test
'
,
help
=
'
name tag to save the file
'
)
parser
.
add_argument
(
'
--samp-frac
'
,
type
=
float
,
default
=
1.0
,
help
=
'
sampling fraction
'
)
args
=
parser
.
parse_args
()
os
.
makedirs
(
args
.
outdir
,
exist_ok
=
True
)
load_root_macros
(
args
.
macros
)
# read data and mcparticles
rdf
=
ROOT
.
RDataFrame
(
"
events
"
,
args
.
file
)
mc_branch
=
args
.
truth_branch
dfm
=
flatten_collection
(
rdf
,
mc_branch
,
[
'
genStatus
'
,
'
pdgID
'
,
'
ps.x
'
,
'
ps.y
'
,
'
ps.z
'
,
'
mass
'
])
dfm
.
rename
(
columns
=
{
c
:
c
.
replace
(
mc_branch
+
'
.
'
,
''
)
for
c
in
dfm
.
columns
},
inplace
=
True
)
# selete incident particles
dfm
=
dfm
[
dfm
[
'
genStatus
'
].
isin
([
0
,
1
])]
# NOTE: assumed single particles
dfm
=
dfm
.
groupby
(
'
event
'
).
first
()
# p, theta, phi, pT, eta = cartesian_to_polar(*dfm[['ps.x', 'ps.y', 'ps.z']].values.T)
if
args
.
sim
:
df
=
flatten_collection
(
rdf
,
args
.
branch
,
[
'
energyDeposit
'
])
df
.
rename
(
columns
=
{
c
:
c
.
replace
(
args
.
branch
+
'
.
'
,
''
)
for
c
in
df
.
columns
},
inplace
=
True
)
df
.
rename
(
columns
=
{
'
energyDeposit
'
:
'
energy
'
},
inplace
=
True
)
else
:
df
=
flatten_collection
(
rdf
,
args
.
branch
,
[
'
layer
'
,
'
energy
'
,
'
position.x
'
,
'
position.y
'
,
'
position.z
'
])
df
.
rename
(
columns
=
{
c
:
c
.
replace
(
args
.
branch
+
'
.
'
,
''
)
for
c
in
df
.
columns
},
inplace
=
True
)
dfe
=
df
.
groupby
(
'
event
'
)[
'
energy
'
].
sum
().
reset_index
()
# determine histrogram bins
if
args
.
edep_max
<=
0.
:
args
.
edep_max
=
dfe
[
'
energy
'
].
quantile
(
0.99
)
*
1.2
bins
=
np
.
linspace
(
0.
,
args
.
edep_max
,
args
.
edep_nbins
+
1
)
bincenters
=
(
bins
[
1
:]
+
bins
[:
-
1
])
/
2.
# get particle types
fig
,
ax
=
plt
.
subplots
(
figsize
=
(
16
,
9
),
dpi
=
120
,
gridspec_kw
=
{
'
left
'
:
0.15
,
'
right
'
:
0.95
})
ax
.
set_xlabel
(
'
Energy Deposit / {:.2f} (GeV)
'
.
format
(
args
.
samp_frac
),
fontsize
=
24
)
ax
.
set_ylabel
(
'
Normalized Counts
'
,
fontsize
=
24
)
ax
.
set_yscale
(
'
log
'
)
ax
.
grid
(
linestyle
=
'
:
'
)
ax
.
tick_params
(
labelsize
=
24
)
ax
.
set_axisbelow
(
True
)
hist_vals
,
hist_cols
=
[],
[]
pdgbase
=
ROOT
.
TDatabasePDG
()
for
pdgid
in
dfm
[
'
pdgID
'
].
unique
():
particle
=
pdgbase
.
GetParticle
(
int
(
pdgid
))
if
not
particle
:
print
(
"
Unknown pdgcode {}, they are ignored
"
.
format
(
int
(
pdgid
)))
continue
events_indices
=
dfm
[
dfm
.
loc
[:,
'
pdgID
'
]
==
pdgid
].
index
.
unique
()
print
(
"
{} entries of particle {}
"
.
format
(
len
(
events_indices
),
particle
.
GetName
()))
dfe_part
=
dfe
.
loc
[
dfe
[
'
event
'
].
isin
(
events_indices
)]
edep_vals
,
_
,
_
=
ax
.
hist
(
dfe_part
[
'
energy
'
]
/
args
.
samp_frac
,
weights
=
[
1.
/
dfe_part
.
shape
[
0
]]
*
dfe_part
.
shape
[
0
],
histtype
=
'
step
'
,
bins
=
bins
,
label
=
particle
.
GetName
())
hist_vals
.
append
(
edep_vals
)
hist_cols
.
append
(
particle
.
GetName
())
pd
.
DataFrame
(
index
=
bincenters
,
data
=
np
.
vstack
(
hist_vals
).
T
,
columns
=
hist_cols
)
\
.
to_csv
(
'
{}.csv
'
.
format
(
args
.
name_tag
))
ax
.
legend
(
fontsize
=
24
)
fig
.
savefig
(
'
{}.png
'
.
format
(
args
.
name_tag
))
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment