Skip to content
Snippets Groups Projects
Commit bdedaaca authored by Simon Gardner's avatar Simon Gardner
Browse files

Merge remote-tracking branch 'origin/master' into add_lowq2_benchmarks

parents 07f9c09e 9a60f27d
No related tags found
1 merge request!151Add lowq2 benchmarks
Pipeline #118726 skipped
Showing
with 1456 additions and 242 deletions
name: Mirror and Trigger EICweb
on:
delete:
push:
workflow_dispatch:
concurrency:
group: mirror
group: mirror-${{ github.event_name }}
cancel-in-progress: false
jobs:
......@@ -15,6 +16,7 @@ jobs:
permissions:
actions: write
contents: read
statuses: write
steps:
- name: Checkout
uses: actions/checkout@v4
......@@ -28,7 +30,9 @@ jobs:
username: ${{ secrets.GITLAB_USERNAME }}
ciskip: true
- name: Trigger EICweb
id: trigger_eicweb
uses: eic/trigger-gitlab-ci@v3
if: ${{ github.event_name != 'delete' }}
with:
url: https://eicweb.phy.anl.gov
project_id: 399
......@@ -39,3 +43,17 @@ jobs:
GITHUB_SHA=${{ github.event.pull_request.head.sha || github.sha }}
GITHUB_PR=${{ github.event.pull_request.number }}
PIPELINE_NAME=${{ github.repository }}: ${{ github.event.pull_request.title || github.ref_name }}
- name: Set pending EICweb status
if: ${{ github.event_name != 'delete' }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DETECTOR_CONFIG: epic_craterlake
run: |
curl \
--fail-with-body \
-X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $GITHUB_TOKEN" \
-H "X-GitHub-Api-Version: 2022-11-28" \
-d '{"context": "eicweb/detector_benchmarks (nightly, '"$DETECTOR_CONFIG"')", "state": "pending", "description": "Waiting for response from the EICweb", "target_url": "${{ fromJson(steps.trigger_eicweb.outputs.json).web_url }}"}' \
"https://api.github.com/repos/${{ github.repository }}/statuses/${{ github.event.pull_request.head.sha || github.sha }}"
BENCHMARKS_REGISTRY: docker.io/eicweb
BENCHMARKS_CONTAINER: jug_xl
BENCHMARKS_CONTAINER: eic_xl
BENCHMARKS_TAG: nightly
......@@ -3,9 +3,9 @@ image: ${BENCHMARKS_REGISTRY}/${BENCHMARKS_CONTAINER}:${BENCHMARKS_TAG}
variables:
DETECTOR: epic
DETECTOR_CONFIG: epic_craterlake
DETECTOR_REPOSITORYURL: 'https://github.com/eic/epic.git'
GITHUB_SHA: ''
GITHUB_REPOSITORY: ''
SNAKEMAKE_FLAGS: '--cache'
workflow:
name: '$PIPELINE_NAME'
......@@ -28,12 +28,14 @@ default:
- .local/bin
- .local/include
- .local/share
- .snakemake/log
- results
- config
- .env
- summary.txt
reports:
dotenv: .env
when: always
stages:
- status-pending
......@@ -45,7 +47,6 @@ stages:
- benchmarks
- collect
- deploy
- trigger
- status-report
.status:
......@@ -60,8 +61,8 @@ stages:
"https://api.github.com/repos/${GITHUB_REPOSITORY}/statuses/${GITHUB_SHA}" \
-d '{"state":"'"${STATE}"'",
"target_url":"'"${CI_PIPELINE_URL}"'",
"description":"'"${DESCRIPTION} $(TZ=America/New_York date)"'",
"context":"eicweb/detector_benchmarks ('"$DETECTOR_CONFIG"')"
"description":"'"$(TZ=America/New_York date)"'",
"context":"eicweb/detector_benchmarks ('"${BENCHMARKS_TAG}"', '"$DETECTOR_CONFIG"')"
}' ;
fi
......@@ -71,7 +72,6 @@ benchmarks:detector:pending:
extends: .status
variables:
STATE: "pending"
DESCRIPTION: "Started..."
when: always
common:setup:
......@@ -89,13 +89,6 @@ common:setup:
echo "BENCHMARKS_CONTAINER: ${BENCHMARKS_CONTAINER}"
echo "BENCHMARKS_REGISTRY: ${BENCHMARKS_REGISTRY}"
- source setup/bin/env.sh && ./setup/bin/install_common.sh
common:detector:
stage: initialize
needs: ["common:setup"]
script:
- source .local/bin/env.sh && build_detector.sh
- mkdir_local_data_link sim_output
- mkdir -p results
- mkdir -p config
......@@ -103,7 +96,7 @@ common:detector:
get_data:
stage: data_init
needs: ["common:detector"]
needs: ["common:setup"]
script:
- source .local/bin/env.sh
- ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output
......@@ -112,9 +105,10 @@ get_data:
.det_benchmark:
needs:
- ["get_data","common:detector"]
- ["get_data","common:setup"]
before_script:
- source .local/bin/env.sh
- source /opt/detector/epic-main/setup.sh
- ls -lrtha
- ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output
- ln -s "${LOCAL_DATA_PATH}/datasets/data" data
......@@ -122,6 +116,7 @@ get_data:
- mkdir "${DETECTOR_CONFIG}"
- ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output"
- ln -s "../results" "${DETECTOR_CONFIG}/results"
- mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
- ls -lrtha
retry:
max: 2
......@@ -129,26 +124,71 @@ get_data:
- runner_system_failure
include:
# - local: 'benchmarks/backgrounds/config.yml'
# - local: 'benchmarks/ecal_gaps/config.yml'
# - local: 'benchmarks/tracking_detectors/config.yml'
# - local: 'benchmarks/barrel_ecal/config.yml'
# - local: 'benchmarks/barrel_hcal/config.yml'
# - local: 'benchmarks/zdc/config.yml'
# - local: 'benchmarks/material_maps/config.yml'
# - local: 'benchmarks/material_scan/config.yml'
# - local: 'benchmarks/pid/config.yml'
# - local: 'benchmarks/timing/config.yml'
# - local: 'benchmarks/b0_tracker/config.yml'
- local: 'benchmarks/backgrounds/config.yml'
- local: 'benchmarks/backwards_ecal/config.yml'
- local: 'benchmarks/calo_pid/config.yml'
- local: 'benchmarks/ecal_gaps/config.yml'
- local: 'benchmarks/tracking_detectors/config.yml'
- local: 'benchmarks/tracking_performances/config.yml'
- local: 'benchmarks/tracking_performances_dis/config.yml'
- local: 'benchmarks/barrel_ecal/config.yml'
- local: 'benchmarks/barrel_hcal/config.yml'
- local: 'benchmarks/lfhcal/config.yml'
- local: 'benchmarks/zdc/config.yml'
- local: 'benchmarks/zdc_lyso/config.yml'
- local: 'benchmarks/zdc_neutron/config.yml'
- local: 'benchmarks/zdc_photon/config.yml'
- local: 'benchmarks/zdc_pi0/config.yml'
- local: 'benchmarks/material_scan/config.yml'
- local: 'benchmarks/pid/config.yml'
- local: 'benchmarks/timing/config.yml'
- local: 'benchmarks/b0_tracker/config.yml'
- local: 'benchmarks/insert_muon/config.yml'
- local: 'benchmarks/insert_tau/config.yml'
- local: 'benchmarks/zdc_sigma/config.yml'
- local: 'benchmarks/zdc_lambda/config.yml'
- local: 'benchmarks/insert_neutron/config.yml'
- local: 'benchmarks/femc_electron/config.yml'
- local: 'benchmarks/femc_photon/config.yml'
- local: 'benchmarks/femc_pi0/config.yml'
- local: 'benchmarks/LOWQ2/analysis/config.yml'
deploy_results:
allow_failure: true
stage: deploy
needs:
# - ["collect_results:zdc","collect_results:barrel_ecal","collect_results:barrel_hcal","collect_results:material_scan"]
- "collect_results:backgrounds"
- "collect_results:backwards_ecal"
- "collect_results:barrel_ecal"
- "collect_results:barrel_hcal"
- "collect_results:calo_pid"
- "collect_results:ecal_gaps"
- "collect_results:lfhcal"
- "collect_results:material_scan"
- "collect_results:pid"
- "collect_results:tracking_performance"
- "collect_results:tracking_performance_campaigns"
- "collect_results:zdc_sigma"
- "collect_results:zdc_lambda"
- "collect_results:insert_neutron"
- "collect_results:tracking_performances_dis"
- "collect_results:zdc"
- "collect_results:zdc_lyso"
- "collect_results:zdc_neutron"
- "collect_results:insert_muon"
- "collect_results:insert_tau"
- "collect_results:zdc_photon"
- "collect_results:zdc_pi0"
- "collect_results:femc_electron"
- "collect_results:femc_photon"
- "collect_results:femc_pi0"
script:
- echo "deploy results!"
- snakemake $SNAKEMAKE_FLAGS --cores 1 results/metadata.json
- find results -print | sort | tee summary.txt
- wget https://dl.pelicanplatform.org/7.13.0/pelican_Linux_x86_64.tar.gz
- sha256sum -c <(echo '38ac8548c67302299e50a1b81c159ed418e90d84a6606ddd377fd2c8b164d114 pelican_Linux_x86_64.tar.gz')
- tar zxf pelican_Linux_x86_64.tar.gz
- mv results pipeline-$CI_PIPELINE_ID; tar cf pipeline-$CI_PIPELINE_ID.tar pipeline-$CI_PIPELINE_ID/; mv pipeline-$CI_PIPELINE_ID results
- ./pelican-*/pelican object copy pipeline-$CI_PIPELINE_ID.tar $OSDF_ENDPOINT$OSDF_OUTPUT_PREFIX/pipeline-$CI_PIPELINE_ID.tar
benchmarks:detector:success:
stage: status-report
......@@ -156,7 +196,9 @@ benchmarks:detector:success:
extends: .status
variables:
STATE: "success"
DESCRIPTION: "Succeeded!"
after_script:
# Cleanup scratch space
- rm -rfv $LOCAL_DATA_PATH
when: on_success
benchmarks:detector:failure:
......@@ -165,51 +207,8 @@ benchmarks:detector:failure:
extends: .status
variables:
STATE: "failure"
DESCRIPTION: "Failed!"
when: on_failure
benchmarks:reconstruction:
stage: trigger
variables:
GITHUB_SHA: "${GITHUB_SHA}"
GITHUB_REPOSITORY: "${GITHUB_REPOSITORY}"
DETECTOR: "$DETECTOR"
DETECTOR_CONFIG: "$DETECTOR_CONFIG"
DETECTOR_VERSION: "$DETECTOR_VERSION"
DETECTOR_REPOSITORYURL: "$DETECTOR_REPOSITORYURL"
DETECTOR_DEPLOY_TOKEN_USERNAME: "${DETECTOR_DEPLOY_TOKEN_USERNAME}"
DETECTOR_DEPLOY_TOKEN_PASSWORD: "${DETECTOR_DEPLOY_TOKEN_PASSWORD}"
COMMON_BENCH_VERSION: "$COMMON_BENCH_VERSION"
BENCHMARKS_TAG: "${BENCHMARKS_TAG}"
BENCHMARKS_CONTAINER: "${BENCHMARKS_CONTAINER}"
BENCHMARKS_REGISTRY: "${BENCHMARKS_REGISTRY}"
PIPELINE_NAME: "$PIPELINE_NAME"
trigger:
project: EIC/benchmarks/reconstruction_benchmarks
strategy: depend
needs: ["deploy_results"]
benchmarks:physics:
stage: trigger
variables:
GITHUB_SHA: "${GITHUB_SHA}"
GITHUB_REPOSITORY: "${GITHUB_REPOSITORY}"
DETECTOR: "$DETECTOR"
DETECTOR_CONFIG: "$DETECTOR_CONFIG"
DETECTOR_VERSION: "$DETECTOR_VERSION"
DETECTOR_REPOSITORYURL: "$DETECTOR_REPOSITORYURL"
DETECTOR_DEPLOY_TOKEN_USERNAME: "${DETECTOR_DEPLOY_TOKEN_USERNAME}"
DETECTOR_DEPLOY_TOKEN_PASSWORD: "${DETECTOR_DEPLOY_TOKEN_PASSWORD}"
COMMON_BENCH_VERSION: "$COMMON_BENCH_VERSION"
BENCHMARKS_TAG: "${BENCHMARKS_TAG}"
BENCHMARKS_CONTAINER: "${BENCHMARKS_CONTAINER}"
BENCHMARKS_REGISTRY: "${BENCHMARKS_REGISTRY}"
PIPELINE_NAME: "$PIPELINE_NAME"
trigger:
project: EIC/benchmarks/physics_benchmarks
strategy: depend
needs: ["deploy_results"]
pages:
stage: deploy
rules:
......
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: check-added-large-files
args: ['--maxkb=128']
- id: check-yaml
EIC Detector Benchmarks
=======================
ePIC Detector Benchmarks
========================
[![pipeline status](https://eicweb.phy.anl.gov/EIC/benchmarks/detector_benchmarks/badges/master/pipeline.svg)](https://eicweb.phy.anl.gov/EIC/benchmarks/detector_benchmarks/-/commits/master)
## Overview
Detector benchmarks are meant to test for regressions in individual detector subsystems.
The analysis is meant to avoid a reconstruction step.
So this precludes using [juggler](https://eicweb.phy.anl.gov/EIC/juggler) for processing the events.
Detector benchmarks are meant to provide a maintained set of performance plots for individual detector subsystems.
## Documentation
See [common_bench](https://eicweb.phy.anl.gov/EIC/benchmarks/common_bench/).
- See [tutorial](https://eic.github.io/tutorial-developing-benchmarks/)
- See [common_bench](https://eicweb.phy.anl.gov/EIC/benchmarks/common_bench/).
## Adding new benchmarks
To get an idea of what to do look at an existing benchmark in the
[`benchmarks` directory](https://eicweb.phy.anl.gov/EIC/benchmarks/detector_benchmarks/-/tree/master/benchmarks).
## Running Locally
### Local development example
Here we setup to use our local build of the `juggler` library.
Note juggler is not needed for `detector_benchmarks` because it is not used but this is the same setup for
`reconstruction_benchmarks` and `physics_benchmarks`.
First set some environment variables.
```
export DETECTOR=epic # epic is the default
```
```
git clone https://eicweb.phy.anl.gov/EIC/benchmarks/detector_benchmarks.git && cd detector_benchmarks
git clone https://eicweb.phy.anl.gov/EIC/benchmarks/common_bench.git setup
source setup/bin/env.sh && ./setup/bin/install_common.sh
source .local/bin/env.sh && build_detector.sh
mkdir_local_data_link sim_output
mkdir -p results
mkdir -p config
```
[`benchmarks` directory](https://github.com/eic/detector_benchmarks/tree/master/benchmarks).
Currently a good reference for Snakemake instrumentation is available in the `tracking_performances` benchmark.
It relies on single-particle simulations that can be either produced on eicweb or downloaded from official campaigns.
### File organization
For a minimal benchmark you'll need to add
`benchmarks/<benchmark_name_here>/config.yml` and
`benchmarks/<benchmark_name_here>/Snakefile`, plus the analysis script/macro.
The `Snakefile` has to be included in the root `./Snakefile` of the repository.
That common entry point is needed to ensure that common simulation samples can
be defined to be re-used by several benchmarks at a time.
The `config.yml` will require an include from the `./.gitlab-ci.yml`.
### Pass/Fail tests
- Create a script that returns exit status 0 for success.
- Any non-zero value will be considered failure.
- Script
configfile: "config.yaml"
if config["remote"] == "S3":
from snakemake.remote.S3 import RemoteProvider as S3RemoteProvider
provider = S3RemoteProvider(
endpoint_url="https://eics3.sdcc.bnl.gov:9000",
access_key_id=os.environ["S3_ACCESS_KEY"],
secret_access_key=os.environ["S3_SECRET_KEY"],
)
remote_path = lambda path: f"eictest/{path}"
elif config["remote"] == "XRootD":
from snakemake.remote.XRootD import RemoteProvider as XRootDRemoteProvider
provider = XRootDRemoteProvider(
stay_on_remote=False,
)
remote_path = lambda path: f"root://dtn-eic.jlab.org//work/eic2/{path}"
else:
raise ValueError(f"Unexpected config[\"remote\"] = {config['remote']}")
configfile: "snakemake.yml"
import functools
import os
from snakemake.logging import logger
@functools.cache
def get_spack_package_hash(package_name):
    """Return the Spack package hash for *package_name*, or "" on failure.

    The result is cached per package name.  Failures (Spack not installed,
    or ``spack find`` returning non-zero) are logged and collapsed to an
    empty string so the DAG can still be evaluated without Spack.
    """
    import json
    import subprocess  # local import: not guaranteed to be imported at module level
    try:
        ver_info = json.loads(subprocess.check_output(["spack", "find", "--json", package_name]))
        return ver_info[0]["package_hash"]
    except FileNotFoundError as e:
        logger.warning("Spack is not installed")
        return ""
    except subprocess.CalledProcessError as e:
        # e.g. the package is not installed; proceed with an empty hash
        # (was a bare print(); use the logger for consistency with the branch above)
        logger.warning(e)
        return ""
@functools.cache
def find_epic_libraries():
    """Locate the installed "epic" geometry library.

    Returns a single-element list with the resolved library path, or an
    empty list when the library is not available, so the DAG can still be
    evaluated without it.
    """
    import ctypes.util

    found = ctypes.util.find_library("epic")
    if found is None:
        return []
    return [os.environ["DETECTOR_PATH"] + "/../../lib/" + found]
include: "benchmarks/backgrounds/Snakefile"
include: "benchmarks/backwards_ecal/Snakefile"
include: "benchmarks/barrel_ecal/Snakefile"
include: "benchmarks/calo_pid/Snakefile"
include: "benchmarks/ecal_gaps/Snakefile"
include: "benchmarks/material_scan/Snakefile"
include: "benchmarks/tracking_performances/Snakefile"
include: "benchmarks/tracking_performances_dis/Snakefile"
include: "benchmarks/lfhcal/Snakefile"
include: "benchmarks/zdc_lyso/Snakefile"
include: "benchmarks/zdc_neutron/Snakefile"
include: "benchmarks/insert_muon/Snakefile"
include: "benchmarks/zdc_lambda/Snakefile"
include: "benchmarks/zdc_photon/Snakefile"
include: "benchmarks/zdc_pi0/Snakefile"
include: "benchmarks/zdc_sigma/Snakefile"
include: "benchmarks/insert_neutron/Snakefile"
include: "benchmarks/insert_tau/Snakefile"
include: "benchmarks/femc_electron/Snakefile"
include: "benchmarks/femc_photon/Snakefile"
include: "benchmarks/femc_pi0/Snakefile"
# Normalize the configured remote provider once at parse time.
use_s3 = config["remote_provider"].lower() == "s3"
use_xrootd = config["remote_provider"].lower() == "xrootd"

def get_remote_path(path):
    """Map a dataset-relative path to its remote URL (S3 or XRootD).

    Raises ValueError when config["remote_provider"] is neither "s3" nor
    "xrootd".  (The original raised an undefined name `runtime_exception`
    and passed a non-f-string, so the offending value was never shown.)
    """
    if use_s3:
        return f"s3https://eics3.sdcc.bnl.gov:9000/eictest/{path}"
    elif use_xrootd:
        return f"root://dtn-eic.jlab.org//volatile/eic/{path}"
    else:
        raise ValueError(f'Unexpected value for config["remote_provider"]: {config["remote_provider"]}')
# Download one file from the EPIC dataset area into ./EPIC/..., using
# xrdcp (XRootD) or mc (S3) depending on config["remote_provider"].
rule fetch_epic:
    output:
        filepath="EPIC/{PATH}"
    params:
        # wildcards are not included in hash for caching, we need to add them as params
        PATH=lambda wildcards: wildcards.PATH
    # cache the fetched file between pipelines; retry transient transfer failures
    cache: True
    retries: 3
    shell: """
xrdcp --debug 2 root://dtn-eic.jlab.org//volatile/eic/{output.filepath} {output.filepath}
""" if use_xrootd else """
mc cp S3/eictest/{output.filepath} {output.filepath}
""" if use_s3 else f"""
echo 'Unexpected value for config["remote_provider"]: {config["remote_provider"]}'
exit 1
"""
rule warmup_run:
......@@ -28,7 +88,8 @@ rule warmup_run:
"warmup/{DETECTOR_CONFIG}.edm4hep.root",
message: "Ensuring that calibrations/fieldmaps are available for {wildcards.DETECTOR_CONFIG}"
shell: """
ddsim \
set -m # monitor mode to prevent lingering processes
exec ddsim \
--runType batch \
--numberOfEvents 1 \
--compactFile "$DETECTOR_PATH/{wildcards.DETECTOR_CONFIG}.xml" \
......@@ -57,3 +118,26 @@ rule org2py:
"""
awk -f {input.converter} {input.notebook} > {output}
"""
# Record pipeline provenance (GitLab CI and GitHub environment variables)
# into results/metadata.json, then check that the output is well-formed JSON.
# Doubled braces ({{ ... }}) are Snakemake escapes; the shell sees single braces.
rule metadata:
    output:
        "results/metadata.json"
    shell:
        """
cat > {output} <<EOF
{{
  "CI_COMMIT_REF_NAME": "${{CI_COMMIT_REF_NAME:-}}",
  "CI_COMMIT_SHA": "${{CI_COMMIT_SHA:-}}",
  "CI_PIPELINE_ID": "${{CI_PIPELINE_ID:-}}",
  "CI_PIPELINE_SOURCE": "${{CI_PIPELINE_SOURCE:-}}",
  "CI_PROJECT_ID": "${{CI_PROJECT_ID:-}}",
  "GITHUB_REPOSITORY": "${{GITHUB_REPOSITORY:-}}",
  "GITHUB_SHA": "${{GITHUB_SHA:-}}",
  "GITHUB_PR": "${{GITHUB_PR:-}}",
  "PIPELINE_NAME": "${{PIPELINE_NAME:-}}"
}}
EOF
# validate JSON
jq '.' {output}
"""
......@@ -2,56 +2,27 @@ import os
import shutil
rule backgrounds_get_beam_gas_electron:
input:
provider.remote(remote_path("EPIC/EVGEN/BACKGROUNDS/BEAMGAS/electron/beam_gas_ep_10GeV_foam_emin10keV_10Mevt_vtx.hepmc")),
output:
"input/backgrounds/beam_gas_electron.hepmc",
run:
shutil.move(input[0], output[0])
rule backgrounds_get_beam_gas_proton:
input:
provider.remote(remote_path("EPIC/EVGEN/BACKGROUNDS/BEAMGAS/proton/ProtonBeamGasEvents/100GeV/100GeV_1.hepmc")),
output:
"input/backgrounds/beam_gas_proton.hepmc",
run:
shutil.move(input[0], output[0])
rule backgrounds_get_DIS:
input:
provider.remote(remote_path("EPIC/EVGEN/DIS/NC/{BEAM}/minQ2={MINQ2}/pythia8NCDIS_{BEAM}_minQ2={MINQ2}_{SUFFIX}.hepmc")),
wildcard_constraints:
BEAM="\d+x\d+",
MINQ2="\d+",
output:
"input/backgrounds/pythia8NCDIS_{BEAM}_minQ2={MINQ2}_{SUFFIX}.hepmc",
run:
shutil.move(input[0], output[0])
rule backgrounds_sim:
input:
hepmc="input/backgrounds/{NAME}.hepmc",
warmup="warmup/{DETECTOR_CONFIG}.edm4hep.root",
output:
"sim_output/{DETECTOR_CONFIG}/backgrounds/{NAME}.edm4hep.root",
"sim_output/{DETECTOR_CONFIG}/backgrounds/{PATH}.edm4hep.root",
log:
"sim_output/{DETECTOR_CONFIG}/backgrounds/{NAME}.edm4hep.root.log",
"sim_output/{DETECTOR_CONFIG}/backgrounds/{PATH}.edm4hep.root.log",
params:
N_EVENTS=100
N_EVENTS=100,
hepmc=lambda wildcards: get_remote_path(f"{wildcards.PATH}.hepmc3.tree.root"),
shell:
"""
ddsim \
set -m # monitor mode to prevent lingering processes
exec ddsim \
--runType batch \
--part.minimalKineticEnergy 100*GeV \
--filter.tracker edep0 \
-v WARNING \
--numberOfEvents {params.N_EVENTS} \
--compactFile $DETECTOR_PATH/{wildcards.DETECTOR_CONFIG}.xml \
--inputFiles {input.hepmc} \
--inputFiles {params.hepmc} \
--outputFile {output}
"""
......@@ -62,30 +33,37 @@ rule backgrounds_ecal_backwards:
input:
matplotlibrc=".matplotlibrc",
script="benchmarks/backgrounds/ecal_backwards.py",
electron_beam_gas_gen="input/backgrounds/beam_gas_electron.hepmc",
electron_beam_gas_sim="sim_output/" + DETECTOR_CONFIG + "/backgrounds/beam_gas_electron.edm4hep.root",
physics_signal_sim="sim_output/" + DETECTOR_CONFIG + "/backgrounds/pythia8NCDIS_10x100_minQ2=1_beamEffects_xAngle=-0.025_hiDiv_1.edm4hep.root",
proton_beam_gas_gen="input/backgrounds/beam_gas_proton.hepmc",
proton_beam_gas_sim="sim_output/" + DETECTOR_CONFIG + "/backgrounds/beam_gas_proton.edm4hep.root",
electron_beam_gas_sim="sim_output/" + DETECTOR_CONFIG + "/backgrounds/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/electron/GETaLM1.0.0-1.0/10GeV/GETaLM1.0.0-1.0_ElectronBeamGas_10GeV_foam_emin10keV_run001.edm4hep.root",
physics_signal_sim="sim_output/" + DETECTOR_CONFIG + "/backgrounds/EPIC/EVGEN/DIS/NC/10x100/minQ2=1/pythia8NCDIS_10x100_minQ2=1_beamEffects_xAngle=-0.025_hiDiv_1.edm4hep.root",
proton_beam_gas_sim="sim_output/" + DETECTOR_CONFIG + "/backgrounds/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/proton/pythia8.306-1.0/100GeV/pythia8.306-1.0_ProtonBeamGas_100GeV_run001.edm4hep.root",
output:
directory("results/backgrounds/backwards_ecal")
log:
scheduler=".logs/results/backgrounds/backwards_ecal/scheduler.log",
worker=".logs/results/backgrounds/backwards_ecal/worker.log",
threads: workflow.cores
shell:
"""
set -m # monitor mode to prevent lingering processes
cleanup() {{
echo Cleaning up
kill $WORKER_PID $SCHEDULER_PID
}}
trap cleanup EXIT
PORT=$RANDOM
dask scheduler --port $PORT &
dask scheduler --port $PORT 2>{log.scheduler} &
export DASK_SCHEDULER=localhost:$PORT
SCHEDULER_PID=$!
dask worker tcp://$DASK_SCHEDULER --nworkers {threads} --nthreads 1 &
dask worker tcp://$DASK_SCHEDULER --nworkers {threads} --nthreads 1 2>{log.worker} &
WORKER_PID=$!
env \
MATPLOTLIBRC={input.matplotlibrc} \
ELECTRON_BEAM_GAS_GEN=$(realpath {input.electron_beam_gas_gen}) \
ELECTRON_BEAM_GAS_GEN=root://dtn-eic.jlab.org//volatile/eic/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/electron/GETaLM1.0.0-1.0/10GeV/GETaLM1.0.0-1.0_ElectronBeamGas_10GeV_foam_emin10keV_run001.hepmc3.tree.root \
ELECTRON_BEAM_GAS_SIM=$(realpath {input.electron_beam_gas_sim}) \
PHYSICS_PROCESS_SIM=$(realpath {input.physics_signal_sim}) \
PROTON_BEAM_GAS_GEN=$(realpath {input.proton_beam_gas_gen}) \
PROTON_BEAM_GAS_GEN=root://dtn-eic.jlab.org//volatile/eic/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/proton/pythia8.306-1.0/100GeV/pythia8.306-1.0_ProtonBeamGas_100GeV_run001.hepmc3.tree.root \
PROTON_BEAM_GAS_SIM=$(realpath {input.proton_beam_gas_sim}) \
OUTPUT_DIR={output} \
python {input.script}
kill $WORKER_PID $SCHEDULER_PID
"""
......@@ -4,7 +4,11 @@ sim:backgrounds:
script:
- mkdir -p $LOCAL_DATA_PATH/input
- ln -s $LOCAL_DATA_PATH/input input
- snakemake --cores 2 sim_output/$DETECTOR_CONFIG/beam_gas_{electron,proton}.edm4hep.root sim_output/$DETECTOR_CONFIG/pythia8NCDIS_10x100_minQ2=1_beamEffects_xAngle=-0.025_hiDiv_vtxfix_1.edm4hep.root
- |
snakemake $SNAKEMAKE_FLAGS --cores 2 \
sim_output/$DETECTOR_CONFIG/backgrounds/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/electron/GETaLM1.0.0-1.0/10GeV/GETaLM1.0.0-1.0_ElectronBeamGas_10GeV_foam_emin10keV_run001.edm4hep.root \
sim_output/$DETECTOR_CONFIG/backgrounds/EPIC/EVGEN/DIS/NC/10x100/minQ2=1/pythia8NCDIS_10x100_minQ2=1_beamEffects_xAngle=-0.025_hiDiv_1.edm4hep.root \
sim_output/$DETECTOR_CONFIG/backgrounds/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/proton/pythia8.306-1.0/100GeV/pythia8.306-1.0_ProtonBeamGas_100GeV_run001.edm4hep.root
bench:backgrounds_emcal_backwards:
extends: .det_benchmark
......@@ -15,7 +19,7 @@ bench:backgrounds_emcal_backwards:
- ln -s $LOCAL_DATA_PATH/input input
- export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
- pip install -r benchmarks/backgrounds/requirements.txt
- snakemake --cores 8 backgrounds_ecal_backwards
- snakemake $SNAKEMAKE_FLAGS --cores 8 backgrounds_ecal_backwards
collect_results:backgrounds:
extends: .det_benchmark
......@@ -24,3 +28,6 @@ collect_results:backgrounds:
- "bench:backgrounds_emcal_backwards"
script:
- ls -lrht
- mv results{,_save}/ # move results directory out of the way to preserve it
- snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output backgrounds_ecal_backwards
- mv results{_save,}/
......@@ -15,7 +15,7 @@ import dask_awkward as dak
import dask_histogram as dh
import numpy as np
import uproot
import pyhepmc
from pyHepMC3 import HepMC3
#+end_src
#+begin_src jupyter-python :results silent
......@@ -42,6 +42,8 @@ def setup_presentation_style():
'legend.loc': 'upper right',
'xtick.labelsize': 8,
'ytick.labelsize': 8,
'xaxis.labellocation': 'right',
'yaxis.labellocation': 'top',
'pgf.rcfonts': False,
})
......@@ -66,21 +68,21 @@ output_dir.mkdir(parents=True, exist_ok=True)
#+begin_src jupyter-python :results silent
builder = ak.ArrayBuilder()
n = 0
with pyhepmc.open(PROTON_BEAM_GAS_GEN) as f:
it = iter(f)
for event in f:
f = HepMC3.ReaderPlugin(PROTON_BEAM_GAS_GEN, "libHepMC3rootIO.so", "newReaderRootTreefile")
event = HepMC3.GenEvent()
while f.read_event(event):
builder.begin_list()
for vertex in event.vertices:
assert event.length_unit.name == "MM"
assert event.length_unit().name == "MM"
for vertex in event.vertices():
with builder.record("Vector4D"):
builder.field("x")
builder.real(vertex.position.x)
builder.real(vertex.position().x())
builder.field("y")
builder.real(vertex.position.y)
builder.real(vertex.position().y())
builder.field("z")
builder.real(vertex.position.z)
builder.real(vertex.position().z())
builder.field("t")
builder.real(vertex.position.t)
builder.real(vertex.position().t())
builder.end_list()
n += 1
if n > 10000: break
......@@ -89,21 +91,21 @@ vertices_proton_beam_gas = builder.snapshot()
builder = ak.ArrayBuilder()
n = 0
with pyhepmc.open(ELECTRON_BEAM_GAS_GEN) as f:
it = iter(f)
for event in f:
f = HepMC3.ReaderPlugin(ELECTRON_BEAM_GAS_GEN, "libHepMC3rootIO.so", "newReaderRootTreefile")
event = HepMC3.GenEvent()
while f.read_event(event):
builder.begin_list()
for vertex in event.vertices:
assert event.length_unit.name == "MM"
with builder.record("Vector3D"):
assert event.length_unit().name == "MM"
for vertex in event.vertices():
with builder.record("Vector4D"):
builder.field("x")
builder.real(vertex.position.x)
builder.real(vertex.position().x())
builder.field("y")
builder.real(vertex.position.y)
builder.real(vertex.position().y())
builder.field("z")
builder.real(vertex.position.z)
builder.real(vertex.position().z())
builder.field("t")
builder.real(vertex.position.t)
builder.real(vertex.position().t())
builder.end_list()
n += 1
if n > 10000: break
......@@ -153,6 +155,7 @@ plt.xlabel("vertex[0].t, mm")
plt.legend()
plt.savefig(output_dir / "vertex_time_distribution.png", bbox_inches="tight")
plt.show()
plt.clf()
for label, ds in datasets.items():
if "vertices" not in ds: continue
......@@ -164,6 +167,7 @@ plt.xlabel("vertex[0].z, mm")
plt.legend()
plt.savefig(output_dir / "vertex_z_distribution.png", bbox_inches="tight")
plt.show()
plt.clf()
for label, ds in datasets.items():
if "vertices" not in ds: continue
......@@ -178,6 +182,7 @@ plt.ylabel("vertex[0].x, mm")
plt.legend()
plt.savefig(output_dir / "vertex_xz_distribution.png", bbox_inches="tight")
plt.show()
plt.clf()
for ix, (label, ds) in enumerate(datasets.items()):
if "vertices" not in ds: continue
......@@ -192,6 +197,7 @@ for ix, (label, ds) in enumerate(datasets.items()):
plt.title(label)
plt.savefig(output_dir / f"vertex_yz_distribution_{ix}.png", bbox_inches="tight")
plt.show()
plt.clf()
#+end_src
** Simulation results
......@@ -210,7 +216,8 @@ for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
for ix, vals in enumerate([xs, ys]):
centers = np.unique(vals)
diffs = centers[1:] - centers[:-1]
bin_widths[ix] = np.min(diffs[diffs > 0]) if np.sum(diffs > 0) > 0 else 1.
EPSILON = 1e-5
bin_widths[ix] = np.min(diffs[diffs > EPSILON]) if np.sum(diffs > EPSILON) > 0 else 1.
print(f"bin_widths[{ix}]", bin_widths[ix])
bins = {
......@@ -227,10 +234,11 @@ for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
)
plt.colorbar().set_label("energy, GeV", loc="top")
plt.title(f"{label}, event_id={event_id}\n{collection_name}")
plt.xlabel("hit x, mm", loc="right")
plt.ylabel("hit y, mm", loc="top")
plt.xlabel("hit x, mm")
plt.ylabel("hit y, mm")
plt.savefig(output_dir / f"{collection_name}_event_display_{dataset_ix}.png", bbox_inches="tight")
plt.show()
plt.clf()
#+end_src
** Discovering number of cells
......@@ -280,11 +288,12 @@ for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
plt.title(f"{collection_name}")
plt.legend()
plt.xlabel("r, mm", loc="right")
plt.xlabel("r, mm")
dr = (r_axis.edges[1] - r_axis.edges[0])
plt.ylabel(f"Number of towers per {dr} mm slice in $r$", loc="top")
plt.ylabel(f"Number of towers per {dr} mm slice in $r$")
plt.savefig(output_dir / f"{collection_name}_num_towers.png", bbox_inches="tight")
plt.show()
plt.clf()
#+end_src
** Plotting the rates
......@@ -319,11 +328,12 @@ for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
plt.title(f"for $E_{{dep.}} >$ {edep_min * 1000} MeV\n{collection_name}")
plt.legend()
plt.xlabel("r, mm", loc="right")
plt.ylabel("rate per tower, Hz", loc="top")
plt.xlabel("r, mm")
plt.ylabel("rate per tower, Hz")
plt.yscale("log")
plt.savefig(output_dir / f"{collection_name}_hit_rate_vs_r_edep_min_{edep_min:.3f}.png", bbox_inches="tight")
plt.show()
plt.clf()
#+end_src
#+begin_src jupyter-python
......@@ -349,11 +359,12 @@ for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
plt.title(rf"for events with $E_{{\mathrm{{dep. tot.}}}}$ $>$ {totedep_min} GeV" + f"\n{collection_name}")
plt.legend()
plt.xlabel("$z$ of the first interaction vertex, mm", loc="right")
plt.ylabel("rate, Hz", loc="top")
plt.xlabel("$z$ of the first interaction vertex, mm")
plt.ylabel("rate, Hz")
plt.yscale("log")
plt.savefig(output_dir / f"{collection_name}_hit_rate_vs_z_totedep_min_{totedep_min:.1f}.png", bbox_inches="tight")
plt.show()
plt.clf()
#+end_src
#+begin_src jupyter-python
......@@ -429,7 +440,7 @@ for edep_min in [0]: # GeV
plt.sca(ax_cur)
plt.legend()
plt.title(f"for $E_{{dep.}} >$ {edep_min * 1000} MeV")
plt.ylabel("rate, Hz", loc="top")
plt.ylabel("rate, Hz")
plt.yscale("log")
plt.xticks(rotation=90, ha='right')
fig_cur.savefig(f"rates_edep_min_{edep_min}_{dataset_ix}.png", bbox_inches="tight")
......@@ -439,9 +450,10 @@ for edep_min in [0]: # GeV
plt.sca(ax_cmb)
plt.legend()
plt.title(f"for $E_{{dep.}} >$ {edep_min * 1000} MeV")
plt.ylabel("rate per tower, Hz", loc="top")
plt.ylabel("rate per tower, Hz")
plt.yscale("log")
plt.xticks(rotation=90, ha='right')
fig_cmb.savefig(f"rates_edep_min_{edep_min}.png", bbox_inches="tight")
plt.show()
plt.clf()
#+end_src
awkward >= 2.4.0
dask >= 2023
dask_awkward
dask_histogram
distributed >= 2023
awkward ~= 2.7.2
dask ~= 2024.12.1
dask_awkward ~= 2024.12.2
dask_histogram ~= 2024.12.1
distributed ~= 2024.12.1
pyhepmc
uproot >= 5.2.0
uproot ~= 5.5.1
def get_n_events(wildcards):
    """Events per simulation job, scaled down by the square root of the energy.

    The ENERGY wildcard (e.g. "100MeV", "2GeV") is parsed by stripping the
    "GeV" suffix and turning "MeV" into an "e-3" exponent, so "100MeV"
    reads as 0.1 GeV.  Electrons start from a base of 1000 events; all
    other particles from 2000.
    """
    energy_gev = float(wildcards.ENERGY.replace("GeV", "").replace("MeV", "e-3"))
    base = 1000 if wildcards.PARTICLE == "e-" else 2000
    return int(base // energy_gev ** 0.5)
# ddsim single-particle simulation for the backwards ECal benchmark.
# One output file per (detector config, particle, energy, phase space, index).
rule backwards_ecal_sim:
    input:
        # ancient(): steering-file timestamp changes do not trigger re-simulation
        steering_file=ancient("EPIC/EVGEN/SINGLE/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.steer"),
        warmup="warmup/{DETECTOR_CONFIG}.edm4hep.root",
        geometry_lib=find_epic_libraries(),
    output:
        "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
    log:
        "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root.log",
    wildcard_constraints:
        PARTICLE="(e-|pi-)",
        ENERGY="[0-9]+[kMG]eV",
        PHASE_SPACE="(3to50|45to135|130to177)deg",
        INDEX=r"\d{4}",
    params:
        N_EVENTS=get_n_events,
        # deterministic per-index random seed
        SEED=lambda wildcards: "1" + wildcards.INDEX,
        DETECTOR_PATH=os.environ["DETECTOR_PATH"],
        DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG,
        # simulation-stack hashes enter the cache key so cached outputs are
        # invalidated when dd4hep/npsim change
        DD4HEP_HASH=get_spack_package_hash("dd4hep"),
        NPSIM_HASH=get_spack_package_hash("npsim"),
    cache: True
    shell:
        """
set -m # monitor mode to prevent lingering processes
exec ddsim \
    --runType batch \
    --enableGun \
    --steeringFile "{input.steering_file}" \
    --random.seed {params.SEED} \
    --filter.tracker edep0 \
    -v WARNING \
    --numberOfEvents {params.N_EVENTS} \
    --compactFile {params.DETECTOR_PATH}/{params.DETECTOR_CONFIG}.xml \
    --outputFile {output}
"""
# EICrecon reconstruction of one simulated file, keeping only the
# collections used by the backwards ECal analysis.
rule backwards_ecal_recon:
    input:
        "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
    output:
        "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root",
    log:
        "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root.log",
    wildcard_constraints:
        INDEX=r"\d{4}",
    params:
        DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG,
        # eicrecon package hash participates in the cache key
        EICRECON_HASH=get_spack_package_hash("eicrecon"),
    cache: True
    shell: """
set -m # monitor mode to prevent lingering processes
exec env DETECTOR_CONFIG={params.DETECTOR_CONFIG} \
  eicrecon {input} -Ppodio:output_file={output} \
    -Ppodio:output_collections=MCParticles,EcalEndcapNRecHits,EcalEndcapNClusters
"""
# Collect the 20 locally produced per-index reconstruction outputs for one
# (particle, energy, phase space) point into a .lst file, one path per line.
rule backwards_ecal_local_sim_list:
    input:
        expand(
            "sim_output/backwards_ecal/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{{ENERGY}}/{{PHASE_SPACE}}/{{PARTICLE}}_{{ENERGY}}_{{PHASE_SPACE}}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
            INDEX=range(20),
        ),
    output:
        "listing/backwards_ecal/local/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst",
    run:
        with open(output[0], "wt") as fp:
            fp.write("\n".join(input))
# Build listing files for official campaign productions.
# Default ("stream_from_xrootd" truthy): write full root:// URLs so the
# analysis streams files directly.  Otherwise a checkpoint materializes the
# remote listing first so each file can be fetched as a DAG dependency.
if config.get("stream_from_xrootd", True) not in [False, "", "0", "false"]:
    # NOTE(review): the "empty file listing" error message contains an empty
    # quoted placeholder instead of the searched path — presumably meant to
    # interpolate {params.search_path}; confirm intent.
    rule backwards_ecal_campaign_sim_list:
        output:
            "listing/backwards_ecal/{CAMPAIGN}/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst",
        params:
            search_path=lambda wildcards: f"EPIC/RECO/{wildcards.CAMPAIGN}/epic_craterlake/SINGLE/{wildcards.PARTICLE}/{wildcards.ENERGY}/{wildcards.PHASE_SPACE}/",
        shell: """
xrdfs root://dtn-eic.jlab.org/ ls /volatile/eic/{params.search_path} \
    | awk '{{ print "root://dtn-eic.jlab.org/"$1; }}' \
    | sort \
    > {output}
if [ ! -s {output} ]; then
    echo "Got an empty file listing for path \"\""
    exit 1
fi
"""
else:
    # Checkpoint: fetch the remote listing as repository-relative paths
    # (stored as *.lst.orig) so the listed files become fetchable dependencies.
    checkpoint backwards_ecal_campaign_sim_list_checkpoint:
        output:
            "listing/backwards_ecal/{CAMPAIGN}/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst.orig",
        params:
            search_path=lambda wildcards: f"EPIC/RECO/{wildcards.CAMPAIGN}/epic_craterlake/SINGLE/{wildcards.PARTICLE}/{wildcards.ENERGY}/{wildcards.PHASE_SPACE}/",
        shell: """
xrdfs root://dtn-eic.jlab.org/ ls /volatile/eic/{params.search_path} \
    | sed -e 's#^/volatile/eic/##' \
    | sort \
    > {output}
if [ ! -s {output} ]; then
    echo "Got an empty file listing for path \"\""
    exit 1
fi
"""

    # Read the checkpoint's listing; each returned path becomes an input of
    # the rule below (and is therefore fetched) before the .lst is produced.
    def get_backwards_ecal_campaign_sim_list(wildcards):
        with checkpoints.backwards_ecal_campaign_sim_list_checkpoint.get(**wildcards).output[0].open() as fp:
            return [line.rstrip() for line in fp.readlines()]

    rule backwards_ecal_campaign_sim_list:
        input:
            # depend on paths from the file list
            get_backwards_ecal_campaign_sim_list,
            orig_list="listing/backwards_ecal/{CAMPAIGN}/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst.orig",
        output:
            "listing/backwards_ecal/{CAMPAIGN}/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst",
        shell: """
cp {input.orig_list} {output}
"""

# Local listings win over campaign listings when both could produce the output.
ruleorder: backwards_ecal_local_sim_list > backwards_ecal_campaign_sim_list
# Detector configuration under test; must be provided by the environment.
DETECTOR_CONFIG=os.environ["DETECTOR_CONFIG"]

# Top-level benchmark target: requires listings for every particle/energy
# point, then runs the analysis script against a throwaway local dask
# scheduler/worker pair (torn down via the EXIT trap).
rule backwards_ecal:
    input:
        expand(
            "listing/backwards_ecal/{{CAMPAIGN}}/" + DETECTOR_CONFIG + "/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst",
            PARTICLE=["pi-", "e-"],
            ENERGY=[
                "100MeV",
                "200MeV",
                "500MeV",
                "1GeV",
                "2GeV",
                "5GeV",
                "10GeV",
                "20GeV",
            ],
            PHASE_SPACE=["130to177deg"],
        ),
        matplotlibrc=".matplotlibrc",
        script="benchmarks/backwards_ecal/backwards_ecal.py",
    output:
        directory("results/backwards_ecal/{CAMPAIGN}/")
    log:
        scheduler=".logs/results/backwards_ecal/{CAMPAIGN}/scheduler.log",
        worker=".logs/results/backwards_ecal/{CAMPAIGN}/worker.log",
    threads: workflow.cores
    shell:
        """
if [[ "{wildcards.CAMPAIGN}" == "local" ]]; then
export PLOT_TITLE="Benchmark simulation"
else
export PLOT_TITLE="\\textbf{{ePIC}} Simulation {wildcards.CAMPAIGN}"
fi
set -m # monitor mode to prevent lingering processes
cleanup() {{
echo Cleaning up
kill $WORKER_PID $SCHEDULER_PID
}}
trap cleanup EXIT
PORT=$RANDOM
dask scheduler --port $PORT 2>{log.scheduler} &
export DASK_SCHEDULER=localhost:$PORT
SCHEDULER_PID=$!
dask worker tcp://$DASK_SCHEDULER --nworkers {threads} --nthreads 1 2>{log.worker} &
WORKER_PID=$!
env \
MATPLOTLIBRC={input.matplotlibrc} \
DETECTOR_CONFIG=""" + DETECTOR_CONFIG + """ \
INPUT_PATH_FORMAT=listing/backwards_ecal/{wildcards.CAMPAIGN}/""" + DETECTOR_CONFIG + """/{{particle}}/{{energy}}/130to177deg.lst \
OUTPUT_DIR={output} \
python {input.script}
"""
#+PROPERTY: header-args:jupyter-python :session /jpy:localhost#8888:backwards_ecal :async yes :results drawer :exports both
#+TITLE: ePIC EEEMCal benchmark
#+AUTHOR: Dmitry Kalinkin
#+OPTIONS: d:t
#+LATEX_CLASS_OPTIONS: [9pt,letter]
#+BIND: org-latex-image-default-width ""
#+BIND: org-latex-image-default-option "scale=0.3"
#+BIND: org-latex-images-centered nil
#+BIND: org-latex-minted-options (("breaklines") ("bgcolor" "black!5") ("frame" "single"))
#+LATEX_HEADER: \usepackage[margin=1in]{geometry}
#+LATEX_HEADER: \setlength{\parindent}{0pt}
#+LATEX: \sloppy
#+begin_src jupyter-python :results silent
import os
from pathlib import Path
import awkward as ak
import boost_histogram as bh
import dask_histogram as dh
import numpy as np
import uproot
import vector
#+end_src
#+begin_src jupyter-python :results silent
from dask.distributed import Client
client = Client(os.environ.get("DASK_SCHEDULER", "localhost:8786"))
#+end_src
#+begin_src jupyter-python
vector.register_awkward()
from dask.distributed import WorkerPlugin
class VectorImporter(WorkerPlugin):
idempotent=True
def setup(self, worker):
import vector
vector.register_awkward()
client.register_plugin(VectorImporter())
#+end_src
* Plotting setup
#+begin_src jupyter-python :results silent
import matplotlib as mpl
import matplotlib.pyplot as plt
def setup_presentation_style():
    """Reset matplotlib to its defaults, then apply the ggplot style with
    compact font/figure sizes suitable for presentation figures."""
    mpl.rcParams.update(mpl.rcParamsDefault)
    plt.style.use('ggplot')
    plt.rcParams.update({
        'axes.labelsize': 8,
        'axes.titlesize': 9,
        'figure.titlesize': 9,
        'figure.figsize': (4, 3),
        'legend.fontsize': 7,
        'xtick.labelsize': 8,
        'ytick.labelsize': 8,
        'xaxis.labellocation': 'right',
        'yaxis.labellocation': 'top',
        'pgf.rcfonts': False,
    })

# Apply the style globally for the rest of the notebook.
setup_presentation_style()
#+end_src
* Parameters
#+begin_src jupyter-python :results silent
DETECTOR_CONFIG=os.environ.get("DETECTOR_CONFIG")
PLOT_TITLE=os.environ.get("PLOT_TITLE")
INPUT_PATH_FORMAT=os.environ.get("INPUT_PATH_FORMAT", "EPIC/RECO/24.04.0/epic_craterlake/SINGLE/{particle}/{energy}/130to177deg/{particle}_{energy}_130to177deg.{ix:04d}.eicrecon.tree.edm4eic.root")
output_dir=Path(os.environ.get("OUTPUT_DIR", "./"))
output_dir.mkdir(parents=True, exist_ok=True)
#+end_src
* Analysis
First, we define a requirement on what phase space we will consider for our
analysis. The following function filters single-particle events that
are thrown within $-3.5 < \eta < -2.0$:
#+begin_src jupyter-python
def filter_pointing(events):
    """Keep only events whose thrown (first MC) particle has -3.5 < eta < -2.0.

    Builds a Momentum4D record from the MCParticles branches and cuts on the
    pseudorapidity of the leading particle.
    """
    momentum = ak.zip(
        {
            "m": events["MCParticles.mass"],
            "px": events["MCParticles.momentum.x"],
            "py": events["MCParticles.momentum.y"],
            "pz": events["MCParticles.momentum.z"],
        },
        with_name="Momentum4D",
    )
    leading_eta = momentum.eta[:, 0]
    in_acceptance = (leading_eta > -3.5) & (leading_eta < -2.0)
    return events[in_acceptance]
#+end_src
#+begin_src jupyter-python
energies = [
"100MeV",
"200MeV",
"500MeV",
"1GeV",
"2GeV",
"5GeV",
"10GeV",
"20GeV",
]
filter_name = [
"MCParticles.*",
"EcalEndcapNClusters.energy",
]
pi_eval = {}
e_eval = {}
def readlist(path):
    """Read a listing file and return its lines as a list of paths,
    with trailing whitespace (including the newline) stripped."""
    with open(path, "rt") as fp:
        return [entry.rstrip() for entry in fp]
for energy in energies:
pi_eval[energy] = uproot.dask(
{path: "events" for path in readlist(INPUT_PATH_FORMAT.format(particle="pi-", energy=energy))},
filter_name=filter_name,
steps_per_file=1,
open_files=False,
).map_partitions(filter_pointing)
e_eval[energy] = uproot.dask(
{path: "events" for path in readlist(INPUT_PATH_FORMAT.format(particle="e-", energy=energy))},
filter_name=filter_name,
steps_per_file=1,
open_files=False,
).map_partitions(filter_pointing)
#+end_src
** Energy resolution
The first step in the analysis is plotting the distributions of cluster energies.
Only the electron distribution is needed, but we also process $\pi^-$ for the
pion rejection study.
#+begin_src jupyter-python
# E/p response histograms for both particle species at every beam energy.
# Filled lazily through dask_histogram; the final client.gather/compute call
# materializes all of them in one pass over the inputs.
e_over_p_hist = {}
e_over_p_axis = bh.axis.Regular(101, 0., 1.01)

for ix, energy in enumerate(energies):
    for particle_name, dataset in [("e-", e_eval[energy]), ("pi-", pi_eval[energy])]:
        # "100MeV" -> 0.1, "20GeV" -> 20.0 (string rewrite, then float())
        energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3"))
        def clf(events):
            # Classifier proxy: energy of the most energetic cluster over thrown momentum
            return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value
        e_over_p_hist[(particle_name, energy)] = dh.factory(
            clf(dataset),
            axes=(e_over_p_axis,),
        )

# Trigger the distributed computation and replace lazy handles with results.
e_over_p_hist = client.gather(client.compute(e_over_p_hist))
#+end_src
The next step is to plot the histograms together with fits and produce a plot
for resolution vs energy that summarizes them.
#+begin_src jupyter-python
fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
axs = np.ravel(np.array(axs))
sigmas_rel_FWHM_cb = {}
fractions_below = {}
def norm_by_sum(arr):
return arr / np.sum(arr)
for ix, energy in enumerate(energies):
plt.sca(axs[ix])
hist = e_over_p_hist[("e-", energy)]
patch = plt.stairs(norm_by_sum(hist.values()), hist.axes[0].edges, label=rf"$e^-$ {PLOT_TITLE}")
plt.title(f"{energy}")
import scipy.stats
def f(x, n, beta, m, loc, scale):
return n * scipy.stats.crystalball.pdf(x, beta, m, loc, scale)
p0 = (np.sum(hist.values()[10:]), 2., 3., 0.95, 0.05)
try:
import scipy.optimize
par, pcov = scipy.optimize.curve_fit(f, hist.axes[0].centers[5:], hist.values()[5:], p0=p0, maxfev=10000)
except RuntimeError:
print(hist)
raise
plt.plot(hist.axes[0].centers, f(hist.axes[0].centers, *par), label=rf"Crystal Ball fit", color="tab:green", lw=0.8)
def summarize_fit(par):
_, _, _, loc_cb, scale_cb = par
# Calculate FWHM
y_max = np.max(f(np.linspace(0., 1., 100), *par))
f_prime = lambda x: f(x, *par) - y_max / 2
x_plus, = scipy.optimize.root(f_prime, loc_cb + scale_cb).x
x_minus, = scipy.optimize.root(f_prime, loc_cb - scale_cb).x
plt.axvline(x_minus, ls="--", lw=0.75, color=patch.get_facecolor(), label=r"$\mu - $FWHM")
plt.axvline(x_plus, ls=":", lw=0.75, color=patch.get_facecolor(), label=r"$\mu + $FWHM")
fwhm = (x_plus - x_minus) / loc_cb
sigma_rel_FWHM_cb = fwhm / 2 / np.sqrt(2 * np.log(2))
cutoff_x = loc_cb - fwhm
fraction_below = np.sum(hist.values()[hist.axes[0].centers < cutoff_x]) / np.sum(hist.values())
return sigma_rel_FWHM_cb, fraction_below
sigma_rel_FWHM_cb, fraction_below = summarize_fit(par)
sigmas_rel_FWHM_cb.setdefault(PLOT_TITLE, {})[energy] = sigma_rel_FWHM_cb
fractions_below.setdefault(PLOT_TITLE, {})[energy] = fraction_below
plt.legend()
plt.xlabel("$E/p$", loc="right")
plt.ylabel("Event yield", loc="top")
fig.savefig(output_dir / f"resolution_plots.pdf", bbox_inches="tight")
fig.savefig(output_dir / f"resolution_plots.png", bbox_inches="tight")
plt.show()
plt.close(fig)
plt.figure()
energy_values = np.array([float(energy.replace("GeV", "").replace("MeV", "e-3")) for energy in energies])
for clf_label, sigma_rel_FWHM_cb in sigmas_rel_FWHM_cb.items():
sigma_over_e = np.array([sigma_rel_FWHM_cb[energy] for energy in energies]) * 100 # convert to %
def f(energy, stochastic, constant):
return np.sqrt((stochastic / np.sqrt(energy)) ** 2 + constant ** 2)
cond = energy_values >= 0.5
try:
import scipy.optimize
par, pcov = scipy.optimize.curve_fit(f, energy_values[cond], sigma_over_e[cond], maxfev=10000)
except RuntimeError:
print("energy_values", energy_values[cond])
print("sigma_over_e", sigma_over_e[cond])
raise
stochastic, constant = par
plt.plot(
energy_values,
sigma_over_e,
marker=".",
ls="none",
label=f"{clf_label}"
)
xmin = np.min(energy_values[cond])
xmax = np.max(energy_values[cond])
xs = np.arange(xmin, xmax, 0.1)
plt.plot(
xs,
f(xs, *par),
ls="--",
lw=0.5,
label=fr"Functional fit: ${np.ceil(stochastic * 10) / 10:.1f}\% / \sqrt{{E}} \oplus {np.ceil(constant * 10) / 10:.1f}\%$",
)
xmin = np.min(energy_values)
xmax = np.max(energy_values) * 1.05
xs = np.arange(xmin, xmax + 0.1, 0.1)
plt.fill_between(
xs,
np.sqrt((2 / np.sqrt(xs)) ** 2 + 1 ** 2),
np.sqrt((2 / np.sqrt(xs)) ** 2 + 3 ** 2),
lw=0., alpha=0.2, color="black", label=r"YR requirement $2\% / \sqrt{E} \oplus (1-3)\%$",
)
plt.xlim(0., xmax)
plt.ylim(top=6.)
plt.legend()
plt.xlabel("Energy, GeV", loc="right")
plt.ylabel(r"$\sigma_{E} / E$ derived from FWHM, %", loc="top")
plt.savefig(output_dir / f"resolution.pdf", bbox_inches="tight")
plt.savefig(output_dir / f"resolution.png", bbox_inches="tight")
plt.show()
#+end_src
** Pion rejection
#+begin_src jupyter-python
fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
fig_log, axs_log = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
fig_roc, axs_roc = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
axs = np.ravel(np.array(axs))
axs_log = np.ravel(np.array(axs_log))
axs_roc = np.ravel(np.array(axs_roc))
rocs = {}
for ix, energy in enumerate(energies):
energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3"))
def clf(events):
return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value
e_pred = clf(e_eval[energy])
pi_pred = clf(pi_eval[energy])
for do_log, ax in [(False, axs[ix]), (True, axs_log[ix])]:
plt.sca(ax)
patch = plt.stairs(norm_by_sum(e_over_p_hist[("e-", energy)].values()), e_over_p_hist[("e-", energy)].axes[0].edges, label=rf"$e^-$ {PLOT_TITLE}")
patch = plt.stairs(norm_by_sum(e_over_p_hist[("pi-", energy)].values()), e_over_p_hist[("pi-", energy)].axes[0].edges, label=rf"$\pi^-$ {PLOT_TITLE}")
plt.title(f"{energy}")
plt.legend()
plt.xlabel("Classifier output")
plt.ylabel("Event yield")
if do_log:
plt.yscale("log")
plt.sca(axs_roc[ix])
fpr = np.cumsum(e_over_p_hist[("pi-", energy)].values()[::-1])
fpr /= fpr[-1]
tpr = np.cumsum(e_over_p_hist[("e-", energy)].values()[::-1])
tpr /= tpr[-1]
cond = fpr != 0 # avoid infinite rejection (region of large uncertainty)
cond &= tpr != 1 # avoid linear interpolation (region of large uncertainty)
def mk_interp(tpr, fpr):
def interp(eff):
return np.interp(eff, tpr, fpr)
return interp
rocs.setdefault(PLOT_TITLE, {})[energy] = mk_interp(tpr, fpr)
plt.plot(tpr[cond] * 100, 1 / fpr[cond], label=f"{PLOT_TITLE}")
plt.yscale("log")
plt.title(f"{energy}")
plt.legend(loc="lower left")
plt.xlabel("Electron efficiency, %")
plt.ylabel("Pion rejection factor")
fig.savefig(output_dir / f"pred.pdf", bbox_inches="tight")
fig.savefig(output_dir / f"pred.png", bbox_inches="tight")
plt.close(fig)
fig_log.savefig(output_dir / f"pred_log.pdf", bbox_inches="tight")
fig_log.savefig(output_dir / f"pred_log.png", bbox_inches="tight")
fig_log.show()
fig_roc.savefig(output_dir / f"roc.pdf", bbox_inches="tight")
fig_roc.savefig(output_dir / f"roc.png", bbox_inches="tight")
fig_roc.show()
plt.figure()
for clf_label, roc in rocs.items():
plt.plot(
[float(energy.replace("GeV", "").replace("MeV", "e-3")) for energy in energies],
[1 / roc[energy](0.95) for energy in energies],
marker=".",
label=f"{clf_label}",
)
xmax = np.max(energy_values) * 1.05
plt.xlim(0., xmax)
plt.yscale("log")
plt.legend()
plt.xlabel("Energy, GeV")
plt.ylabel("Pion rejection at 95%")
plt.savefig(output_dir / f"pion_rej.pdf", bbox_inches="tight")
plt.savefig(output_dir / f"pion_rej.png", bbox_inches="tight")
plt.show()
#+end_src
# Simulate + reconstruct single particles, one CI job per (particle, momentum)
# point; each job drives Snakemake up to the local listing file.
sim:backwards_ecal:
  extends: .det_benchmark
  stage: simulate
  parallel:
    matrix:
      - PARTICLE: ["e-", "pi-"]
        MOMENTUM: [
          "100MeV",
          "200MeV",
          "500MeV",
          "1GeV",
          "2GeV",
          "5GeV",
          "10GeV",
          "20GeV",
        ]
  script:
    - |
      snakemake $SNAKEMAKE_FLAGS --cores 5 listing/backwards_ecal/local/${DETECTOR_CONFIG}/${PARTICLE}/${MOMENTUM}/130to177deg.lst

# Run the analysis on the locally simulated samples.
bench:backwards_ecal:
  extends: .det_benchmark
  stage: benchmarks
  needs:
    - ["sim:backwards_ecal"]
  script:
    - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
    - pip install -r benchmarks/backwards_ecal/requirements.txt
    - snakemake $SNAKEMAKE_FLAGS --cores 5 results/backwards_ecal/local

# Optional (manual) run against an official simulation campaign.
bench:backwards_ecal_campaigns:
  extends: .det_benchmark
  stage: benchmarks
  when: manual
  timeout: 4 hours
  script:
    - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
    - pip install -r benchmarks/backwards_ecal/requirements.txt
    - snakemake $SNAKEMAKE_FLAGS --cores 5 results/backwards_ecal/24.10.1

# Collect artifacts; verifies outputs can be cleanly deleted without
# clobbering the saved results directory.
collect_results:backwards_ecal:
  extends: .det_benchmark
  stage: collect
  needs:
    - "bench:backwards_ecal"
  script:
    - ls -lrht
    - mv results{,_save}/ # move results directory out of the way to preserve it
    - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/backwards_ecal/local
    - mv results{_save,}/
# Python dependencies for the backwards_ecal benchmark analysis
awkward ~= 2.7.2
dask ~= 2024.12.1
dask_awkward ~= 2024.12.2
dask_histogram ~= 2024.12.1
distributed ~= 2024.12.1
pyhepmc
uproot ~= 5.5.1
vector ~= 1.5.2
......@@ -26,7 +26,8 @@ rule emcal_barrel_particles:
"{DETECTOR_CONFIG}/sim_output/sim_emcal_barrel_{PARTICLE}_energies{E_MIN}_{E_MAX}.edm4hep.root"
shell:
"""
ddsim \
set -m # monitor mode to prevent lingering processes
exec ddsim \
--runType batch \
-v WARNING \
--part.minimalKineticEnergy 0.5*GeV \
......
......@@ -2,13 +2,13 @@ sim:emcal_barrel_pions:
extends: .det_benchmark
stage: simulate
script:
- snakemake --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_{piplus,piminus}_energies5.0_5.0.edm4hep.root
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_{piplus,piminus}_energies5.0_5.0.edm4hep.root
sim:emcal_barrel_pi0:
extends: .det_benchmark
stage: simulate
script:
- snakemake --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_pi0_energies5.0_5.0.edm4hep.root
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_pi0_energies5.0_5.0.edm4hep.root
sim:emcal_barrel_electrons:
extends: .det_benchmark
......@@ -16,20 +16,20 @@ sim:emcal_barrel_electrons:
script:
- if [[ "$RUN_EXTENDED_BENCHMARK" == "true" ]] ; then snakemake --cores $DETECTOR_CONFIG/results/energy_scan/emcal_barrel_electron_fsam_scan.png; fi
- export JUGGLER_N_EVENTS=400
- snakemake --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_electron_energies5.0_5.0.edm4hep.root
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_electron_energies5.0_5.0.edm4hep.root
sim:emcal_barrel_photons:
extends: .det_benchmark
stage: simulate
script:
- if [[ "$RUN_EXTENDED_BENCHMARK" == "true" ]] ; then snakemake --cores $DETECTOR_CONFIG/results/energy_scan/emcal_barrel_proton_fsam_scan.png; fi
- snakemake --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_photon_energies5.0_5.0.edm4hep.root
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_photon_energies5.0_5.0.edm4hep.root
sim:emcal_barrel_pion_rejection:
extends: .det_benchmark
stage: simulate
script:
- snakemake --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_{piminus,electron}_energies1.0_18.0.edm4hep.root
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_{piminus,electron}_energies1.0_18.0.edm4hep.root
calib:emcal_barrel_electrons:
extends: .det_benchmark
......@@ -39,7 +39,7 @@ calib:emcal_barrel_electrons:
script:
- ls -lhtR sim_output/
- rootls -t sim_output/sim_emcal_barrel_electron_energies5.0_5.0.edm4hep.root
- snakemake --cores 1 $DETECTOR_CONFIG/results/emcal_barrel_electron_calibration.json
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/results/emcal_barrel_electron_calibration.json
- mv sim_output/sim_emcal_barrel_electron.edm4hep.root results/.
- echo "JSON file(s) from analysis:" ; cat results/*.json
......@@ -49,7 +49,7 @@ bench:emcal_barrel_pions:
needs:
- ["sim:emcal_barrel_pions"]
script:
- snakemake --cores 1 $DETECTOR_CONFIG/results/emcal_barrel_pions_Ethr.png
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/results/emcal_barrel_pions_Ethr.png
bench:emcal_barrel_electrons_scan:
extends: .det_benchmark
......@@ -66,7 +66,7 @@ bench:emcal_barrel_pi0:
- ["sim:emcal_barrel_pi0", "calib:emcal_barrel_electrons"]
script:
- echo "JSON file(s) from analysis:" ; cat results/*.json
- snakemake --cores 1 epic_craterlake/results/Barrel_emcal_pi0.json
- snakemake $SNAKEMAKE_FLAGS --cores 1 epic_craterlake/results/Barrel_emcal_pi0.json
bench:emcal_barrel_photons:
extends: .det_benchmark
......@@ -76,7 +76,7 @@ bench:emcal_barrel_photons:
script:
- ls -lhtR sim_output/
- rootls -t sim_output/sim_emcal_barrel_photon_energies5.0_5.0.edm4hep.root
- snakemake --cores 1 $DETECTOR_CONFIG/results/emcal_barrel_photon_calibration.json
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/results/emcal_barrel_photon_calibration.json
- mv sim_output/sim_emcal_barrel_photon.edm4hep.root results/.
- if [[ "$RUN_EXTENDED_BENCHMARK" == "true" ]] ; then snakemake --cores $DETECTOR_CONFIG/results/energy_scan/emcal_barrel_proton_fsam_scan.png; fi
......@@ -89,7 +89,7 @@ bench:emcal_barrel_pion_rejection:
- ls -lhtR sim_output/
- rootls -t $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_piminus_energies1.0_18.0.edm4hep.root
- rootls -t $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_electron_energies1.0_18.0.edm4hep.root
- snakemake --cores 1 $DETECTOR_CONFIG/results/Barrel_emcal_pion_rej.json
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/results/Barrel_emcal_pion_rej.json
collect_results:barrel_ecal:
extends: .det_benchmark
......
def format_energy_for_dd4hep(s):
    """Turn an energy string like "100MeV" or "20GeV" into a DD4hep
    expression ("100*MeV", "20*GeV") by inserting "*" between the numeric
    magnitude and the unit suffix."""
    magnitude = s.rstrip("kMGeV")
    unit = s.lstrip("0123456789")
    return f"{magnitude}*{unit}"
# Single-particle ddsim production for the calorimeter PID study.
# NOTE(review): the output filename contains a free {ENERGY} wildcard while
# the directory uses {ENERGY_MIN}to{ENERGY_MAX}; in practice {ENERGY} matches
# the "MINtoMAX" string — confirm this is intentional.
rule calo_pid_sim:
    input:
        warmup="warmup/{DETECTOR_CONFIG}.edm4hep.root",
        geometry_lib=find_epic_libraries(),
    output:
        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY_MIN}to{ENERGY_MAX}/{THETA_MIN}to{THETA_MAX}deg/{PARTICLE}_{ENERGY}_{THETA_MIN}to{THETA_MAX}deg.{INDEX}.edm4hep.root",
    log:
        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY_MIN}to{ENERGY_MAX}/{THETA_MIN}to{THETA_MAX}deg/{PARTICLE}_{ENERGY}_{THETA_MIN}to{THETA_MAX}deg.{INDEX}.edm4hep.root.log",
    wildcard_constraints:
        PARTICLE="(e-|pi-)",
        ENERGY_MIN="[0-9]+[kMG]eV",
        ENERGY_MAX="[0-9]+[kMG]eV",
        THETA_MIN="[0-9]+",
        THETA_MAX="[0-9]+",
        INDEX=r"\d{4}",
    params:
        N_EVENTS=1000,
        # Seed derived from the file index so each chunk is reproducible
        SEED=lambda wildcards: "1" + wildcards.INDEX,
        DETECTOR_PATH=os.environ["DETECTOR_PATH"],
        DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG,
        ENERGY_MIN=lambda wildcards: format_energy_for_dd4hep(wildcards.ENERGY_MIN),
        ENERGY_MAX=lambda wildcards: format_energy_for_dd4hep(wildcards.ENERGY_MAX),
        THETA_MIN=lambda wildcards: wildcards.THETA_MIN,
        THETA_MAX=lambda wildcards: wildcards.THETA_MAX,
        # Hashes invalidate the cache when the simulation stack changes
        DD4HEP_HASH=get_spack_package_hash("dd4hep"),
        NPSIM_HASH=get_spack_package_hash("npsim"),
    cache: True
    shell:
        """
set -m # monitor mode to prevent lingering processes
exec ddsim \
  --runType batch \
  --enableGun \
  --gun.momentumMin "{params.ENERGY_MIN}" \
  --gun.momentumMax "{params.ENERGY_MAX}" \
  --gun.thetaMin "{wildcards.THETA_MIN}*deg" \
  --gun.thetaMax "{wildcards.THETA_MAX}*deg" \
  --gun.particle {wildcards.PARTICLE} \
  --gun.distribution eta \
  --random.seed {params.SEED} \
  --filter.tracker edep0 \
  -v WARNING \
  --numberOfEvents {params.N_EVENTS} \
  --compactFile {params.DETECTOR_PATH}/{params.DETECTOR_CONFIG}.xml \
  --outputFile {output}
"""
# Reconstruct a simulated chunk with eicrecon, keeping only the collections
# needed by the PID analysis (including the ParticleID feature/target/output
# tensors).
rule calo_pid_recon:
    input:
        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
    output:
        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root",
    log:
        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root.log",
    wildcard_constraints:
        INDEX=r"\d{4}",
    shell: """
set -m # monitor mode to prevent lingering processes
exec env DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
  eicrecon {input} -Ppodio:output_file={output} \
  -Ppodio:output_collections=MCParticles,EcalEndcapNRecHits,EcalEndcapNClusters,EcalEndcapNParticleIDInput_features,EcalEndcapNParticleIDTarget,EcalEndcapNParticleIDOutput_probability_tensor
"""
# Aggregate the 100 reconstructed chunks for one particle species into a
# listing file.  NOTE(review): the input key is named "electrons" but the
# rule is instantiated for any {PARTICLE} — the name looks like a leftover;
# confirm before relying on it.
rule calo_pid_input_list:
    input:
        electrons=expand(
            "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{ENERGY}/{PHASE_SPACE}/{{PARTICLE}}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
            ENERGY=["100MeVto20GeV"],
            PHASE_SPACE=["130to177deg"],
            INDEX=range(100),
        ),
    output:
        "listing/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}.lst",
    run:
        # One input path per line
        with open(output[0], "wt") as fp:
            fp.write("\n".join(input))
# Run the calorimeter PID analysis script on the electron and pion listings;
# all configuration is passed through environment variables.
rule calo_pid:
    input:
        electrons="listing/calo_pid/{DETECTOR_CONFIG}/e-.lst",
        pions="listing/calo_pid/{DETECTOR_CONFIG}/pi-.lst",
        matplotlibrc=".matplotlibrc",
        script="benchmarks/calo_pid/calo_pid.py",
    output:
        directory("results/{DETECTOR_CONFIG}/calo_pid")
    shell:
        """
env \
MATPLOTLIBRC={input.matplotlibrc} \
DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
PLOT_TITLE={wildcards.DETECTOR_CONFIG} \
INPUT_ELECTRONS="{input.electrons}" \
INPUT_PIONS="{input.pions}" \
OUTPUT_DIR={output} \
python {input.script}
"""
#+PROPERTY: header-args:jupyter-python :session /jpy:localhost#8888:benchmark :async yes :results drawer :exports both
#+TITLE: ePIC EEEMCal calorimetry PID
#+AUTHOR: Dmitry Kalinkin
#+OPTIONS: d:t
#+LATEX_CLASS_OPTIONS: [9pt,letter]
#+BIND: org-latex-image-default-width ""
#+BIND: org-latex-image-default-option "scale=0.3"
#+BIND: org-latex-images-centered nil
#+BIND: org-latex-minted-options (("breaklines") ("bgcolor" "black!5") ("frame" "single"))
#+LATEX_HEADER: \usepackage[margin=1in]{geometry}
#+LATEX_HEADER: \setlength{\parindent}{0pt}
#+LATEX: \sloppy
#+begin_src jupyter-python :results silent
import os
from pathlib import Path
import awkward as ak
import boost_histogram as bh
import numpy as np
import vector
import uproot
from sklearn.metrics import roc_curve
vector.register_awkward()
#+end_src
* Parameters
#+begin_src jupyter-python :results silent
DETECTOR_CONFIG=os.environ.get("DETECTOR_CONFIG")
PLOT_TITLE=os.environ.get("PLOT_TITLE")
INPUT_PIONS=os.environ.get("INPUT_PIONS")
INPUT_ELECTRONS=os.environ.get("INPUT_ELECTRONS")
output_dir=Path(os.environ.get("OUTPUT_DIR", "./"))
output_dir.mkdir(parents=True, exist_ok=True)
#+end_src
* Plotting setup
#+begin_src jupyter-python :results silent
import matplotlib as mpl
import matplotlib.pyplot as plt
def setup_presentation_style():
    """Reset matplotlib to its defaults, then apply the ggplot style with
    compact font/figure sizes suitable for presentation figures."""
    mpl.rcParams.update(mpl.rcParamsDefault)
    plt.style.use('ggplot')
    plt.rcParams.update({
        'axes.labelsize': 8,
        'axes.titlesize': 9,
        'figure.titlesize': 9,
        'figure.figsize': (4, 3),
        'legend.fontsize': 7,
        'xtick.labelsize': 8,
        'ytick.labelsize': 8,
        'pgf.rcfonts': False,
    })

# Apply the style globally for the rest of the notebook.
setup_presentation_style()
#+end_src
* Analysis
#+begin_src jupyter-python
def filter_pointing(events):
    """Keep only events whose thrown (first MC) particle has -3.5 < eta < -2.0.

    Builds a Momentum4D record from the MCParticles branches and cuts on the
    pseudorapidity of the leading particle.
    """
    momentum = ak.zip(
        {
            "m": events["MCParticles.mass"],
            "px": events["MCParticles.momentum.x"],
            "py": events["MCParticles.momentum.y"],
            "pz": events["MCParticles.momentum.z"],
        },
        with_name="Momentum4D",
    )
    leading_eta = momentum.eta[:, 0]
    in_acceptance = (leading_eta > -3.5) & (leading_eta < -2.0)
    return events[in_acceptance]
def readlist(path):
    """Read a listing file and return its lines as a list of paths,
    with trailing whitespace (including the newline) stripped."""
    with open(path, "rt") as fp:
        return [entry.rstrip() for entry in fp]
e = filter_pointing(uproot.concatenate({filename: "events" for filename in readlist(INPUT_ELECTRONS)}, filter_name=["MCParticles.*", "*EcalEndcapN*"]))
pi = filter_pointing(uproot.concatenate({filename: "events" for filename in readlist(INPUT_PIONS)}, filter_name=["MCParticles.*", "*EcalEndcapN*"]))
e_train = e[:len(pi)//2]
pi_train = pi[:len(pi)//2]
e_eval = e[len(pi)//2:]
pi_eval = pi[len(pi)//2:]
#+end_src
#+RESULTS:
:results:
:end:
#+begin_src jupyter-python
nums = ak.num(pi_train["_EcalEndcapNParticleIDInput_features_floatData"], axis=1)
num_features = ak.min(nums[nums > 0])
print(f"{num_features} features")
nums = ak.num(pi_train["_EcalEndcapNParticleIDTarget_int64Data"], axis=1)
num_targets = ak.min(nums[nums > 0])
print(f"{num_targets} targets")
pi_train_leading_cluster_ixs = ak.argsort(pi_train["EcalEndcapNClusters.energy"], ascending=False)[:,:1]
e_train_leading_cluster_ixs = ak.argsort(e_train["EcalEndcapNClusters.energy"], ascending=False)[:,:1]
pi_train_x = ak.flatten(ak.unflatten(pi_train["_EcalEndcapNParticleIDInput_features_floatData"], num_features, axis=1)[pi_train_leading_cluster_ixs])
pi_train_y = ak.flatten(ak.unflatten(pi_train["_EcalEndcapNParticleIDTarget_int64Data"], num_targets, axis=1)[pi_train_leading_cluster_ixs])
e_train_x = ak.flatten(ak.unflatten(e_train["_EcalEndcapNParticleIDInput_features_floatData"], num_features, axis=1)[e_train_leading_cluster_ixs])
e_train_y = ak.flatten(ak.unflatten(e_train["_EcalEndcapNParticleIDTarget_int64Data"], num_targets, axis=1)[e_train_leading_cluster_ixs])
train_x = ak.concatenate([pi_train_x, e_train_x])
train_y = ak.concatenate([pi_train_y, e_train_y])
pi_eval_leading_cluster_ixs = ak.argsort(pi_eval["EcalEndcapNClusters.energy"], ascending=False)[:,:1]
e_eval_leading_cluster_ixs = ak.argsort(e_eval["EcalEndcapNClusters.energy"], ascending=False)[:,:1]
pi_eval_x = ak.flatten(ak.unflatten(pi_eval["_EcalEndcapNParticleIDInput_features_floatData"], num_features, axis=1)[pi_eval_leading_cluster_ixs])
pi_eval_y = ak.flatten(ak.unflatten(pi_eval["_EcalEndcapNParticleIDTarget_int64Data"], num_targets, axis=1)[pi_eval_leading_cluster_ixs])
e_eval_x = ak.flatten(ak.unflatten(e_eval["_EcalEndcapNParticleIDInput_features_floatData"], num_features, axis=1)[e_eval_leading_cluster_ixs])
e_eval_y = ak.flatten(ak.unflatten(e_eval["_EcalEndcapNParticleIDTarget_int64Data"], num_targets, axis=1)[e_eval_leading_cluster_ixs])
eval_x = ak.concatenate([pi_eval_x, e_eval_x])
eval_y = ak.concatenate([pi_eval_y, e_eval_y])
#+end_src
#+RESULTS:
:results:
: 11 features
: 2 targets
:end:
#+begin_src jupyter-python
#+end_src
#+RESULTS:
:results:
#+begin_export html
<pre>[5.11,
0.0424,
3.03,
2.16,
17.7,
8.32,
-4.54e-07,
0.000456,
0,
69.2,
0]
------------------
type: 11 * float32</pre>
#+end_export
:end:
#+begin_src jupyter-python
ak.sum((ak.num(pi_train_leading_cluster_ixs) != 0)), ak.num(pi_train_leading_cluster_ixs, axis=0)
#+end_src
#+RESULTS:
:results:
| 87721 | array | (88210) |
:end:
#+begin_src jupyter-python
plt.hist(pi_eval_x[:,0])
plt.hist(e_eval_x[:,0], alpha=0.5)
plt.show()
#+end_src
#+RESULTS:
:results:
[[file:./.ob-jupyter/5381c9bd149f0bb8855bf539e7ce8ef927a2e1a9.png]]
:end:
#+begin_src jupyter-python
"""
fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
fig_log, axs_log = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
fig_roc, axs_roc = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
axs = np.ravel(np.array(axs))
axs_log = np.ravel(np.array(axs_log))
axs_roc = np.ravel(np.array(axs_roc))
rocs = {}
for ix, energy in enumerate(energies):
for c in [False, True]:
energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3"))
if c:
clf_label = "leading cluster"
else:
clf_label = "sum all hits"
def clf(events):
if c:
return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value
else:
return ak.sum(events["EcalEndcapNRecHits.energy"], axis=-1) / energy_value
e_pred = clf(e_eval[energy])
pi_pred = clf(pi_eval[energy])
for do_log in [False, True]:
plt.sca(axs[ix])
plt.hist(e_pred, bins=np.linspace(0., 1.01, 101), label=rf"$e^-$ {clf_label}")
plt.hist(pi_pred, bins=np.linspace(0., 1.01, 101), label=rf"$\pi^-$ {clf_label}", histtype="step")
plt.title(f"{energy}")
plt.legend()
if do_log: plt.yscale("log")
plt.sca(axs_roc[ix])
fpr, tpr, _ = roc_curve(
np.concatenate([np.ones_like(e_pred), np.zeros_like(pi_pred)]),
np.concatenate([e_pred, pi_pred]),
)
cond = fpr != 0 # avoid infinite rejection (region of large uncertainty)
cond &= tpr != 1 # avoid linear interpolation (region of large uncertainty)
def mk_interp(tpr, fpr):
def interp(eff):
return np.interp(eff, tpr, fpr)
return interp
rocs.setdefault(clf_label, {})[energy] = mk_interp(tpr, fpr)
plt.plot(tpr[cond] * 100, 1 / fpr[cond], label=f"{clf_label}")
plt.title(f"{energy}")
plt.legend()
fig.show()
plt.close(fig_log)
fig_roc.show()
plt.figure()
for clf_label, roc in rocs.items():
plt.plot(
[float(energy.replace("GeV", "").replace("MeV", "e-3")) for energy in energies],
[1 / roc[energy](0.95) for energy in energies],
marker=".",
label=f"{clf_label}",
)
plt.legend()
plt.show()
"""
#+end_src
#+begin_src jupyter-python
import catboost
clf = {}
from sklearn.metrics import roc_curve
roc = {}
clf = catboost.CatBoostClassifier(loss_function="CrossEntropy", verbose=0, n_estimators=1000)
clf.fit(
train_x.to_numpy(),
train_y.to_numpy()[:,1], # index 1 = is electron
)
plt.hist(clf.predict_proba(e_eval_x.to_numpy())[:,1], bins=np.linspace(0., 1.01, 101), label=r"$e^-$")
plt.hist(clf.predict_proba(pi_eval_x.to_numpy())[:,1], bins=np.linspace(0., 1.01, 101), label=r"$\pi^-$", histtype="step")
plt.xlabel("Classifier's probability prediction", loc="right")
plt.ylabel("Number of events", loc="top")
plt.legend(loc="upper center")
plt.savefig(output_dir / "predict_proba.pdf", bbox_inches="tight")
plt.show()
#+end_src
#+begin_src jupyter-python
# Per-energy-bin ROC study: slice the evaluation sample into 1 GeV bins and
# compute an electron-efficiency vs. pion-rejection curve for each bin.
energy_bin_edges = np.arange(0., 20. + 1e-7, 1.)  # 0..20 GeV in 1 GeV steps (epsilon so 20 is included)
# Column 0 of the feature matrix is treated as the particle energy/momentum
# (axis labels below are in GeV) — TODO confirm against the feature builder.
_eval_energy = eval_x[:,0]
for energy_bin_ix, (energy_bin_low, energy_bin_high) in enumerate(zip(energy_bin_edges[:-1], energy_bin_edges[1:])):
    # mask over the combined eval sample for this energy bin
    cond = (_eval_energy >= energy_bin_low) & (_eval_energy < energy_bin_high)
    print(energy_bin_low, energy_bin_high, ak.sum(cond))
    # per-species masks for the diagnostic histograms below
    pi_cond = (pi_eval_x[:,0] >= energy_bin_low) & (pi_eval_x[:,0] < energy_bin_high)
    e_cond = (e_eval_x[:,0] >= energy_bin_low) & (e_eval_x[:,0] < energy_bin_high)
    # diagnostic: distribution of feature column 1 for each species in this bin
    plt.hist(pi_eval_x[pi_cond][:,1], bins=np.linspace(0., 1.01, 101))
    plt.hist(e_eval_x[e_cond][:,1], bins=np.linspace(0., 1.01, 101))
    plt.yscale("log")
    plt.show()
    plt.clf()
    # ROC from truth labels (column 1 = is electron) vs. classifier scores
    fpr, tpr, _ = roc_curve(
        eval_y[cond][:,1],
        #eval_x[cond][:,1],
        clf.predict_proba(eval_x[cond].to_numpy())[:,1],
    )
    # NOTE(review): `cond` is reused here with a new meaning — from here on it
    # is a mask over ROC points, no longer the energy-bin event mask above.
    cond = fpr != 0 # avoid infinite rejection (region of large uncertainty)
    cond &= tpr != 1 # avoid linear interpolation (region of large uncertainty)
    #cond &= tpr > 0.5
    plt.plot(tpr[cond] * 100, 1 / fpr[cond])
    def mk_interp(tpr, fpr):
        # Factory binds this bin's (tpr, fpr) arrays in a closure so later
        # iterations don't overwrite them (avoids the late-binding pitfall).
        def interp(eff):
            # pion fake rate interpolated at the requested electron efficiency
            return np.interp(eff, tpr, fpr)
        return interp
    roc[energy_bin_ix] = mk_interp(tpr, fpr)
    plt.xlabel("Electron efficiency, %", loc="right")
    plt.ylabel("Pion rejection factor", loc="top")
    plt.title(rf"${energy_bin_low:.1f} < |\vec{{p}}| < {energy_bin_high:.1f}$ GeV")
    plt.legend(loc="lower left")
    plt.yscale("log")
    plt.savefig(output_dir / f"roc_{energy_bin_low:.1f}_{energy_bin_high:.1f}.pdf", bbox_inches="tight")
    plt.show()
    plt.clf()
# Summary plot: pion rejection at 95% electron efficiency vs. bin center energy.
plt.plot(
    (energy_bin_edges[:-1] + energy_bin_edges[1:]) / 2,
    [1 / roc[energy_bin_ix](0.95) for energy_bin_ix in range(len(energy_bin_edges) - 1)],
    marker=".",
    label="",
)
plt.yscale("log")
plt.legend()
plt.xlabel("Energy, GeV", loc="right")
plt.ylabel("Pion rejection at 95%", loc="top")
plt.savefig(output_dir / f"pion_rej.pdf", bbox_inches="tight")
plt.show()
#+end_src
#+begin_src jupyter-python
# Export the trained classifier to ONNX, then trim the exported graph.
clf.save_model(
    output_dir / "EcalEndcapN_pi_rejection.onnx",
    format="onnx",
    export_parameters={
        "onnx_doc_string": "Classifier model for pion rejection in EEEMCal",
        "onnx_graph_name": "CalorimeterParticleID_BinaryClassification",
    }
)
import onnx
model = onnx.load(output_dir / "EcalEndcapN_pi_rejection.onnx")
onnx.checker.check_model(model)
# Rebuild the graph keeping only the first node and additionally exposing the
# first intermediate tensor (value_info[0]) as a graph output — presumably the
# raw probability tensor needed by the consumer; TODO confirm against the
# layout of CatBoost's ONNX export.
graph_def = onnx.helper.make_graph(
    nodes=[model.graph.node[0]],
    name=model.graph.name,
    inputs=model.graph.input,
    outputs=[model.graph.output[0], model.graph.value_info[0]],
    initializer=model.graph.initializer
)
model_def = onnx.helper.make_model(graph_def, producer_name=model.producer_name)
# Re-declare the opset: the trimmed graph uses only ai.onnx.ml operators.
del model_def.opset_import[:]
op_set = model_def.opset_import.add()
op_set.domain = "ai.onnx.ml"
op_set.version = 2
model_def = onnx.shape_inference.infer_shapes(model_def)
onnx.checker.check_model(model_def)
# Overwrite the original export with the trimmed model.
onnx.save(model_def, output_dir / "EcalEndcapN_pi_rejection.onnx")
#+end_src
#+RESULTS:
:results:
:end:
#+begin_src jupyter-python
# Cross-check: if the input trees already carry ParticleID probabilities
# (produced by EICrecon inference), compare them against this notebook's
# freshly trained classifier.
# NOTE(review): pi_train/e_train, *_leading_cluster_ixs, eval_x and plt come
# from earlier cells not visible here.
if "_EcalEndcapNParticleIDOutput_probability_tensor_floatData" in pi_train.fields:
    # infer the per-cluster output multiplicity from the flattened tensor data
    nums = ak.num(pi_train["_EcalEndcapNParticleIDOutput_probability_tensor_floatData"], axis=1)
    num_outputs = ak.min(nums[nums > 0])
    print(f"{num_outputs} outputs")
    # regroup the flat floatData into per-cluster rows, then select the leading cluster
    pi_train_proba = ak.flatten(ak.unflatten(pi_train["_EcalEndcapNParticleIDOutput_probability_tensor_floatData"], num_outputs, axis=1)[pi_train_leading_cluster_ixs])
    e_train_proba = ak.flatten(ak.unflatten(e_train["_EcalEndcapNParticleIDOutput_probability_tensor_floatData"], num_outputs, axis=1)[e_train_leading_cluster_ixs])
    train_proba = ak.concatenate([pi_train_proba, e_train_proba])
    pi_eval_proba = ak.flatten(ak.unflatten(pi_eval["_EcalEndcapNParticleIDOutput_probability_tensor_floatData"], num_outputs, axis=1)[pi_eval_leading_cluster_ixs])
    e_eval_proba = ak.flatten(ak.unflatten(e_eval["_EcalEndcapNParticleIDOutput_probability_tensor_floatData"], num_outputs, axis=1)[e_eval_leading_cluster_ixs])
    eval_proba = ak.concatenate([pi_eval_proba, e_eval_proba])
    # difference between stored and locally computed probabilities (ideally ~0)
    plt.hist(clf.predict_proba(eval_x.to_numpy())[:,1] - eval_proba[:,1].to_numpy())
    plt.savefig(output_dir / f"proba_diff.pdf", bbox_inches="tight")
    plt.show()
else:
    print("EcalEndcapNParticleIDOutput not present")
#+end_src
# Simulation stage for the calo_pid benchmark. The parallel matrix fans the
# work out over particle species and 10-file index ranges (100 files per
# species in total). Leading indentation restored — it was stripped by the
# web extraction, which left the YAML invalid.
sim:calo_pid:
  extends: .det_benchmark
  stage: simulate
  parallel:
    matrix:
      - PARTICLE: ["e-", "pi-"]
        # "<first> <last>" pairs consumed by `seq` in the script below
        INDEX_RANGE: [
          "0 9",
          "10 19",
          "20 29",
          "30 39",
          "40 49",
          "50 59",
          "60 69",
          "70 79",
          "80 89",
          "90 99",
        ]
  script:
    # Build one Snakemake target path per file index in the assigned range.
    - |
      snakemake $SNAKEMAKE_FLAGS --cores 5 \
        $(seq --format="sim_output/calo_pid/epic_inner_detector/${PARTICLE}/100MeVto20GeV/130to177deg/${PARTICLE}_100MeVto20GeV_130to177deg.%04.f.eicrecon.tree.edm4eic.root" ${INDEX_RANGE})
# Benchmark stage: runs the calo_pid analysis over the simulated samples.
# Leading indentation restored — it was stripped by the web extraction,
# which left the YAML invalid.
bench:calo_pid:
  allow_failure: true # until inference merged into EICrecon
  extends: .det_benchmark
  stage: benchmarks
  needs:
    - ["sim:calo_pid"]
  script:
    # Install the analysis' Python dependencies into a job-local user base.
    - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
    - pip install -r benchmarks/calo_pid/requirements.txt
    - snakemake $SNAKEMAKE_FLAGS --cores 1 results/epic_inner_detector/calo_pid
# Collect stage: verifies the benchmark outputs can be cleanly regenerated
# while preserving the already-produced results directory as artifacts.
# Leading indentation restored — it was stripped by the web extraction,
# which left the YAML invalid.
collect_results:calo_pid:
  allow_failure: true # until inference merged into EICrecon
  extends: .det_benchmark
  stage: collect
  needs:
    - "bench:calo_pid"
  script:
    - ls -lrht
    - mv results{,_save}/ # move results directory out of the way to preserve it
    - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/epic_inner_detector/calo_pid
    - mv results{_save,}/
awkward >= 2.4.0
catboost
onnx
scikit-learn
uproot >= 5.2.0
vector
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment