Skip to content
Snippets Groups Projects
Commit bdedaaca authored by Simon Gardner's avatar Simon Gardner
Browse files

Merge remote-tracking branch 'origin/master' into add_lowq2_benchmarks

parents 07f9c09e 9a60f27d
No related tags found
1 merge request!151Add lowq2 benchmarks
Pipeline #118726 skipped
Showing
with 1456 additions and 242 deletions
name: Mirror and Trigger EICweb
on:
delete:
push:
workflow_dispatch:
concurrency:
group: mirror
group: mirror-${{ github.event_name }}
cancel-in-progress: false
jobs:
......@@ -15,6 +16,7 @@ jobs:
permissions:
actions: write
contents: read
statuses: write
steps:
- name: Checkout
uses: actions/checkout@v4
......@@ -28,7 +30,9 @@ jobs:
username: ${{ secrets.GITLAB_USERNAME }}
ciskip: true
- name: Trigger EICweb
id: trigger_eicweb
uses: eic/trigger-gitlab-ci@v3
if: ${{ github.event_name != 'delete' }}
with:
url: https://eicweb.phy.anl.gov
project_id: 399
......@@ -39,3 +43,17 @@ jobs:
GITHUB_SHA=${{ github.event.pull_request.head.sha || github.sha }}
GITHUB_PR=${{ github.event.pull_request.number }}
PIPELINE_NAME=${{ github.repository }}: ${{ github.event.pull_request.title || github.ref_name }}
- name: Set pending EICweb status
if: ${{ github.event_name != 'delete' }}
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
DETECTOR_CONFIG: epic_craterlake
run: |
curl \
--fail-with-body \
-X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer $GITHUB_TOKEN" \
-H "X-GitHub-Api-Version: 2022-11-28" \
-d '{"context": "eicweb/detector_benchmarks (nightly, '"$DETECTOR_CONFIG"')", "state": "pending", "description": "Waiting for response from the EICweb", "target_url": "${{ fromJson(steps.trigger_eicweb.outputs.json).web_url }}"}' \
"https://api.github.com/repos/${{ github.repository }}/statuses/${{ github.event.pull_request.head.sha || github.sha }}"
BENCHMARKS_REGISTRY: docker.io/eicweb
BENCHMARKS_CONTAINER: jug_xl
BENCHMARKS_CONTAINER: eic_xl
BENCHMARKS_TAG: nightly
......@@ -3,9 +3,9 @@ image: ${BENCHMARKS_REGISTRY}/${BENCHMARKS_CONTAINER}:${BENCHMARKS_TAG}
variables:
DETECTOR: epic
DETECTOR_CONFIG: epic_craterlake
DETECTOR_REPOSITORYURL: 'https://github.com/eic/epic.git'
GITHUB_SHA: ''
GITHUB_REPOSITORY: ''
SNAKEMAKE_FLAGS: '--cache'
workflow:
name: '$PIPELINE_NAME'
......@@ -28,12 +28,14 @@ default:
- .local/bin
- .local/include
- .local/share
- .snakemake/log
- results
- config
- .env
- summary.txt
reports:
dotenv: .env
when: always
stages:
- status-pending
......@@ -45,7 +47,6 @@ stages:
- benchmarks
- collect
- deploy
- trigger
- status-report
.status:
......@@ -60,8 +61,8 @@ stages:
"https://api.github.com/repos/${GITHUB_REPOSITORY}/statuses/${GITHUB_SHA}" \
-d '{"state":"'"${STATE}"'",
"target_url":"'"${CI_PIPELINE_URL}"'",
"description":"'"${DESCRIPTION} $(TZ=America/New_York date)"'",
"context":"eicweb/detector_benchmarks ('"$DETECTOR_CONFIG"')"
"description":"'"$(TZ=America/New_York date)"'",
"context":"eicweb/detector_benchmarks ('"${BENCHMARKS_TAG}"', '"$DETECTOR_CONFIG"')"
}' ;
fi
......@@ -71,7 +72,6 @@ benchmarks:detector:pending:
extends: .status
variables:
STATE: "pending"
DESCRIPTION: "Started..."
when: always
common:setup:
......@@ -89,13 +89,6 @@ common:setup:
echo "BENCHMARKS_CONTAINER: ${BENCHMARKS_CONTAINER}"
echo "BENCHMARKS_REGISTRY: ${BENCHMARKS_REGISTRY}"
- source setup/bin/env.sh && ./setup/bin/install_common.sh
common:detector:
stage: initialize
needs: ["common:setup"]
script:
- source .local/bin/env.sh && build_detector.sh
- mkdir_local_data_link sim_output
- mkdir -p results
- mkdir -p config
......@@ -103,7 +96,7 @@ common:detector:
get_data:
stage: data_init
needs: ["common:detector"]
needs: ["common:setup"]
script:
- source .local/bin/env.sh
- ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output
......@@ -112,9 +105,10 @@ get_data:
.det_benchmark:
needs:
- ["get_data","common:detector"]
- ["get_data","common:setup"]
before_script:
- source .local/bin/env.sh
- source /opt/detector/epic-main/setup.sh
- ls -lrtha
- ln -s "${LOCAL_DATA_PATH}/sim_output" sim_output
- ln -s "${LOCAL_DATA_PATH}/datasets/data" data
......@@ -122,6 +116,7 @@ get_data:
- mkdir "${DETECTOR_CONFIG}"
- ln -s "${LOCAL_DATA_PATH}/sim_output" "${DETECTOR_CONFIG}/sim_output"
- ln -s "../results" "${DETECTOR_CONFIG}/results"
- mkdir -p "$SNAKEMAKE_OUTPUT_CACHE"
- ls -lrtha
retry:
max: 2
......@@ -129,26 +124,71 @@ get_data:
- runner_system_failure
include:
# - local: 'benchmarks/backgrounds/config.yml'
# - local: 'benchmarks/ecal_gaps/config.yml'
# - local: 'benchmarks/tracking_detectors/config.yml'
# - local: 'benchmarks/barrel_ecal/config.yml'
# - local: 'benchmarks/barrel_hcal/config.yml'
# - local: 'benchmarks/zdc/config.yml'
# - local: 'benchmarks/material_maps/config.yml'
# - local: 'benchmarks/material_scan/config.yml'
# - local: 'benchmarks/pid/config.yml'
# - local: 'benchmarks/timing/config.yml'
# - local: 'benchmarks/b0_tracker/config.yml'
- local: 'benchmarks/backgrounds/config.yml'
- local: 'benchmarks/backwards_ecal/config.yml'
- local: 'benchmarks/calo_pid/config.yml'
- local: 'benchmarks/ecal_gaps/config.yml'
- local: 'benchmarks/tracking_detectors/config.yml'
- local: 'benchmarks/tracking_performances/config.yml'
- local: 'benchmarks/tracking_performances_dis/config.yml'
- local: 'benchmarks/barrel_ecal/config.yml'
- local: 'benchmarks/barrel_hcal/config.yml'
- local: 'benchmarks/lfhcal/config.yml'
- local: 'benchmarks/zdc/config.yml'
- local: 'benchmarks/zdc_lyso/config.yml'
- local: 'benchmarks/zdc_neutron/config.yml'
- local: 'benchmarks/zdc_photon/config.yml'
- local: 'benchmarks/zdc_pi0/config.yml'
- local: 'benchmarks/material_scan/config.yml'
- local: 'benchmarks/pid/config.yml'
- local: 'benchmarks/timing/config.yml'
- local: 'benchmarks/b0_tracker/config.yml'
- local: 'benchmarks/insert_muon/config.yml'
- local: 'benchmarks/insert_tau/config.yml'
- local: 'benchmarks/zdc_sigma/config.yml'
- local: 'benchmarks/zdc_lambda/config.yml'
- local: 'benchmarks/insert_neutron/config.yml'
- local: 'benchmarks/femc_electron/config.yml'
- local: 'benchmarks/femc_photon/config.yml'
- local: 'benchmarks/femc_pi0/config.yml'
- local: 'benchmarks/LOWQ2/analysis/config.yml'
deploy_results:
allow_failure: true
stage: deploy
needs:
# - ["collect_results:zdc","collect_results:barrel_ecal","collect_results:barrel_hcal","collect_results:material_scan"]
- "collect_results:backgrounds"
- "collect_results:backwards_ecal"
- "collect_results:barrel_ecal"
- "collect_results:barrel_hcal"
- "collect_results:calo_pid"
- "collect_results:ecal_gaps"
- "collect_results:lfhcal"
- "collect_results:material_scan"
- "collect_results:pid"
- "collect_results:tracking_performance"
- "collect_results:tracking_performance_campaigns"
- "collect_results:zdc_sigma"
- "collect_results:zdc_lambda"
- "collect_results:insert_neutron"
- "collect_results:tracking_performances_dis"
- "collect_results:zdc"
- "collect_results:zdc_lyso"
- "collect_results:zdc_neutron"
- "collect_results:insert_muon"
- "collect_results:insert_tau"
- "collect_results:zdc_photon"
- "collect_results:zdc_pi0"
- "collect_results:femc_electron"
- "collect_results:femc_photon"
- "collect_results:femc_pi0"
script:
- echo "deploy results!"
- snakemake $SNAKEMAKE_FLAGS --cores 1 results/metadata.json
- find results -print | sort | tee summary.txt
- wget https://dl.pelicanplatform.org/7.13.0/pelican_Linux_x86_64.tar.gz
- sha256sum -c <(echo '38ac8548c67302299e50a1b81c159ed418e90d84a6606ddd377fd2c8b164d114 pelican_Linux_x86_64.tar.gz')
- tar zxf pelican_Linux_x86_64.tar.gz
- mv results pipeline-$CI_PIPELINE_ID; tar cf pipeline-$CI_PIPELINE_ID.tar pipeline-$CI_PIPELINE_ID/; mv pipeline-$CI_PIPELINE_ID results
- ./pelican-*/pelican object copy pipeline-$CI_PIPELINE_ID.tar $OSDF_ENDPOINT$OSDF_OUTPUT_PREFIX/pipeline-$CI_PIPELINE_ID.tar
benchmarks:detector:success:
stage: status-report
......@@ -156,7 +196,9 @@ benchmarks:detector:success:
extends: .status
variables:
STATE: "success"
DESCRIPTION: "Succeeded!"
after_script:
# Cleanup scratch space
- rm -rfv $LOCAL_DATA_PATH
when: on_success
benchmarks:detector:failure:
......@@ -165,51 +207,8 @@ benchmarks:detector:failure:
extends: .status
variables:
STATE: "failure"
DESCRIPTION: "Failed!"
when: on_failure
benchmarks:reconstruction:
stage: trigger
variables:
GITHUB_SHA: "${GITHUB_SHA}"
GITHUB_REPOSITORY: "${GITHUB_REPOSITORY}"
DETECTOR: "$DETECTOR"
DETECTOR_CONFIG: "$DETECTOR_CONFIG"
DETECTOR_VERSION: "$DETECTOR_VERSION"
DETECTOR_REPOSITORYURL: "$DETECTOR_REPOSITORYURL"
DETECTOR_DEPLOY_TOKEN_USERNAME: "${DETECTOR_DEPLOY_TOKEN_USERNAME}"
DETECTOR_DEPLOY_TOKEN_PASSWORD: "${DETECTOR_DEPLOY_TOKEN_PASSWORD}"
COMMON_BENCH_VERSION: "$COMMON_BENCH_VERSION"
BENCHMARKS_TAG: "${BENCHMARKS_TAG}"
BENCHMARKS_CONTAINER: "${BENCHMARKS_CONTAINER}"
BENCHMARKS_REGISTRY: "${BENCHMARKS_REGISTRY}"
PIPELINE_NAME: "$PIPELINE_NAME"
trigger:
project: EIC/benchmarks/reconstruction_benchmarks
strategy: depend
needs: ["deploy_results"]
benchmarks:physics:
stage: trigger
variables:
GITHUB_SHA: "${GITHUB_SHA}"
GITHUB_REPOSITORY: "${GITHUB_REPOSITORY}"
DETECTOR: "$DETECTOR"
DETECTOR_CONFIG: "$DETECTOR_CONFIG"
DETECTOR_VERSION: "$DETECTOR_VERSION"
DETECTOR_REPOSITORYURL: "$DETECTOR_REPOSITORYURL"
DETECTOR_DEPLOY_TOKEN_USERNAME: "${DETECTOR_DEPLOY_TOKEN_USERNAME}"
DETECTOR_DEPLOY_TOKEN_PASSWORD: "${DETECTOR_DEPLOY_TOKEN_PASSWORD}"
COMMON_BENCH_VERSION: "$COMMON_BENCH_VERSION"
BENCHMARKS_TAG: "${BENCHMARKS_TAG}"
BENCHMARKS_CONTAINER: "${BENCHMARKS_CONTAINER}"
BENCHMARKS_REGISTRY: "${BENCHMARKS_REGISTRY}"
PIPELINE_NAME: "$PIPELINE_NAME"
trigger:
project: EIC/benchmarks/physics_benchmarks
strategy: depend
needs: ["deploy_results"]
pages:
stage: deploy
rules:
......
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: check-added-large-files
args: ['--maxkb=128']
- id: check-yaml
EIC Detector Benchmarks
=======================
ePIC Detector Benchmarks
========================
[![pipeline status](https://eicweb.phy.anl.gov/EIC/benchmarks/detector_benchmarks/badges/master/pipeline.svg)](https://eicweb.phy.anl.gov/EIC/benchmarks/detector_benchmarks/-/commits/master)
## Overview
Detector benchmarks are meant to test for regressions in individual detector subsystems.
The analysis is meant to avoid a reconstruction step.
So this precludes using [juggler](https://eicweb.phy.anl.gov/EIC/juggler) for processing the events.
Detector benchmarks are meant to provide a maintained set of performance plots for individual detector subsystems.
## Documentation
See [common_bench](https://eicweb.phy.anl.gov/EIC/benchmarks/common_bench/).
- See [tutorial](https://eic.github.io/tutorial-developing-benchmarks/)
- See [common_bench](https://eicweb.phy.anl.gov/EIC/benchmarks/common_bench/).
## Adding new benchmarks
To get an idea of what to do look at an existing benchmark in the
[`benchmarks` directory](https://eicweb.phy.anl.gov/EIC/benchmarks/detector_benchmarks/-/tree/master/benchmarks).
## Running Locally
### Local development example
Here we setup to use our local build of the `juggler` library.
Note juggler is not needed for `detector_benchmarks` because it is not used but this is the same setup for
`reconstruction_benchmarks` and `physics_benchmarks`.
First set some environment variables.
```
export DETECTOR=epic # epic is the default
```
```
git clone https://eicweb.phy.anl.gov/EIC/benchmarks/detector_benchmarks.git && cd detector_benchmarks
git clone https://eicweb.phy.anl.gov/EIC/benchmarks/common_bench.git setup
source setup/bin/env.sh && ./setup/bin/install_common.sh
source .local/bin/env.sh && build_detector.sh
mkdir_local_data_link sim_output
mkdir -p results
mkdir -p config
```
[`benchmarks` directory](https://github.com/eic/detector_benchmarks/tree/master/benchmarks).
Currently a good reference for Snakemake instrumentation is available in the `tracking_performances` benchmark.
It relies on single-particle simulations that can be either produced on eicweb or downloaded from official campaigns.
### File organization
For a minimal benchmark you'll need to add
`benchmarks/<benchmark_name_here>/config.yml` and
`benchmarks/<benchmark_name_here>/Snakefile`, plus the analysis script/macro.
The `Snakefile` has to be included in the root `./Snakefile` of the repository.
That common entry point is needed to ensure that common simulation samples can
be defined to be re-used by several benchmarks at a time.
The `config.yml` will require an include from the `./.gitlab-ci.yml`.
### Pass/Fail tests
- Create a script that returns exit status 0 for success.
- Any non-zero value will be considered failure.
- Script
configfile: "config.yaml"
if config["remote"] == "S3":
from snakemake.remote.S3 import RemoteProvider as S3RemoteProvider
provider = S3RemoteProvider(
endpoint_url="https://eics3.sdcc.bnl.gov:9000",
access_key_id=os.environ["S3_ACCESS_KEY"],
secret_access_key=os.environ["S3_SECRET_KEY"],
)
remote_path = lambda path: f"eictest/{path}"
elif config["remote"] == "XRootD":
from snakemake.remote.XRootD import RemoteProvider as XRootDRemoteProvider
provider = XRootDRemoteProvider(
stay_on_remote=False,
)
remote_path = lambda path: f"root://dtn-eic.jlab.org//work/eic2/{path}"
else:
raise ValueError(f"Unexpected config[\"remote\"] = {config['remote']}")
configfile: "snakemake.yml"
import functools
import os
from snakemake.logging import logger
@functools.cache
def get_spack_package_hash(package_name):
    """Return the Spack package hash for *package_name*, or "" on failure.

    The result is cached per package name.  Failures (Spack not installed,
    or ``spack find`` returning non-zero) are logged and collapsed to an
    empty string so the DAG can still be evaluated without Spack.
    """
    import json
    import subprocess  # local import: not guaranteed to be imported at module level
    try:
        ver_info = json.loads(subprocess.check_output(["spack", "find", "--json", package_name]))
        return ver_info[0]["package_hash"]
    except FileNotFoundError as e:
        logger.warning("Spack is not installed")
        return ""
    except subprocess.CalledProcessError as e:
        # e.g. the package is not installed; proceed with an empty hash
        # (was a bare print(); use the logger for consistency with the branch above)
        logger.warning(e)
        return ""
@functools.cache
def find_epic_libraries():
    """Locate the installed "epic" geometry library.

    Returns a single-element list with the resolved library path, or an
    empty list when the library is not available, so the DAG can still be
    evaluated without it.
    """
    import ctypes.util

    found = ctypes.util.find_library("epic")
    if found is None:
        return []
    return [os.environ["DETECTOR_PATH"] + "/../../lib/" + found]
include: "benchmarks/backgrounds/Snakefile"
include: "benchmarks/backwards_ecal/Snakefile"
include: "benchmarks/barrel_ecal/Snakefile"
include: "benchmarks/calo_pid/Snakefile"
include: "benchmarks/ecal_gaps/Snakefile"
include: "benchmarks/material_scan/Snakefile"
include: "benchmarks/tracking_performances/Snakefile"
include: "benchmarks/tracking_performances_dis/Snakefile"
include: "benchmarks/lfhcal/Snakefile"
include: "benchmarks/zdc_lyso/Snakefile"
include: "benchmarks/zdc_neutron/Snakefile"
include: "benchmarks/insert_muon/Snakefile"
include: "benchmarks/zdc_lambda/Snakefile"
include: "benchmarks/zdc_photon/Snakefile"
include: "benchmarks/zdc_pi0/Snakefile"
include: "benchmarks/zdc_sigma/Snakefile"
include: "benchmarks/insert_neutron/Snakefile"
include: "benchmarks/insert_tau/Snakefile"
include: "benchmarks/femc_electron/Snakefile"
include: "benchmarks/femc_photon/Snakefile"
include: "benchmarks/femc_pi0/Snakefile"
# Normalize the configured remote provider once at parse time.
use_s3 = config["remote_provider"].lower() == "s3"
use_xrootd = config["remote_provider"].lower() == "xrootd"

def get_remote_path(path):
    """Map a dataset-relative path to its remote URL (S3 or XRootD).

    Raises ValueError when config["remote_provider"] is neither "s3" nor
    "xrootd".  (The original raised an undefined name `runtime_exception`
    and passed a non-f-string, so the offending value was never shown.)
    """
    if use_s3:
        return f"s3https://eics3.sdcc.bnl.gov:9000/eictest/{path}"
    elif use_xrootd:
        return f"root://dtn-eic.jlab.org//volatile/eic/{path}"
    else:
        raise ValueError(f'Unexpected value for config["remote_provider"]: {config["remote_provider"]}')
# Download one file from the EPIC dataset area into ./EPIC/..., using
# xrdcp (XRootD) or mc (S3) depending on config["remote_provider"].
rule fetch_epic:
    output:
        filepath="EPIC/{PATH}"
    params:
        # wildcards are not included in hash for caching, we need to add them as params
        PATH=lambda wildcards: wildcards.PATH
    # cache the fetched file between pipelines; retry transient transfer failures
    cache: True
    retries: 3
    shell: """
xrdcp --debug 2 root://dtn-eic.jlab.org//volatile/eic/{output.filepath} {output.filepath}
""" if use_xrootd else """
mc cp S3/eictest/{output.filepath} {output.filepath}
""" if use_s3 else f"""
echo 'Unexpected value for config["remote_provider"]: {config["remote_provider"]}'
exit 1
"""
rule warmup_run:
......@@ -28,7 +88,8 @@ rule warmup_run:
"warmup/{DETECTOR_CONFIG}.edm4hep.root",
message: "Ensuring that calibrations/fieldmaps are available for {wildcards.DETECTOR_CONFIG}"
shell: """
ddsim \
set -m # monitor mode to prevent lingering processes
exec ddsim \
--runType batch \
--numberOfEvents 1 \
--compactFile "$DETECTOR_PATH/{wildcards.DETECTOR_CONFIG}.xml" \
......@@ -57,3 +118,26 @@ rule org2py:
"""
awk -f {input.converter} {input.notebook} > {output}
"""
# Record pipeline provenance (GitLab CI and GitHub environment variables)
# into results/metadata.json, then check that the output is well-formed JSON.
# Doubled braces ({{ ... }}) are Snakemake escapes; the shell sees single braces.
rule metadata:
    output:
        "results/metadata.json"
    shell:
        """
cat > {output} <<EOF
{{
  "CI_COMMIT_REF_NAME": "${{CI_COMMIT_REF_NAME:-}}",
  "CI_COMMIT_SHA": "${{CI_COMMIT_SHA:-}}",
  "CI_PIPELINE_ID": "${{CI_PIPELINE_ID:-}}",
  "CI_PIPELINE_SOURCE": "${{CI_PIPELINE_SOURCE:-}}",
  "CI_PROJECT_ID": "${{CI_PROJECT_ID:-}}",
  "GITHUB_REPOSITORY": "${{GITHUB_REPOSITORY:-}}",
  "GITHUB_SHA": "${{GITHUB_SHA:-}}",
  "GITHUB_PR": "${{GITHUB_PR:-}}",
  "PIPELINE_NAME": "${{PIPELINE_NAME:-}}"
}}
EOF
# validate JSON
jq '.' {output}
"""
......@@ -2,56 +2,27 @@ import os
import shutil
rule backgrounds_get_beam_gas_electron:
input:
provider.remote(remote_path("EPIC/EVGEN/BACKGROUNDS/BEAMGAS/electron/beam_gas_ep_10GeV_foam_emin10keV_10Mevt_vtx.hepmc")),
output:
"input/backgrounds/beam_gas_electron.hepmc",
run:
shutil.move(input[0], output[0])
rule backgrounds_get_beam_gas_proton:
input:
provider.remote(remote_path("EPIC/EVGEN/BACKGROUNDS/BEAMGAS/proton/ProtonBeamGasEvents/100GeV/100GeV_1.hepmc")),
output:
"input/backgrounds/beam_gas_proton.hepmc",
run:
shutil.move(input[0], output[0])
rule backgrounds_get_DIS:
input:
provider.remote(remote_path("EPIC/EVGEN/DIS/NC/{BEAM}/minQ2={MINQ2}/pythia8NCDIS_{BEAM}_minQ2={MINQ2}_{SUFFIX}.hepmc")),
wildcard_constraints:
BEAM="\d+x\d+",
MINQ2="\d+",
output:
"input/backgrounds/pythia8NCDIS_{BEAM}_minQ2={MINQ2}_{SUFFIX}.hepmc",
run:
shutil.move(input[0], output[0])
rule backgrounds_sim:
input:
hepmc="input/backgrounds/{NAME}.hepmc",
warmup="warmup/{DETECTOR_CONFIG}.edm4hep.root",
output:
"sim_output/{DETECTOR_CONFIG}/backgrounds/{NAME}.edm4hep.root",
"sim_output/{DETECTOR_CONFIG}/backgrounds/{PATH}.edm4hep.root",
log:
"sim_output/{DETECTOR_CONFIG}/backgrounds/{NAME}.edm4hep.root.log",
"sim_output/{DETECTOR_CONFIG}/backgrounds/{PATH}.edm4hep.root.log",
params:
N_EVENTS=100
N_EVENTS=100,
hepmc=lambda wildcards: get_remote_path(f"{wildcards.PATH}.hepmc3.tree.root"),
shell:
"""
ddsim \
set -m # monitor mode to prevent lingering processes
exec ddsim \
--runType batch \
--part.minimalKineticEnergy 100*GeV \
--filter.tracker edep0 \
-v WARNING \
--numberOfEvents {params.N_EVENTS} \
--compactFile $DETECTOR_PATH/{wildcards.DETECTOR_CONFIG}.xml \
--inputFiles {input.hepmc} \
--inputFiles {params.hepmc} \
--outputFile {output}
"""
......@@ -62,30 +33,37 @@ rule backgrounds_ecal_backwards:
input:
matplotlibrc=".matplotlibrc",
script="benchmarks/backgrounds/ecal_backwards.py",
electron_beam_gas_gen="input/backgrounds/beam_gas_electron.hepmc",
electron_beam_gas_sim="sim_output/" + DETECTOR_CONFIG + "/backgrounds/beam_gas_electron.edm4hep.root",
physics_signal_sim="sim_output/" + DETECTOR_CONFIG + "/backgrounds/pythia8NCDIS_10x100_minQ2=1_beamEffects_xAngle=-0.025_hiDiv_1.edm4hep.root",
proton_beam_gas_gen="input/backgrounds/beam_gas_proton.hepmc",
proton_beam_gas_sim="sim_output/" + DETECTOR_CONFIG + "/backgrounds/beam_gas_proton.edm4hep.root",
electron_beam_gas_sim="sim_output/" + DETECTOR_CONFIG + "/backgrounds/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/electron/GETaLM1.0.0-1.0/10GeV/GETaLM1.0.0-1.0_ElectronBeamGas_10GeV_foam_emin10keV_run001.edm4hep.root",
physics_signal_sim="sim_output/" + DETECTOR_CONFIG + "/backgrounds/EPIC/EVGEN/DIS/NC/10x100/minQ2=1/pythia8NCDIS_10x100_minQ2=1_beamEffects_xAngle=-0.025_hiDiv_1.edm4hep.root",
proton_beam_gas_sim="sim_output/" + DETECTOR_CONFIG + "/backgrounds/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/proton/pythia8.306-1.0/100GeV/pythia8.306-1.0_ProtonBeamGas_100GeV_run001.edm4hep.root",
output:
directory("results/backgrounds/backwards_ecal")
log:
scheduler=".logs/results/backgrounds/backwards_ecal/scheduler.log",
worker=".logs/results/backgrounds/backwards_ecal/worker.log",
threads: workflow.cores
shell:
"""
set -m # monitor mode to prevent lingering processes
cleanup() {{
echo Cleaning up
kill $WORKER_PID $SCHEDULER_PID
}}
trap cleanup EXIT
PORT=$RANDOM
dask scheduler --port $PORT &
dask scheduler --port $PORT 2>{log.scheduler} &
export DASK_SCHEDULER=localhost:$PORT
SCHEDULER_PID=$!
dask worker tcp://$DASK_SCHEDULER --nworkers {threads} --nthreads 1 &
dask worker tcp://$DASK_SCHEDULER --nworkers {threads} --nthreads 1 2>{log.worker} &
WORKER_PID=$!
env \
MATPLOTLIBRC={input.matplotlibrc} \
ELECTRON_BEAM_GAS_GEN=$(realpath {input.electron_beam_gas_gen}) \
ELECTRON_BEAM_GAS_GEN=root://dtn-eic.jlab.org//volatile/eic/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/electron/GETaLM1.0.0-1.0/10GeV/GETaLM1.0.0-1.0_ElectronBeamGas_10GeV_foam_emin10keV_run001.hepmc3.tree.root \
ELECTRON_BEAM_GAS_SIM=$(realpath {input.electron_beam_gas_sim}) \
PHYSICS_PROCESS_SIM=$(realpath {input.physics_signal_sim}) \
PROTON_BEAM_GAS_GEN=$(realpath {input.proton_beam_gas_gen}) \
PROTON_BEAM_GAS_GEN=root://dtn-eic.jlab.org//volatile/eic/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/proton/pythia8.306-1.0/100GeV/pythia8.306-1.0_ProtonBeamGas_100GeV_run001.hepmc3.tree.root \
PROTON_BEAM_GAS_SIM=$(realpath {input.proton_beam_gas_sim}) \
OUTPUT_DIR={output} \
python {input.script}
kill $WORKER_PID $SCHEDULER_PID
"""
......@@ -4,7 +4,11 @@ sim:backgrounds:
script:
- mkdir -p $LOCAL_DATA_PATH/input
- ln -s $LOCAL_DATA_PATH/input input
- snakemake --cores 2 sim_output/$DETECTOR_CONFIG/beam_gas_{electron,proton}.edm4hep.root sim_output/$DETECTOR_CONFIG/pythia8NCDIS_10x100_minQ2=1_beamEffects_xAngle=-0.025_hiDiv_vtxfix_1.edm4hep.root
- |
snakemake $SNAKEMAKE_FLAGS --cores 2 \
sim_output/$DETECTOR_CONFIG/backgrounds/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/electron/GETaLM1.0.0-1.0/10GeV/GETaLM1.0.0-1.0_ElectronBeamGas_10GeV_foam_emin10keV_run001.edm4hep.root \
sim_output/$DETECTOR_CONFIG/backgrounds/EPIC/EVGEN/DIS/NC/10x100/minQ2=1/pythia8NCDIS_10x100_minQ2=1_beamEffects_xAngle=-0.025_hiDiv_1.edm4hep.root \
sim_output/$DETECTOR_CONFIG/backgrounds/EPIC/EVGEN/BACKGROUNDS/BEAMGAS/proton/pythia8.306-1.0/100GeV/pythia8.306-1.0_ProtonBeamGas_100GeV_run001.edm4hep.root
bench:backgrounds_emcal_backwards:
extends: .det_benchmark
......@@ -15,7 +19,7 @@ bench:backgrounds_emcal_backwards:
- ln -s $LOCAL_DATA_PATH/input input
- export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
- pip install -r benchmarks/backgrounds/requirements.txt
- snakemake --cores 8 backgrounds_ecal_backwards
- snakemake $SNAKEMAKE_FLAGS --cores 8 backgrounds_ecal_backwards
collect_results:backgrounds:
extends: .det_benchmark
......@@ -24,3 +28,6 @@ collect_results:backgrounds:
- "bench:backgrounds_emcal_backwards"
script:
- ls -lrht
- mv results{,_save}/ # move results directory out of the way to preserve it
- snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output backgrounds_ecal_backwards
- mv results{_save,}/
......@@ -15,7 +15,7 @@ import dask_awkward as dak
import dask_histogram as dh
import numpy as np
import uproot
import pyhepmc
from pyHepMC3 import HepMC3
#+end_src
#+begin_src jupyter-python :results silent
......@@ -42,6 +42,8 @@ def setup_presentation_style():
'legend.loc': 'upper right',
'xtick.labelsize': 8,
'ytick.labelsize': 8,
'xaxis.labellocation': 'right',
'yaxis.labellocation': 'top',
'pgf.rcfonts': False,
})
......@@ -66,21 +68,21 @@ output_dir.mkdir(parents=True, exist_ok=True)
#+begin_src jupyter-python :results silent
builder = ak.ArrayBuilder()
n = 0
with pyhepmc.open(PROTON_BEAM_GAS_GEN) as f:
it = iter(f)
for event in f:
f = HepMC3.ReaderPlugin(PROTON_BEAM_GAS_GEN, "libHepMC3rootIO.so", "newReaderRootTreefile")
event = HepMC3.GenEvent()
while f.read_event(event):
builder.begin_list()
for vertex in event.vertices:
assert event.length_unit.name == "MM"
assert event.length_unit().name == "MM"
for vertex in event.vertices():
with builder.record("Vector4D"):
builder.field("x")
builder.real(vertex.position.x)
builder.real(vertex.position().x())
builder.field("y")
builder.real(vertex.position.y)
builder.real(vertex.position().y())
builder.field("z")
builder.real(vertex.position.z)
builder.real(vertex.position().z())
builder.field("t")
builder.real(vertex.position.t)
builder.real(vertex.position().t())
builder.end_list()
n += 1
if n > 10000: break
......@@ -89,21 +91,21 @@ vertices_proton_beam_gas = builder.snapshot()
builder = ak.ArrayBuilder()
n = 0
with pyhepmc.open(ELECTRON_BEAM_GAS_GEN) as f:
it = iter(f)
for event in f:
f = HepMC3.ReaderPlugin(ELECTRON_BEAM_GAS_GEN, "libHepMC3rootIO.so", "newReaderRootTreefile")
event = HepMC3.GenEvent()
while f.read_event(event):
builder.begin_list()
for vertex in event.vertices:
assert event.length_unit.name == "MM"
with builder.record("Vector3D"):
assert event.length_unit().name == "MM"
for vertex in event.vertices():
with builder.record("Vector4D"):
builder.field("x")
builder.real(vertex.position.x)
builder.real(vertex.position().x())
builder.field("y")
builder.real(vertex.position.y)
builder.real(vertex.position().y())
builder.field("z")
builder.real(vertex.position.z)
builder.real(vertex.position().z())
builder.field("t")
builder.real(vertex.position.t)
builder.real(vertex.position().t())
builder.end_list()
n += 1
if n > 10000: break
......@@ -153,6 +155,7 @@ plt.xlabel("vertex[0].t, mm")
plt.legend()
plt.savefig(output_dir / "vertex_time_distribution.png", bbox_inches="tight")
plt.show()
plt.clf()
for label, ds in datasets.items():
if "vertices" not in ds: continue
......@@ -164,6 +167,7 @@ plt.xlabel("vertex[0].z, mm")
plt.legend()
plt.savefig(output_dir / "vertex_z_distribution.png", bbox_inches="tight")
plt.show()
plt.clf()
for label, ds in datasets.items():
if "vertices" not in ds: continue
......@@ -178,6 +182,7 @@ plt.ylabel("vertex[0].x, mm")
plt.legend()
plt.savefig(output_dir / "vertex_xz_distribution.png", bbox_inches="tight")
plt.show()
plt.clf()
for ix, (label, ds) in enumerate(datasets.items()):
if "vertices" not in ds: continue
......@@ -192,6 +197,7 @@ for ix, (label, ds) in enumerate(datasets.items()):
plt.title(label)
plt.savefig(output_dir / f"vertex_yz_distribution_{ix}.png", bbox_inches="tight")
plt.show()
plt.clf()
#+end_src
** Simulation results
......@@ -210,7 +216,8 @@ for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
for ix, vals in enumerate([xs, ys]):
centers = np.unique(vals)
diffs = centers[1:] - centers[:-1]
bin_widths[ix] = np.min(diffs[diffs > 0]) if np.sum(diffs > 0) > 0 else 1.
EPSILON = 1e-5
bin_widths[ix] = np.min(diffs[diffs > EPSILON]) if np.sum(diffs > EPSILON) > 0 else 1.
print(f"bin_widths[{ix}]", bin_widths[ix])
bins = {
......@@ -227,10 +234,11 @@ for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
)
plt.colorbar().set_label("energy, GeV", loc="top")
plt.title(f"{label}, event_id={event_id}\n{collection_name}")
plt.xlabel("hit x, mm", loc="right")
plt.ylabel("hit y, mm", loc="top")
plt.xlabel("hit x, mm")
plt.ylabel("hit y, mm")
plt.savefig(output_dir / f"{collection_name}_event_display_{dataset_ix}.png", bbox_inches="tight")
plt.show()
plt.clf()
#+end_src
** Discovering number of cells
......@@ -280,11 +288,12 @@ for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
plt.title(f"{collection_name}")
plt.legend()
plt.xlabel("r, mm", loc="right")
plt.xlabel("r, mm")
dr = (r_axis.edges[1] - r_axis.edges[0])
plt.ylabel(f"Number of towers per {dr} mm slice in $r$", loc="top")
plt.ylabel(f"Number of towers per {dr} mm slice in $r$")
plt.savefig(output_dir / f"{collection_name}_num_towers.png", bbox_inches="tight")
plt.show()
plt.clf()
#+end_src
** Plotting the rates
......@@ -319,11 +328,12 @@ for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
plt.title(f"for $E_{{dep.}} >$ {edep_min * 1000} MeV\n{collection_name}")
plt.legend()
plt.xlabel("r, mm", loc="right")
plt.ylabel("rate per tower, Hz", loc="top")
plt.xlabel("r, mm")
plt.ylabel("rate per tower, Hz")
plt.yscale("log")
plt.savefig(output_dir / f"{collection_name}_hit_rate_vs_r_edep_min_{edep_min:.3f}.png", bbox_inches="tight")
plt.show()
plt.clf()
#+end_src
#+begin_src jupyter-python
......@@ -349,11 +359,12 @@ for collection_name in ["EcalEndcapNHits", "EcalEndcapPHits"]:
plt.title(rf"for events with $E_{{\mathrm{{dep. tot.}}}}$ $>$ {totedep_min} GeV" + f"\n{collection_name}")
plt.legend()
plt.xlabel("$z$ of the first interaction vertex, mm", loc="right")
plt.ylabel("rate, Hz", loc="top")
plt.xlabel("$z$ of the first interaction vertex, mm")
plt.ylabel("rate, Hz")
plt.yscale("log")
plt.savefig(output_dir / f"{collection_name}_hit_rate_vs_z_totedep_min_{totedep_min:.1f}.png", bbox_inches="tight")
plt.show()
plt.clf()
#+end_src
#+begin_src jupyter-python
......@@ -429,7 +440,7 @@ for edep_min in [0]: # GeV
plt.sca(ax_cur)
plt.legend()
plt.title(f"for $E_{{dep.}} >$ {edep_min * 1000} MeV")
plt.ylabel("rate, Hz", loc="top")
plt.ylabel("rate, Hz")
plt.yscale("log")
plt.xticks(rotation=90, ha='right')
fig_cur.savefig(f"rates_edep_min_{edep_min}_{dataset_ix}.png", bbox_inches="tight")
......@@ -439,9 +450,10 @@ for edep_min in [0]: # GeV
plt.sca(ax_cmb)
plt.legend()
plt.title(f"for $E_{{dep.}} >$ {edep_min * 1000} MeV")
plt.ylabel("rate per tower, Hz", loc="top")
plt.ylabel("rate per tower, Hz")
plt.yscale("log")
plt.xticks(rotation=90, ha='right')
fig_cmb.savefig(f"rates_edep_min_{edep_min}.png", bbox_inches="tight")
plt.show()
plt.clf()
#+end_src
awkward >= 2.4.0
dask >= 2023
dask_awkward
dask_histogram
distributed >= 2023
awkward ~= 2.7.2
dask ~= 2024.12.1
dask_awkward ~= 2024.12.2
dask_histogram ~= 2024.12.1
distributed ~= 2024.12.1
pyhepmc
uproot >= 5.2.0
uproot ~= 5.5.1
def get_n_events(wildcards):
    """Events per simulation job, scaled down by the square root of the energy.

    The ENERGY wildcard (e.g. "100MeV", "2GeV") is parsed by stripping the
    "GeV" suffix and turning "MeV" into an "e-3" exponent, so "100MeV"
    reads as 0.1 GeV.  Electrons start from a base of 1000 events; all
    other particles from 2000.
    """
    energy_gev = float(wildcards.ENERGY.replace("GeV", "").replace("MeV", "e-3"))
    base = 1000 if wildcards.PARTICLE == "e-" else 2000
    return int(base // energy_gev ** 0.5)
# ddsim single-particle simulation for the backwards ECal benchmark.
# One output file per (detector config, particle, energy, phase space, index).
rule backwards_ecal_sim:
    input:
        # ancient(): steering-file timestamp changes do not trigger re-simulation
        steering_file=ancient("EPIC/EVGEN/SINGLE/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.steer"),
        warmup="warmup/{DETECTOR_CONFIG}.edm4hep.root",
        geometry_lib=find_epic_libraries(),
    output:
        "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
    log:
        "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root.log",
    wildcard_constraints:
        PARTICLE="(e-|pi-)",
        ENERGY="[0-9]+[kMG]eV",
        PHASE_SPACE="(3to50|45to135|130to177)deg",
        INDEX=r"\d{4}",
    params:
        N_EVENTS=get_n_events,
        # deterministic per-index random seed
        SEED=lambda wildcards: "1" + wildcards.INDEX,
        DETECTOR_PATH=os.environ["DETECTOR_PATH"],
        DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG,
        # simulation-stack hashes enter the cache key so cached outputs are
        # invalidated when dd4hep/npsim change
        DD4HEP_HASH=get_spack_package_hash("dd4hep"),
        NPSIM_HASH=get_spack_package_hash("npsim"),
    cache: True
    shell:
        """
set -m # monitor mode to prevent lingering processes
exec ddsim \
    --runType batch \
    --enableGun \
    --steeringFile "{input.steering_file}" \
    --random.seed {params.SEED} \
    --filter.tracker edep0 \
    -v WARNING \
    --numberOfEvents {params.N_EVENTS} \
    --compactFile {params.DETECTOR_PATH}/{params.DETECTOR_CONFIG}.xml \
    --outputFile {output}
"""
# EICrecon reconstruction of one simulated file, keeping only the
# collections used by the backwards ECal analysis.
rule backwards_ecal_recon:
    input:
        "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
    output:
        "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root",
    log:
        "sim_output/backwards_ecal/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root.log",
    wildcard_constraints:
        INDEX=r"\d{4}",
    params:
        DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG,
        # eicrecon package hash participates in the cache key
        EICRECON_HASH=get_spack_package_hash("eicrecon"),
    cache: True
    shell: """
set -m # monitor mode to prevent lingering processes
exec env DETECTOR_CONFIG={params.DETECTOR_CONFIG} \
  eicrecon {input} -Ppodio:output_file={output} \
    -Ppodio:output_collections=MCParticles,EcalEndcapNRecHits,EcalEndcapNClusters
"""
# Collect the 20 locally produced per-index reconstruction outputs for one
# (particle, energy, phase space) point into a .lst file, one path per line.
rule backwards_ecal_local_sim_list:
    input:
        expand(
            "sim_output/backwards_ecal/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{{ENERGY}}/{{PHASE_SPACE}}/{{PARTICLE}}_{{ENERGY}}_{{PHASE_SPACE}}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
            INDEX=range(20),
        ),
    output:
        "listing/backwards_ecal/local/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst",
    run:
        with open(output[0], "wt") as fp:
            fp.write("\n".join(input))
# Build listing files for official campaign productions.
# Default ("stream_from_xrootd" truthy): write full root:// URLs so the
# analysis streams files directly.  Otherwise a checkpoint materializes the
# remote listing first so each file can be fetched as a DAG dependency.
if config.get("stream_from_xrootd", True) not in [False, "", "0", "false"]:
    # NOTE(review): the "empty file listing" error message contains an empty
    # quoted placeholder instead of the searched path — presumably meant to
    # interpolate {params.search_path}; confirm intent.
    rule backwards_ecal_campaign_sim_list:
        output:
            "listing/backwards_ecal/{CAMPAIGN}/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst",
        params:
            search_path=lambda wildcards: f"EPIC/RECO/{wildcards.CAMPAIGN}/epic_craterlake/SINGLE/{wildcards.PARTICLE}/{wildcards.ENERGY}/{wildcards.PHASE_SPACE}/",
        shell: """
xrdfs root://dtn-eic.jlab.org/ ls /volatile/eic/{params.search_path} \
    | awk '{{ print "root://dtn-eic.jlab.org/"$1; }}' \
    | sort \
    > {output}
if [ ! -s {output} ]; then
    echo "Got an empty file listing for path \"\""
    exit 1
fi
"""
else:
    # Checkpoint: fetch the remote listing as repository-relative paths
    # (stored as *.lst.orig) so the listed files become fetchable dependencies.
    checkpoint backwards_ecal_campaign_sim_list_checkpoint:
        output:
            "listing/backwards_ecal/{CAMPAIGN}/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst.orig",
        params:
            search_path=lambda wildcards: f"EPIC/RECO/{wildcards.CAMPAIGN}/epic_craterlake/SINGLE/{wildcards.PARTICLE}/{wildcards.ENERGY}/{wildcards.PHASE_SPACE}/",
        shell: """
xrdfs root://dtn-eic.jlab.org/ ls /volatile/eic/{params.search_path} \
    | sed -e 's#^/volatile/eic/##' \
    | sort \
    > {output}
if [ ! -s {output} ]; then
    echo "Got an empty file listing for path \"\""
    exit 1
fi
"""

    # Read the checkpoint's listing; each returned path becomes an input of
    # the rule below (and is therefore fetched) before the .lst is produced.
    def get_backwards_ecal_campaign_sim_list(wildcards):
        with checkpoints.backwards_ecal_campaign_sim_list_checkpoint.get(**wildcards).output[0].open() as fp:
            return [line.rstrip() for line in fp.readlines()]

    rule backwards_ecal_campaign_sim_list:
        input:
            # depend on paths from the file list
            get_backwards_ecal_campaign_sim_list,
            orig_list="listing/backwards_ecal/{CAMPAIGN}/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst.orig",
        output:
            "listing/backwards_ecal/{CAMPAIGN}/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst",
        shell: """
cp {input.orig_list} {output}
"""

# Local listings win over campaign listings when both could produce the output.
ruleorder: backwards_ecal_local_sim_list > backwards_ecal_campaign_sim_list
# Detector configuration under test; must be provided by the environment.
DETECTOR_CONFIG=os.environ["DETECTOR_CONFIG"]

# Top-level benchmark target: requires listings for every particle/energy
# point, then runs the analysis script against a throwaway local dask
# scheduler/worker pair (torn down via the EXIT trap).
rule backwards_ecal:
    input:
        expand(
            "listing/backwards_ecal/{{CAMPAIGN}}/" + DETECTOR_CONFIG + "/{PARTICLE}/{ENERGY}/{PHASE_SPACE}.lst",
            PARTICLE=["pi-", "e-"],
            ENERGY=[
                "100MeV",
                "200MeV",
                "500MeV",
                "1GeV",
                "2GeV",
                "5GeV",
                "10GeV",
                "20GeV",
            ],
            PHASE_SPACE=["130to177deg"],
        ),
        matplotlibrc=".matplotlibrc",
        script="benchmarks/backwards_ecal/backwards_ecal.py",
    output:
        directory("results/backwards_ecal/{CAMPAIGN}/")
    log:
        scheduler=".logs/results/backwards_ecal/{CAMPAIGN}/scheduler.log",
        worker=".logs/results/backwards_ecal/{CAMPAIGN}/worker.log",
    threads: workflow.cores
    shell:
        """
if [[ "{wildcards.CAMPAIGN}" == "local" ]]; then
export PLOT_TITLE="Benchmark simulation"
else
export PLOT_TITLE="\\textbf{{ePIC}} Simulation {wildcards.CAMPAIGN}"
fi
set -m # monitor mode to prevent lingering processes
cleanup() {{
echo Cleaning up
kill $WORKER_PID $SCHEDULER_PID
}}
trap cleanup EXIT
PORT=$RANDOM
dask scheduler --port $PORT 2>{log.scheduler} &
export DASK_SCHEDULER=localhost:$PORT
SCHEDULER_PID=$!
dask worker tcp://$DASK_SCHEDULER --nworkers {threads} --nthreads 1 2>{log.worker} &
WORKER_PID=$!
env \
MATPLOTLIBRC={input.matplotlibrc} \
DETECTOR_CONFIG=""" + DETECTOR_CONFIG + """ \
INPUT_PATH_FORMAT=listing/backwards_ecal/{wildcards.CAMPAIGN}/""" + DETECTOR_CONFIG + """/{{particle}}/{{energy}}/130to177deg.lst \
OUTPUT_DIR={output} \
python {input.script}
"""
#+PROPERTY: header-args:jupyter-python :session /jpy:localhost#8888:backwards_ecal :async yes :results drawer :exports both
#+TITLE: ePIC EEEMCal benchmark
#+AUTHOR: Dmitry Kalinkin
#+OPTIONS: d:t
#+LATEX_CLASS_OPTIONS: [9pt,letter]
#+BIND: org-latex-image-default-width ""
#+BIND: org-latex-image-default-option "scale=0.3"
#+BIND: org-latex-images-centered nil
#+BIND: org-latex-minted-options (("breaklines") ("bgcolor" "black!5") ("frame" "single"))
#+LATEX_HEADER: \usepackage[margin=1in]{geometry}
#+LATEX_HEADER: \setlength{\parindent}{0pt}
#+LATEX: \sloppy
#+begin_src jupyter-python :results silent
import os
from pathlib import Path
import awkward as ak
import boost_histogram as bh
import dask_histogram as dh
import numpy as np
import uproot
import vector
#+end_src
#+begin_src jupyter-python :results silent
from dask.distributed import Client
client = Client(os.environ.get("DASK_SCHEDULER", "localhost:8786"))
#+end_src
#+begin_src jupyter-python
vector.register_awkward()
from dask.distributed import WorkerPlugin
class VectorImporter(WorkerPlugin):
idempotent=True
def setup(self, worker):
import vector
vector.register_awkward()
client.register_plugin(VectorImporter())
#+end_src
* Plotting setup
#+begin_src jupyter-python :results silent
import matplotlib as mpl
import matplotlib.pyplot as plt
def setup_presentation_style():
    """Reset matplotlib to its defaults, then apply the ggplot style with
    compact font/figure sizes suitable for presentation figures."""
    mpl.rcParams.update(mpl.rcParamsDefault)
    plt.style.use('ggplot')
    plt.rcParams.update({
        'axes.labelsize': 8,
        'axes.titlesize': 9,
        'figure.titlesize': 9,
        'figure.figsize': (4, 3),
        'legend.fontsize': 7,
        'xtick.labelsize': 8,
        'ytick.labelsize': 8,
        'xaxis.labellocation': 'right',
        'yaxis.labellocation': 'top',
        'pgf.rcfonts': False,
    })

# Apply the style globally for the rest of the notebook.
setup_presentation_style()
#+end_src
* Parameters
#+begin_src jupyter-python :results silent
DETECTOR_CONFIG=os.environ.get("DETECTOR_CONFIG")
PLOT_TITLE=os.environ.get("PLOT_TITLE")
INPUT_PATH_FORMAT=os.environ.get("INPUT_PATH_FORMAT", "EPIC/RECO/24.04.0/epic_craterlake/SINGLE/{particle}/{energy}/130to177deg/{particle}_{energy}_130to177deg.{ix:04d}.eicrecon.tree.edm4eic.root")
output_dir=Path(os.environ.get("OUTPUT_DIR", "./"))
output_dir.mkdir(parents=True, exist_ok=True)
#+end_src
* Analysis
First, we define a requirement on what phase space we will consider for our
analysis. The following function filters single-particle events that
are thrown within $-3.5 < \eta < -2.0$:
#+begin_src jupyter-python
def filter_pointing(events):
    """Keep only events whose thrown (first MC) particle has -3.5 < eta < -2.0.

    Builds a Momentum4D record from the MCParticles branches and cuts on the
    pseudorapidity of the leading particle.
    """
    momentum = ak.zip(
        {
            "m": events["MCParticles.mass"],
            "px": events["MCParticles.momentum.x"],
            "py": events["MCParticles.momentum.y"],
            "pz": events["MCParticles.momentum.z"],
        },
        with_name="Momentum4D",
    )
    leading_eta = momentum.eta[:, 0]
    in_acceptance = (leading_eta > -3.5) & (leading_eta < -2.0)
    return events[in_acceptance]
#+end_src
#+begin_src jupyter-python
energies = [
"100MeV",
"200MeV",
"500MeV",
"1GeV",
"2GeV",
"5GeV",
"10GeV",
"20GeV",
]
filter_name = [
"MCParticles.*",
"EcalEndcapNClusters.energy",
]
pi_eval = {}
e_eval = {}
def readlist(path):
    """Read a listing file and return its lines as a list of paths,
    with trailing whitespace (including the newline) stripped."""
    with open(path, "rt") as fp:
        return [entry.rstrip() for entry in fp]
for energy in energies:
pi_eval[energy] = uproot.dask(
{path: "events" for path in readlist(INPUT_PATH_FORMAT.format(particle="pi-", energy=energy))},
filter_name=filter_name,
steps_per_file=1,
open_files=False,
).map_partitions(filter_pointing)
e_eval[energy] = uproot.dask(
{path: "events" for path in readlist(INPUT_PATH_FORMAT.format(particle="e-", energy=energy))},
filter_name=filter_name,
steps_per_file=1,
open_files=False,
).map_partitions(filter_pointing)
#+end_src
** Energy resolution
The first step in the analysis is plotting the distributions of cluster energies.
Only the electron distribution is needed, but we also process $\pi^-$ for the
pion rejection study.
#+begin_src jupyter-python
# E/p response histograms for both particle species at every beam energy.
# Filled lazily through dask_histogram; the final client.gather/compute call
# materializes all of them in one pass over the inputs.
e_over_p_hist = {}
e_over_p_axis = bh.axis.Regular(101, 0., 1.01)

for ix, energy in enumerate(energies):
    for particle_name, dataset in [("e-", e_eval[energy]), ("pi-", pi_eval[energy])]:
        # "100MeV" -> 0.1, "20GeV" -> 20.0 (string rewrite, then float())
        energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3"))
        def clf(events):
            # Classifier proxy: energy of the most energetic cluster over thrown momentum
            return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value
        e_over_p_hist[(particle_name, energy)] = dh.factory(
            clf(dataset),
            axes=(e_over_p_axis,),
        )

# Trigger the distributed computation and replace lazy handles with results.
e_over_p_hist = client.gather(client.compute(e_over_p_hist))
#+end_src
The next step is to plot the histograms together with fits and produce a plot
for resolution vs energy that summarizes them.
#+begin_src jupyter-python
fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
axs = np.ravel(np.array(axs))
sigmas_rel_FWHM_cb = {}
fractions_below = {}
def norm_by_sum(arr):
return arr / np.sum(arr)
for ix, energy in enumerate(energies):
plt.sca(axs[ix])
hist = e_over_p_hist[("e-", energy)]
patch = plt.stairs(norm_by_sum(hist.values()), hist.axes[0].edges, label=rf"$e^-$ {PLOT_TITLE}")
plt.title(f"{energy}")
import scipy.stats
def f(x, n, beta, m, loc, scale):
return n * scipy.stats.crystalball.pdf(x, beta, m, loc, scale)
p0 = (np.sum(hist.values()[10:]), 2., 3., 0.95, 0.05)
try:
import scipy.optimize
par, pcov = scipy.optimize.curve_fit(f, hist.axes[0].centers[5:], hist.values()[5:], p0=p0, maxfev=10000)
except RuntimeError:
print(hist)
raise
plt.plot(hist.axes[0].centers, f(hist.axes[0].centers, *par), label=rf"Crystal Ball fit", color="tab:green", lw=0.8)
def summarize_fit(par):
_, _, _, loc_cb, scale_cb = par
# Calculate FWHM
y_max = np.max(f(np.linspace(0., 1., 100), *par))
f_prime = lambda x: f(x, *par) - y_max / 2
x_plus, = scipy.optimize.root(f_prime, loc_cb + scale_cb).x
x_minus, = scipy.optimize.root(f_prime, loc_cb - scale_cb).x
plt.axvline(x_minus, ls="--", lw=0.75, color=patch.get_facecolor(), label=r"$\mu - $FWHM")
plt.axvline(x_plus, ls=":", lw=0.75, color=patch.get_facecolor(), label=r"$\mu + $FWHM")
fwhm = (x_plus - x_minus) / loc_cb
sigma_rel_FWHM_cb = fwhm / 2 / np.sqrt(2 * np.log(2))
cutoff_x = loc_cb - fwhm
fraction_below = np.sum(hist.values()[hist.axes[0].centers < cutoff_x]) / np.sum(hist.values())
return sigma_rel_FWHM_cb, fraction_below
sigma_rel_FWHM_cb, fraction_below = summarize_fit(par)
sigmas_rel_FWHM_cb.setdefault(PLOT_TITLE, {})[energy] = sigma_rel_FWHM_cb
fractions_below.setdefault(PLOT_TITLE, {})[energy] = fraction_below
plt.legend()
plt.xlabel("$E/p$", loc="right")
plt.ylabel("Event yield", loc="top")
fig.savefig(output_dir / f"resolution_plots.pdf", bbox_inches="tight")
fig.savefig(output_dir / f"resolution_plots.png", bbox_inches="tight")
plt.show()
plt.close(fig)
plt.figure()
energy_values = np.array([float(energy.replace("GeV", "").replace("MeV", "e-3")) for energy in energies])
for clf_label, sigma_rel_FWHM_cb in sigmas_rel_FWHM_cb.items():
sigma_over_e = np.array([sigma_rel_FWHM_cb[energy] for energy in energies]) * 100 # convert to %
def f(energy, stochastic, constant):
return np.sqrt((stochastic / np.sqrt(energy)) ** 2 + constant ** 2)
cond = energy_values >= 0.5
try:
import scipy.optimize
par, pcov = scipy.optimize.curve_fit(f, energy_values[cond], sigma_over_e[cond], maxfev=10000)
except RuntimeError:
print("energy_values", energy_values[cond])
print("sigma_over_e", sigma_over_e[cond])
raise
stochastic, constant = par
plt.plot(
energy_values,
sigma_over_e,
marker=".",
ls="none",
label=f"{clf_label}"
)
xmin = np.min(energy_values[cond])
xmax = np.max(energy_values[cond])
xs = np.arange(xmin, xmax, 0.1)
plt.plot(
xs,
f(xs, *par),
ls="--",
lw=0.5,
label=fr"Functional fit: ${np.ceil(stochastic * 10) / 10:.1f}\% / \sqrt{{E}} \oplus {np.ceil(constant * 10) / 10:.1f}\%$",
)
xmin = np.min(energy_values)
xmax = np.max(energy_values) * 1.05
xs = np.arange(xmin, xmax + 0.1, 0.1)
plt.fill_between(
xs,
np.sqrt((2 / np.sqrt(xs)) ** 2 + 1 ** 2),
np.sqrt((2 / np.sqrt(xs)) ** 2 + 3 ** 2),
lw=0., alpha=0.2, color="black", label=r"YR requirement $2\% / \sqrt{E} \oplus (1-3)\%$",
)
plt.xlim(0., xmax)
plt.ylim(top=6.)
plt.legend()
plt.xlabel("Energy, GeV", loc="right")
plt.ylabel(r"$\sigma_{E} / E$ derived from FWHM, %", loc="top")
plt.savefig(output_dir / f"resolution.pdf", bbox_inches="tight")
plt.savefig(output_dir / f"resolution.png", bbox_inches="tight")
plt.show()
#+end_src
** Pion rejection
#+begin_src jupyter-python
fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
fig_log, axs_log = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
fig_roc, axs_roc = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
axs = np.ravel(np.array(axs))
axs_log = np.ravel(np.array(axs_log))
axs_roc = np.ravel(np.array(axs_roc))
rocs = {}
for ix, energy in enumerate(energies):
energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3"))
def clf(events):
return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value
e_pred = clf(e_eval[energy])
pi_pred = clf(pi_eval[energy])
for do_log, ax in [(False, axs[ix]), (True, axs_log[ix])]:
plt.sca(ax)
patch = plt.stairs(norm_by_sum(e_over_p_hist[("e-", energy)].values()), e_over_p_hist[("e-", energy)].axes[0].edges, label=rf"$e^-$ {PLOT_TITLE}")
patch = plt.stairs(norm_by_sum(e_over_p_hist[("pi-", energy)].values()), e_over_p_hist[("pi-", energy)].axes[0].edges, label=rf"$\pi^-$ {PLOT_TITLE}")
plt.title(f"{energy}")
plt.legend()
plt.xlabel("Classifier output")
plt.ylabel("Event yield")
if do_log:
plt.yscale("log")
plt.sca(axs_roc[ix])
fpr = np.cumsum(e_over_p_hist[("pi-", energy)].values()[::-1])
fpr /= fpr[-1]
tpr = np.cumsum(e_over_p_hist[("e-", energy)].values()[::-1])
tpr /= tpr[-1]
cond = fpr != 0 # avoid infinite rejection (region of large uncertainty)
cond &= tpr != 1 # avoid linear interpolation (region of large uncertainty)
def mk_interp(tpr, fpr):
def interp(eff):
return np.interp(eff, tpr, fpr)
return interp
rocs.setdefault(PLOT_TITLE, {})[energy] = mk_interp(tpr, fpr)
plt.plot(tpr[cond] * 100, 1 / fpr[cond], label=f"{PLOT_TITLE}")
plt.yscale("log")
plt.title(f"{energy}")
plt.legend(loc="lower left")
plt.xlabel("Electron efficiency, %")
plt.ylabel("Pion rejection factor")
fig.savefig(output_dir / f"pred.pdf", bbox_inches="tight")
fig.savefig(output_dir / f"pred.png", bbox_inches="tight")
plt.close(fig)
fig_log.savefig(output_dir / f"pred_log.pdf", bbox_inches="tight")
fig_log.savefig(output_dir / f"pred_log.png", bbox_inches="tight")
fig_log.show()
fig_roc.savefig(output_dir / f"roc.pdf", bbox_inches="tight")
fig_roc.savefig(output_dir / f"roc.png", bbox_inches="tight")
fig_roc.show()
plt.figure()
for clf_label, roc in rocs.items():
plt.plot(
[float(energy.replace("GeV", "").replace("MeV", "e-3")) for energy in energies],
[1 / roc[energy](0.95) for energy in energies],
marker=".",
label=f"{clf_label}",
)
xmax = np.max(energy_values) * 1.05
plt.xlim(0., xmax)
plt.yscale("log")
plt.legend()
plt.xlabel("Energy, GeV")
plt.ylabel("Pion rejection at 95%")
plt.savefig(output_dir / f"pion_rej.pdf", bbox_inches="tight")
plt.savefig(output_dir / f"pion_rej.png", bbox_inches="tight")
plt.show()
#+end_src
# Simulate + reconstruct single particles, one CI job per (particle, momentum)
# point; each job drives Snakemake up to the local listing file.
sim:backwards_ecal:
  extends: .det_benchmark
  stage: simulate
  parallel:
    matrix:
      - PARTICLE: ["e-", "pi-"]
        MOMENTUM: [
          "100MeV",
          "200MeV",
          "500MeV",
          "1GeV",
          "2GeV",
          "5GeV",
          "10GeV",
          "20GeV",
        ]
  script:
    - |
      snakemake $SNAKEMAKE_FLAGS --cores 5 listing/backwards_ecal/local/${DETECTOR_CONFIG}/${PARTICLE}/${MOMENTUM}/130to177deg.lst

# Run the analysis on the locally simulated samples.
bench:backwards_ecal:
  extends: .det_benchmark
  stage: benchmarks
  needs:
    - ["sim:backwards_ecal"]
  script:
    - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
    - pip install -r benchmarks/backwards_ecal/requirements.txt
    - snakemake $SNAKEMAKE_FLAGS --cores 5 results/backwards_ecal/local

# Optional (manual) run against an official simulation campaign.
bench:backwards_ecal_campaigns:
  extends: .det_benchmark
  stage: benchmarks
  when: manual
  timeout: 4 hours
  script:
    - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
    - pip install -r benchmarks/backwards_ecal/requirements.txt
    - snakemake $SNAKEMAKE_FLAGS --cores 5 results/backwards_ecal/24.10.1

# Collect artifacts; verifies outputs can be cleanly deleted without
# clobbering the saved results directory.
collect_results:backwards_ecal:
  extends: .det_benchmark
  stage: collect
  needs:
    - "bench:backwards_ecal"
  script:
    - ls -lrht
    - mv results{,_save}/ # move results directory out of the way to preserve it
    - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/backwards_ecal/local
    - mv results{_save,}/
# Python dependencies for the backwards_ecal benchmark analysis
awkward ~= 2.7.2
dask ~= 2024.12.1
dask_awkward ~= 2024.12.2
dask_histogram ~= 2024.12.1
distributed ~= 2024.12.1
pyhepmc
uproot ~= 5.5.1
vector ~= 1.5.2
......@@ -26,7 +26,8 @@ rule emcal_barrel_particles:
"{DETECTOR_CONFIG}/sim_output/sim_emcal_barrel_{PARTICLE}_energies{E_MIN}_{E_MAX}.edm4hep.root"
shell:
"""
ddsim \
set -m # monitor mode to prevent lingering processes
exec ddsim \
--runType batch \
-v WARNING \
--part.minimalKineticEnergy 0.5*GeV \
......
......@@ -2,13 +2,13 @@ sim:emcal_barrel_pions:
extends: .det_benchmark
stage: simulate
script:
- snakemake --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_{piplus,piminus}_energies5.0_5.0.edm4hep.root
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_{piplus,piminus}_energies5.0_5.0.edm4hep.root
sim:emcal_barrel_pi0:
extends: .det_benchmark
stage: simulate
script:
- snakemake --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_pi0_energies5.0_5.0.edm4hep.root
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_pi0_energies5.0_5.0.edm4hep.root
sim:emcal_barrel_electrons:
extends: .det_benchmark
......@@ -16,20 +16,20 @@ sim:emcal_barrel_electrons:
script:
- if [[ "$RUN_EXTENDED_BENCHMARK" == "true" ]] ; then snakemake --cores $DETECTOR_CONFIG/results/energy_scan/emcal_barrel_electron_fsam_scan.png; fi
- export JUGGLER_N_EVENTS=400
- snakemake --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_electron_energies5.0_5.0.edm4hep.root
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_electron_energies5.0_5.0.edm4hep.root
sim:emcal_barrel_photons:
extends: .det_benchmark
stage: simulate
script:
- if [[ "$RUN_EXTENDED_BENCHMARK" == "true" ]] ; then snakemake --cores $DETECTOR_CONFIG/results/energy_scan/emcal_barrel_proton_fsam_scan.png; fi
- snakemake --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_photon_energies5.0_5.0.edm4hep.root
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_photon_energies5.0_5.0.edm4hep.root
sim:emcal_barrel_pion_rejection:
extends: .det_benchmark
stage: simulate
script:
- snakemake --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_{piminus,electron}_energies1.0_18.0.edm4hep.root
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_{piminus,electron}_energies1.0_18.0.edm4hep.root
calib:emcal_barrel_electrons:
extends: .det_benchmark
......@@ -39,7 +39,7 @@ calib:emcal_barrel_electrons:
script:
- ls -lhtR sim_output/
- rootls -t sim_output/sim_emcal_barrel_electron_energies5.0_5.0.edm4hep.root
- snakemake --cores 1 $DETECTOR_CONFIG/results/emcal_barrel_electron_calibration.json
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/results/emcal_barrel_electron_calibration.json
- mv sim_output/sim_emcal_barrel_electron.edm4hep.root results/.
- echo "JSON file(s) from analysis:" ; cat results/*.json
......@@ -49,7 +49,7 @@ bench:emcal_barrel_pions:
needs:
- ["sim:emcal_barrel_pions"]
script:
- snakemake --cores 1 $DETECTOR_CONFIG/results/emcal_barrel_pions_Ethr.png
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/results/emcal_barrel_pions_Ethr.png
bench:emcal_barrel_electrons_scan:
extends: .det_benchmark
......@@ -66,7 +66,7 @@ bench:emcal_barrel_pi0:
- ["sim:emcal_barrel_pi0", "calib:emcal_barrel_electrons"]
script:
- echo "JSON file(s) from analysis:" ; cat results/*.json
- snakemake --cores 1 epic_craterlake/results/Barrel_emcal_pi0.json
- snakemake $SNAKEMAKE_FLAGS --cores 1 epic_craterlake/results/Barrel_emcal_pi0.json
bench:emcal_barrel_photons:
extends: .det_benchmark
......@@ -76,7 +76,7 @@ bench:emcal_barrel_photons:
script:
- ls -lhtR sim_output/
- rootls -t sim_output/sim_emcal_barrel_photon_energies5.0_5.0.edm4hep.root
- snakemake --cores 1 $DETECTOR_CONFIG/results/emcal_barrel_photon_calibration.json
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/results/emcal_barrel_photon_calibration.json
- mv sim_output/sim_emcal_barrel_photon.edm4hep.root results/.
- if [[ "$RUN_EXTENDED_BENCHMARK" == "true" ]] ; then snakemake --cores $DETECTOR_CONFIG/results/energy_scan/emcal_barrel_proton_fsam_scan.png; fi
......@@ -89,7 +89,7 @@ bench:emcal_barrel_pion_rejection:
- ls -lhtR sim_output/
- rootls -t $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_piminus_energies1.0_18.0.edm4hep.root
- rootls -t $DETECTOR_CONFIG/sim_output/sim_emcal_barrel_electron_energies1.0_18.0.edm4hep.root
- snakemake --cores 1 $DETECTOR_CONFIG/results/Barrel_emcal_pion_rej.json
- snakemake $SNAKEMAKE_FLAGS --cores 1 $DETECTOR_CONFIG/results/Barrel_emcal_pion_rej.json
collect_results:barrel_ecal:
extends: .det_benchmark
......
def format_energy_for_dd4hep(s):
    """Turn an energy string like "100MeV" or "20GeV" into a DD4hep
    expression ("100*MeV", "20*GeV") by inserting "*" between the numeric
    magnitude and the unit suffix."""
    magnitude = s.rstrip("kMGeV")
    unit = s.lstrip("0123456789")
    return f"{magnitude}*{unit}"
# Single-particle ddsim production for the calorimeter PID study.
# NOTE(review): the output filename contains a free {ENERGY} wildcard while
# the directory uses {ENERGY_MIN}to{ENERGY_MAX}; in practice {ENERGY} matches
# the "MINtoMAX" string — confirm this is intentional.
rule calo_pid_sim:
    input:
        warmup="warmup/{DETECTOR_CONFIG}.edm4hep.root",
        geometry_lib=find_epic_libraries(),
    output:
        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY_MIN}to{ENERGY_MAX}/{THETA_MIN}to{THETA_MAX}deg/{PARTICLE}_{ENERGY}_{THETA_MIN}to{THETA_MAX}deg.{INDEX}.edm4hep.root",
    log:
        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY_MIN}to{ENERGY_MAX}/{THETA_MIN}to{THETA_MAX}deg/{PARTICLE}_{ENERGY}_{THETA_MIN}to{THETA_MAX}deg.{INDEX}.edm4hep.root.log",
    wildcard_constraints:
        PARTICLE="(e-|pi-)",
        ENERGY_MIN="[0-9]+[kMG]eV",
        ENERGY_MAX="[0-9]+[kMG]eV",
        THETA_MIN="[0-9]+",
        THETA_MAX="[0-9]+",
        INDEX=r"\d{4}",
    params:
        N_EVENTS=1000,
        # Seed derived from the file index so each chunk is reproducible
        SEED=lambda wildcards: "1" + wildcards.INDEX,
        DETECTOR_PATH=os.environ["DETECTOR_PATH"],
        DETECTOR_CONFIG=lambda wildcards: wildcards.DETECTOR_CONFIG,
        ENERGY_MIN=lambda wildcards: format_energy_for_dd4hep(wildcards.ENERGY_MIN),
        ENERGY_MAX=lambda wildcards: format_energy_for_dd4hep(wildcards.ENERGY_MAX),
        THETA_MIN=lambda wildcards: wildcards.THETA_MIN,
        THETA_MAX=lambda wildcards: wildcards.THETA_MAX,
        # Hashes invalidate the cache when the simulation stack changes
        DD4HEP_HASH=get_spack_package_hash("dd4hep"),
        NPSIM_HASH=get_spack_package_hash("npsim"),
    cache: True
    shell:
        """
set -m # monitor mode to prevent lingering processes
exec ddsim \
  --runType batch \
  --enableGun \
  --gun.momentumMin "{params.ENERGY_MIN}" \
  --gun.momentumMax "{params.ENERGY_MAX}" \
  --gun.thetaMin "{wildcards.THETA_MIN}*deg" \
  --gun.thetaMax "{wildcards.THETA_MAX}*deg" \
  --gun.particle {wildcards.PARTICLE} \
  --gun.distribution eta \
  --random.seed {params.SEED} \
  --filter.tracker edep0 \
  -v WARNING \
  --numberOfEvents {params.N_EVENTS} \
  --compactFile {params.DETECTOR_PATH}/{params.DETECTOR_CONFIG}.xml \
  --outputFile {output}
"""
# Reconstruct a simulated chunk with eicrecon, keeping only the collections
# needed by the PID analysis (including the ParticleID feature/target/output
# tensors).
rule calo_pid_recon:
    input:
        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.edm4hep.root",
    output:
        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root",
    log:
        "sim_output/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX}.eicrecon.tree.edm4eic.root.log",
    wildcard_constraints:
        INDEX=r"\d{4}",
    shell: """
set -m # monitor mode to prevent lingering processes
exec env DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
  eicrecon {input} -Ppodio:output_file={output} \
  -Ppodio:output_collections=MCParticles,EcalEndcapNRecHits,EcalEndcapNClusters,EcalEndcapNParticleIDInput_features,EcalEndcapNParticleIDTarget,EcalEndcapNParticleIDOutput_probability_tensor
"""
# Aggregate the 100 reconstructed chunks for one particle species into a
# listing file.  NOTE(review): the input key is named "electrons" but the
# rule is instantiated for any {PARTICLE} — the name looks like a leftover;
# confirm before relying on it.
rule calo_pid_input_list:
    input:
        electrons=expand(
            "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{ENERGY}/{PHASE_SPACE}/{{PARTICLE}}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
            ENERGY=["100MeVto20GeV"],
            PHASE_SPACE=["130to177deg"],
            INDEX=range(100),
        ),
    output:
        "listing/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}.lst",
    run:
        # One input path per line
        with open(output[0], "wt") as fp:
            fp.write("\n".join(input))
# Run the calorimeter PID analysis script on the electron and pion listings;
# all configuration is passed through environment variables.
rule calo_pid:
    input:
        electrons="listing/calo_pid/{DETECTOR_CONFIG}/e-.lst",
        pions="listing/calo_pid/{DETECTOR_CONFIG}/pi-.lst",
        matplotlibrc=".matplotlibrc",
        script="benchmarks/calo_pid/calo_pid.py",
    output:
        directory("results/{DETECTOR_CONFIG}/calo_pid")
    shell:
        """
env \
MATPLOTLIBRC={input.matplotlibrc} \
DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
PLOT_TITLE={wildcards.DETECTOR_CONFIG} \
INPUT_ELECTRONS="{input.electrons}" \
INPUT_PIONS="{input.pions}" \
OUTPUT_DIR={output} \
python {input.script}
"""
#+PROPERTY: header-args:jupyter-python :session /jpy:localhost#8888:benchmark :async yes :results drawer :exports both
#+TITLE: ePIC EEEMCal calorimetry PID
#+AUTHOR: Dmitry Kalinkin
#+OPTIONS: d:t
#+LATEX_CLASS_OPTIONS: [9pt,letter]
#+BIND: org-latex-image-default-width ""
#+BIND: org-latex-image-default-option "scale=0.3"
#+BIND: org-latex-images-centered nil
#+BIND: org-latex-minted-options (("breaklines") ("bgcolor" "black!5") ("frame" "single"))
#+LATEX_HEADER: \usepackage[margin=1in]{geometry}
#+LATEX_HEADER: \setlength{\parindent}{0pt}
#+LATEX: \sloppy
#+begin_src jupyter-python :results silent
import os
from pathlib import Path
import awkward as ak
import boost_histogram as bh
import numpy as np
import vector
import uproot
from sklearn.metrics import roc_curve
vector.register_awkward()
#+end_src
* Parameters
#+begin_src jupyter-python :results silent
DETECTOR_CONFIG=os.environ.get("DETECTOR_CONFIG")
PLOT_TITLE=os.environ.get("PLOT_TITLE")
INPUT_PIONS=os.environ.get("INPUT_PIONS")
INPUT_ELECTRONS=os.environ.get("INPUT_ELECTRONS")
output_dir=Path(os.environ.get("OUTPUT_DIR", "./"))
output_dir.mkdir(parents=True, exist_ok=True)
#+end_src
* Plotting setup
#+begin_src jupyter-python :results silent
import matplotlib as mpl
import matplotlib.pyplot as plt
def setup_presentation_style():
    """Reset matplotlib to its defaults, then apply the ggplot style with
    compact font/figure sizes suitable for presentation figures."""
    mpl.rcParams.update(mpl.rcParamsDefault)
    plt.style.use('ggplot')
    plt.rcParams.update({
        'axes.labelsize': 8,
        'axes.titlesize': 9,
        'figure.titlesize': 9,
        'figure.figsize': (4, 3),
        'legend.fontsize': 7,
        'xtick.labelsize': 8,
        'ytick.labelsize': 8,
        'pgf.rcfonts': False,
    })

# Apply the style globally for the rest of the notebook.
setup_presentation_style()
#+end_src
* Analysis
#+begin_src jupyter-python
def filter_pointing(events):
    """Keep only events whose thrown (first MC) particle has -3.5 < eta < -2.0.

    Builds a Momentum4D record from the MCParticles branches and cuts on the
    pseudorapidity of the leading particle.
    """
    momentum = ak.zip(
        {
            "m": events["MCParticles.mass"],
            "px": events["MCParticles.momentum.x"],
            "py": events["MCParticles.momentum.y"],
            "pz": events["MCParticles.momentum.z"],
        },
        with_name="Momentum4D",
    )
    leading_eta = momentum.eta[:, 0]
    in_acceptance = (leading_eta > -3.5) & (leading_eta < -2.0)
    return events[in_acceptance]
def readlist(path):
    """Read a listing file and return its lines as a list of paths,
    with trailing whitespace (including the newline) stripped."""
    with open(path, "rt") as fp:
        return [entry.rstrip() for entry in fp]
e = filter_pointing(uproot.concatenate({filename: "events" for filename in readlist(INPUT_ELECTRONS)}, filter_name=["MCParticles.*", "*EcalEndcapN*"]))
pi = filter_pointing(uproot.concatenate({filename: "events" for filename in readlist(INPUT_PIONS)}, filter_name=["MCParticles.*", "*EcalEndcapN*"]))
e_train = e[:len(pi)//2]
pi_train = pi[:len(pi)//2]
e_eval = e[len(pi)//2:]
pi_eval = pi[len(pi)//2:]
#+end_src
#+RESULTS:
:results:
:end:
#+begin_src jupyter-python
nums = ak.num(pi_train["_EcalEndcapNParticleIDInput_features_floatData"], axis=1)
num_features = ak.min(nums[nums > 0])
print(f"{num_features} features")
nums = ak.num(pi_train["_EcalEndcapNParticleIDTarget_int64Data"], axis=1)
num_targets = ak.min(nums[nums > 0])
print(f"{num_targets} targets")
pi_train_leading_cluster_ixs = ak.argsort(pi_train["EcalEndcapNClusters.energy"], ascending=False)[:,:1]
e_train_leading_cluster_ixs = ak.argsort(e_train["EcalEndcapNClusters.energy"], ascending=False)[:,:1]
pi_train_x = ak.flatten(ak.unflatten(pi_train["_EcalEndcapNParticleIDInput_features_floatData"], num_features, axis=1)[pi_train_leading_cluster_ixs])
pi_train_y = ak.flatten(ak.unflatten(pi_train["_EcalEndcapNParticleIDTarget_int64Data"], num_targets, axis=1)[pi_train_leading_cluster_ixs])
e_train_x = ak.flatten(ak.unflatten(e_train["_EcalEndcapNParticleIDInput_features_floatData"], num_features, axis=1)[e_train_leading_cluster_ixs])
e_train_y = ak.flatten(ak.unflatten(e_train["_EcalEndcapNParticleIDTarget_int64Data"], num_targets, axis=1)[e_train_leading_cluster_ixs])
train_x = ak.concatenate([pi_train_x, e_train_x])
train_y = ak.concatenate([pi_train_y, e_train_y])
pi_eval_leading_cluster_ixs = ak.argsort(pi_eval["EcalEndcapNClusters.energy"], ascending=False)[:,:1]
e_eval_leading_cluster_ixs = ak.argsort(e_eval["EcalEndcapNClusters.energy"], ascending=False)[:,:1]
pi_eval_x = ak.flatten(ak.unflatten(pi_eval["_EcalEndcapNParticleIDInput_features_floatData"], num_features, axis=1)[pi_eval_leading_cluster_ixs])
pi_eval_y = ak.flatten(ak.unflatten(pi_eval["_EcalEndcapNParticleIDTarget_int64Data"], num_targets, axis=1)[pi_eval_leading_cluster_ixs])
e_eval_x = ak.flatten(ak.unflatten(e_eval["_EcalEndcapNParticleIDInput_features_floatData"], num_features, axis=1)[e_eval_leading_cluster_ixs])
e_eval_y = ak.flatten(ak.unflatten(e_eval["_EcalEndcapNParticleIDTarget_int64Data"], num_targets, axis=1)[e_eval_leading_cluster_ixs])
eval_x = ak.concatenate([pi_eval_x, e_eval_x])
eval_y = ak.concatenate([pi_eval_y, e_eval_y])
#+end_src
#+RESULTS:
:results:
: 11 features
: 2 targets
:end:
#+begin_src jupyter-python
#+end_src
#+RESULTS:
:results:
#+begin_export html
<pre>[5.11,
0.0424,
3.03,
2.16,
17.7,
8.32,
-4.54e-07,
0.000456,
0,
69.2,
0]
------------------
type: 11 * float32</pre>
#+end_export
:end:
#+begin_src jupyter-python
ak.sum((ak.num(pi_train_leading_cluster_ixs) != 0)), ak.num(pi_train_leading_cluster_ixs, axis=0)
#+end_src
#+RESULTS:
:results:
| 87721 | array | (88210) |
:end:
#+begin_src jupyter-python
plt.hist(pi_eval_x[:,0])
plt.hist(e_eval_x[:,0], alpha=0.5)
plt.show()
#+end_src
#+RESULTS:
:results:
[[file:./.ob-jupyter/5381c9bd149f0bb8855bf539e7ce8ef927a2e1a9.png]]
:end:
#+begin_src jupyter-python
"""
fig, axs = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
fig_log, axs_log = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
fig_roc, axs_roc = plt.subplots(2, 4, sharex=True, sharey=True, figsize=(15, 6))
axs = np.ravel(np.array(axs))
axs_log = np.ravel(np.array(axs_log))
axs_roc = np.ravel(np.array(axs_roc))
rocs = {}
for ix, energy in enumerate(energies):
for c in [False, True]:
energy_value = float(energy.replace("GeV", "").replace("MeV", "e-3"))
if c:
clf_label = "leading cluster"
else:
clf_label = "sum all hits"
def clf(events):
if c:
return ak.drop_none(ak.max(events["EcalEndcapNClusters.energy"], axis=-1)) / energy_value
else:
return ak.sum(events["EcalEndcapNRecHits.energy"], axis=-1) / energy_value
e_pred = clf(e_eval[energy])
pi_pred = clf(pi_eval[energy])
for do_log in [False, True]:
plt.sca(axs[ix])
plt.hist(e_pred, bins=np.linspace(0., 1.01, 101), label=rf"$e^-$ {clf_label}")
plt.hist(pi_pred, bins=np.linspace(0., 1.01, 101), label=rf"$\pi^-$ {clf_label}", histtype="step")
plt.title(f"{energy}")
plt.legend()
if do_log: plt.yscale("log")
plt.sca(axs_roc[ix])
fpr, tpr, _ = roc_curve(
np.concatenate([np.ones_like(e_pred), np.zeros_like(pi_pred)]),
np.concatenate([e_pred, pi_pred]),
)
cond = fpr != 0 # avoid infinite rejection (region of large uncertainty)
cond &= tpr != 1 # avoid linear interpolation (region of large uncertainty)
def mk_interp(tpr, fpr):
def interp(eff):
return np.interp(eff, tpr, fpr)
return interp
rocs.setdefault(clf_label, {})[energy] = mk_interp(tpr, fpr)
plt.plot(tpr[cond] * 100, 1 / fpr[cond], label=f"{clf_label}")
plt.title(f"{energy}")
plt.legend()
fig.show()
plt.close(fig_log)
fig_roc.show()
plt.figure()
for clf_label, roc in rocs.items():
plt.plot(
[float(energy.replace("GeV", "").replace("MeV", "e-3")) for energy in energies],
[1 / roc[energy](0.95) for energy in energies],
marker=".",
label=f"{clf_label}",
)
plt.legend()
plt.show()
"""
#+end_src
#+begin_src jupyter-python
import catboost
clf = {}
from sklearn.metrics import roc_curve
roc = {}
clf = catboost.CatBoostClassifier(loss_function="CrossEntropy", verbose=0, n_estimators=1000)
clf.fit(
train_x.to_numpy(),
train_y.to_numpy()[:,1], # index 1 = is electron
)
plt.hist(clf.predict_proba(e_eval_x.to_numpy())[:,1], bins=np.linspace(0., 1.01, 101), label=r"$e^-$")
plt.hist(clf.predict_proba(pi_eval_x.to_numpy())[:,1], bins=np.linspace(0., 1.01, 101), label=r"$\pi^-$", histtype="step")
plt.xlabel("Classifier's probability prediction", loc="right")
plt.ylabel("Number of events", loc="top")
plt.legend(loc="upper center")
plt.savefig(output_dir / "predict_proba.pdf", bbox_inches="tight")
plt.show()
#+end_src
#+begin_src jupyter-python
# Per-energy-bin ROC study: slice the evaluation sample into 1 GeV bins and
# compute an electron-efficiency vs. pion-rejection curve for each bin.
energy_bin_edges = np.arange(0., 20. + 1e-7, 1.)  # 0..20 GeV in 1 GeV steps (epsilon so 20 is included)
# Column 0 of the feature matrix is treated as the particle energy/momentum
# (axis labels below are in GeV) — TODO confirm against the feature builder.
_eval_energy = eval_x[:,0]
for energy_bin_ix, (energy_bin_low, energy_bin_high) in enumerate(zip(energy_bin_edges[:-1], energy_bin_edges[1:])):
    # mask over the combined eval sample for this energy bin
    cond = (_eval_energy >= energy_bin_low) & (_eval_energy < energy_bin_high)
    print(energy_bin_low, energy_bin_high, ak.sum(cond))
    # per-species masks for the diagnostic histograms below
    pi_cond = (pi_eval_x[:,0] >= energy_bin_low) & (pi_eval_x[:,0] < energy_bin_high)
    e_cond = (e_eval_x[:,0] >= energy_bin_low) & (e_eval_x[:,0] < energy_bin_high)
    # diagnostic: distribution of feature column 1 for each species in this bin
    plt.hist(pi_eval_x[pi_cond][:,1], bins=np.linspace(0., 1.01, 101))
    plt.hist(e_eval_x[e_cond][:,1], bins=np.linspace(0., 1.01, 101))
    plt.yscale("log")
    plt.show()
    plt.clf()
    # ROC from truth labels (column 1 = is electron) vs. classifier scores
    fpr, tpr, _ = roc_curve(
        eval_y[cond][:,1],
        #eval_x[cond][:,1],
        clf.predict_proba(eval_x[cond].to_numpy())[:,1],
    )
    # NOTE(review): `cond` is reused here with a new meaning — from here on it
    # is a mask over ROC points, no longer the energy-bin event mask above.
    cond = fpr != 0 # avoid infinite rejection (region of large uncertainty)
    cond &= tpr != 1 # avoid linear interpolation (region of large uncertainty)
    #cond &= tpr > 0.5
    plt.plot(tpr[cond] * 100, 1 / fpr[cond])
    def mk_interp(tpr, fpr):
        # Factory binds this bin's (tpr, fpr) arrays in a closure so later
        # iterations don't overwrite them (avoids the late-binding pitfall).
        def interp(eff):
            # pion fake rate interpolated at the requested electron efficiency
            return np.interp(eff, tpr, fpr)
        return interp
    roc[energy_bin_ix] = mk_interp(tpr, fpr)
    plt.xlabel("Electron efficiency, %", loc="right")
    plt.ylabel("Pion rejection factor", loc="top")
    plt.title(rf"${energy_bin_low:.1f} < |\vec{{p}}| < {energy_bin_high:.1f}$ GeV")
    plt.legend(loc="lower left")
    plt.yscale("log")
    plt.savefig(output_dir / f"roc_{energy_bin_low:.1f}_{energy_bin_high:.1f}.pdf", bbox_inches="tight")
    plt.show()
    plt.clf()
# Summary plot: pion rejection at 95% electron efficiency vs. bin center energy.
plt.plot(
    (energy_bin_edges[:-1] + energy_bin_edges[1:]) / 2,
    [1 / roc[energy_bin_ix](0.95) for energy_bin_ix in range(len(energy_bin_edges) - 1)],
    marker=".",
    label="",
)
plt.yscale("log")
plt.legend()
plt.xlabel("Energy, GeV", loc="right")
plt.ylabel("Pion rejection at 95%", loc="top")
plt.savefig(output_dir / f"pion_rej.pdf", bbox_inches="tight")
plt.show()
#+end_src
#+begin_src jupyter-python
# Export the trained classifier to ONNX, then trim the exported graph.
clf.save_model(
    output_dir / "EcalEndcapN_pi_rejection.onnx",
    format="onnx",
    export_parameters={
        "onnx_doc_string": "Classifier model for pion rejection in EEEMCal",
        "onnx_graph_name": "CalorimeterParticleID_BinaryClassification",
    }
)
import onnx
model = onnx.load(output_dir / "EcalEndcapN_pi_rejection.onnx")
onnx.checker.check_model(model)
# Rebuild the graph keeping only the first node and additionally exposing the
# first intermediate tensor (value_info[0]) as a graph output — presumably the
# raw probability tensor needed by the consumer; TODO confirm against the
# layout of CatBoost's ONNX export.
graph_def = onnx.helper.make_graph(
    nodes=[model.graph.node[0]],
    name=model.graph.name,
    inputs=model.graph.input,
    outputs=[model.graph.output[0], model.graph.value_info[0]],
    initializer=model.graph.initializer
)
model_def = onnx.helper.make_model(graph_def, producer_name=model.producer_name)
# Re-declare the opset: the trimmed graph uses only ai.onnx.ml operators.
del model_def.opset_import[:]
op_set = model_def.opset_import.add()
op_set.domain = "ai.onnx.ml"
op_set.version = 2
model_def = onnx.shape_inference.infer_shapes(model_def)
onnx.checker.check_model(model_def)
# Overwrite the original export with the trimmed model.
onnx.save(model_def, output_dir / "EcalEndcapN_pi_rejection.onnx")
#+end_src
#+RESULTS:
:results:
:end:
#+begin_src jupyter-python
# Cross-check: if the input trees already carry ParticleID probabilities
# (produced by EICrecon inference), compare them against this notebook's
# freshly trained classifier.
# NOTE(review): pi_train/e_train, *_leading_cluster_ixs, eval_x and plt come
# from earlier cells not visible here.
if "_EcalEndcapNParticleIDOutput_probability_tensor_floatData" in pi_train.fields:
    # infer the per-cluster output multiplicity from the flattened tensor data
    nums = ak.num(pi_train["_EcalEndcapNParticleIDOutput_probability_tensor_floatData"], axis=1)
    num_outputs = ak.min(nums[nums > 0])
    print(f"{num_outputs} outputs")
    # regroup the flat floatData into per-cluster rows, then select the leading cluster
    pi_train_proba = ak.flatten(ak.unflatten(pi_train["_EcalEndcapNParticleIDOutput_probability_tensor_floatData"], num_outputs, axis=1)[pi_train_leading_cluster_ixs])
    e_train_proba = ak.flatten(ak.unflatten(e_train["_EcalEndcapNParticleIDOutput_probability_tensor_floatData"], num_outputs, axis=1)[e_train_leading_cluster_ixs])
    train_proba = ak.concatenate([pi_train_proba, e_train_proba])
    pi_eval_proba = ak.flatten(ak.unflatten(pi_eval["_EcalEndcapNParticleIDOutput_probability_tensor_floatData"], num_outputs, axis=1)[pi_eval_leading_cluster_ixs])
    e_eval_proba = ak.flatten(ak.unflatten(e_eval["_EcalEndcapNParticleIDOutput_probability_tensor_floatData"], num_outputs, axis=1)[e_eval_leading_cluster_ixs])
    eval_proba = ak.concatenate([pi_eval_proba, e_eval_proba])
    # difference between stored and locally computed probabilities (ideally ~0)
    plt.hist(clf.predict_proba(eval_x.to_numpy())[:,1] - eval_proba[:,1].to_numpy())
    plt.savefig(output_dir / f"proba_diff.pdf", bbox_inches="tight")
    plt.show()
else:
    print("EcalEndcapNParticleIDOutput not present")
#+end_src
# Simulation stage for the calo_pid benchmark. The parallel matrix fans the
# work out over particle species and 10-file index ranges (100 files per
# species in total). Leading indentation restored — it was stripped by the
# web extraction, which left the YAML invalid.
sim:calo_pid:
  extends: .det_benchmark
  stage: simulate
  parallel:
    matrix:
      - PARTICLE: ["e-", "pi-"]
        # "<first> <last>" pairs consumed by `seq` in the script below
        INDEX_RANGE: [
          "0 9",
          "10 19",
          "20 29",
          "30 39",
          "40 49",
          "50 59",
          "60 69",
          "70 79",
          "80 89",
          "90 99",
        ]
  script:
    # Build one Snakemake target path per file index in the assigned range.
    - |
      snakemake $SNAKEMAKE_FLAGS --cores 5 \
        $(seq --format="sim_output/calo_pid/epic_inner_detector/${PARTICLE}/100MeVto20GeV/130to177deg/${PARTICLE}_100MeVto20GeV_130to177deg.%04.f.eicrecon.tree.edm4eic.root" ${INDEX_RANGE})
# Benchmark stage: runs the calo_pid analysis over the simulated samples.
# Leading indentation restored — it was stripped by the web extraction,
# which left the YAML invalid.
bench:calo_pid:
  allow_failure: true # until inference merged into EICrecon
  extends: .det_benchmark
  stage: benchmarks
  needs:
    - ["sim:calo_pid"]
  script:
    # Install the analysis' Python dependencies into a job-local user base.
    - export PYTHONUSERBASE=$LOCAL_DATA_PATH/deps
    - pip install -r benchmarks/calo_pid/requirements.txt
    - snakemake $SNAKEMAKE_FLAGS --cores 1 results/epic_inner_detector/calo_pid
# Collect stage: verifies the benchmark outputs can be cleanly regenerated
# while preserving the already-produced results directory as artifacts.
# Leading indentation restored — it was stripped by the web extraction,
# which left the YAML invalid.
collect_results:calo_pid:
  allow_failure: true # until inference merged into EICrecon
  extends: .det_benchmark
  stage: collect
  needs:
    - "bench:calo_pid"
  script:
    - ls -lrht
    - mv results{,_save}/ # move results directory out of the way to preserve it
    - snakemake $SNAKEMAKE_FLAGS --cores 1 --delete-all-output results/epic_inner_detector/calo_pid
    - mv results{_save,}/
awkward >= 2.4.0
catboost
onnx
scikit-learn
uproot >= 5.2.0
vector
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment