From aa5448654b63d075654c5282d192bf78742762af Mon Sep 17 00:00:00 2001
From: Dmitry Kalinkin <dmitry.kalinkin@gmail.com>
Date: Wed, 18 Dec 2024 23:57:39 -0500
Subject: [PATCH] calo_pid: pass input files via a list file

---
 benchmarks/calo_pid/Snakefile    | 23 +++++++++++++----------
 benchmarks/calo_pid/calo_pid.org | 13 +++++++++----
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/benchmarks/calo_pid/Snakefile b/benchmarks/calo_pid/Snakefile
index 3d32bd64..05c77f3e 100644
--- a/benchmarks/calo_pid/Snakefile
+++ b/benchmarks/calo_pid/Snakefile
@@ -64,22 +64,25 @@ exec env DETECTOR_CONFIG={wildcards.DETECTOR_CONFIG} \
 """
 
 
-rule calo_pid:
+rule calo_pid_input_list:
     input:
         electrons=expand(
-            "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
-            PARTICLE=["e-"],
-            ENERGY=["100MeVto20GeV"],
-            PHASE_SPACE=["130to177deg"],
-            INDEX=range(100),
-        ),
-        pions=expand(
-            "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{PARTICLE}/{ENERGY}/{PHASE_SPACE}/{PARTICLE}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
-            PARTICLE=["pi-"],
+            "sim_output/calo_pid/{{DETECTOR_CONFIG}}/{{PARTICLE}}/{ENERGY}/{PHASE_SPACE}/{{PARTICLE}}_{ENERGY}_{PHASE_SPACE}.{INDEX:04d}.eicrecon.tree.edm4eic.root",
             ENERGY=["100MeVto20GeV"],
             PHASE_SPACE=["130to177deg"],
             INDEX=range(100),
         ),
+    output:
+        "listing/calo_pid/{DETECTOR_CONFIG}/{PARTICLE}.lst",
+    run:
+        with open(output[0], "wt") as fp:  # list file: one input path per line
+            fp.writelines(f"{path}\n" for path in input)  # newline-terminate every entry, including the last
+
+
+rule calo_pid:
+    input:
+        electrons="listing/calo_pid/{DETECTOR_CONFIG}/e-.lst",
+        pions="listing/calo_pid/{DETECTOR_CONFIG}/pi-.lst",
         matplotlibrc=".matplotlibrc",
         script="benchmarks/calo_pid/calo_pid.py",
     output:
diff --git a/benchmarks/calo_pid/calo_pid.org b/benchmarks/calo_pid/calo_pid.org
index a46aec02..965af140 100644
--- a/benchmarks/calo_pid/calo_pid.org
+++ b/benchmarks/calo_pid/calo_pid.org
@@ -32,8 +32,8 @@ vector.register_awkward()
 #+begin_src jupyter-python :results silent
 DETECTOR_CONFIG=os.environ.get("DETECTOR_CONFIG")
 PLOT_TITLE=os.environ.get("PLOT_TITLE")
-INPUT_PIONS=os.environ.get("INPUT_PIONS", "").split(" ")
-INPUT_ELECTRONS=os.environ.get("INPUT_ELECTRONS", "").split(" ")
+INPUT_PIONS=os.environ.get("INPUT_PIONS")  # path to a list file (one input file path per line); None if unset
+INPUT_ELECTRONS=os.environ.get("INPUT_ELECTRONS")  # path to a list file (one input file path per line); None if unset
 
 output_dir=Path(os.environ.get("OUTPUT_DIR", "./"))
 output_dir.mkdir(parents=True, exist_ok=True)
@@ -75,8 +75,13 @@ def filter_pointing(events):
     cond = (part_momentum.eta[:,0] > -3.5) & (part_momentum.eta[:,0] < -2.)
     return events[cond]
 
-e = filter_pointing(uproot.concatenate({filename: "events" for filename in INPUT_ELECTRONS}, filter_name=["MCParticles.*", "*EcalEndcapN*"]))
-pi = filter_pointing(uproot.concatenate({filename: "events" for filename in INPUT_PIONS}, filter_name=["MCParticles.*", "*EcalEndcapN*"]))
+def readlist(path):
+    # Return the paths stored one per line in the list file `path`; blank lines are skipped so they never become empty file names.
+    with open(path, "rt") as fp:
+        return [line.rstrip() for line in fp if line.strip()]
+
+e = filter_pointing(uproot.concatenate({filename: "events" for filename in readlist(INPUT_ELECTRONS)}, filter_name=["MCParticles.*", "*EcalEndcapN*"]))
+pi = filter_pointing(uproot.concatenate({filename: "events" for filename in readlist(INPUT_PIONS)}, filter_name=["MCParticles.*", "*EcalEndcapN*"]))
 
 e_train = e[:len(pi)//2]
 pi_train = pi[:len(pi)//2]
-- 
GitLab