First go at full workflow (pending other PRs)

4646a89c · Simon Gardner · c5f076c8 · 4646a89c · 4646a89c · 4646a89c
Commit 4646a89c authored 6 months ago by Simon Gardner
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -125,62 +125,64 @@ get_data:
      - runner_system_failure
 include: 
-  - local: 'benchmarks/backgrounds/config.yml'
+  # - local: 'benchmarks/backgrounds/config.yml'
-  - local: 'benchmarks/backwards_ecal/config.yml'
+  # - local: 'benchmarks/backwards_ecal/config.yml'
-  - local: 'benchmarks/calo_pid/config.yml'
+  # - local: 'benchmarks/calo_pid/config.yml'
-  - local: 'benchmarks/ecal_gaps/config.yml'
+  # - local: 'benchmarks/ecal_gaps/config.yml'
-  - local: 'benchmarks/tracking_detectors/config.yml'
+  # - local: 'benchmarks/tracking_detectors/config.yml'
-  - local: 'benchmarks/tracking_performances/config.yml'
+  # - local: 'benchmarks/tracking_performances/config.yml'
-  - local: 'benchmarks/tracking_performances_dis/config.yml'
+  # - local: 'benchmarks/tracking_performances_dis/config.yml'
-  - local: 'benchmarks/barrel_ecal/config.yml'
+  # - local: 'benchmarks/barrel_ecal/config.yml'
-  - local: 'benchmarks/barrel_hcal/config.yml'
+  # - local: 'benchmarks/barrel_hcal/config.yml'
-  - local: 'benchmarks/lfhcal/config.yml'
+  # - local: 'benchmarks/lfhcal/config.yml'
-  - local: 'benchmarks/zdc/config.yml'
+  # - local: 'benchmarks/zdc/config.yml'
-  - local: 'benchmarks/zdc_lyso/config.yml'
+  # - local: 'benchmarks/zdc_lyso/config.yml'
-  - local: 'benchmarks/zdc_neutron/config.yml'
+  # - local: 'benchmarks/zdc_neutron/config.yml'
-  - local: 'benchmarks/zdc_photon/config.yml'
+  # - local: 'benchmarks/zdc_photon/config.yml'
-  - local: 'benchmarks/zdc_pi0/config.yml'
+  # - local: 'benchmarks/zdc_pi0/config.yml'
-  - local: 'benchmarks/material_scan/config.yml'
+  # - local: 'benchmarks/material_scan/config.yml'
-  - local: 'benchmarks/pid/config.yml'
+  # - local: 'benchmarks/pid/config.yml'
-  - local: 'benchmarks/timing/config.yml'
+  # - local: 'benchmarks/timing/config.yml'
-  - local: 'benchmarks/b0_tracker/config.yml'
+  # - local: 'benchmarks/b0_tracker/config.yml'
-  - local: 'benchmarks/insert_muon/config.yml'
+  # - local: 'benchmarks/insert_muon/config.yml'
-  - local: 'benchmarks/insert_tau/config.yml'
+  # - local: 'benchmarks/insert_tau/config.yml'
-  - local: 'benchmarks/zdc_sigma/config.yml'
+  # - local: 'benchmarks/zdc_sigma/config.yml'
-  - local: 'benchmarks/zdc_lambda/config.yml'
+  # - local: 'benchmarks/zdc_lambda/config.yml'
-  - local: 'benchmarks/insert_neutron/config.yml'
+  # - local: 'benchmarks/insert_neutron/config.yml'
-  - local: 'benchmarks/femc_electron/config.yml'
+  # - local: 'benchmarks/femc_electron/config.yml'
-  - local: 'benchmarks/femc_photon/config.yml'
+  # - local: 'benchmarks/femc_photon/config.yml'
-  - local: 'benchmarks/femc_pi0/config.yml'
+  # - local: 'benchmarks/femc_pi0/config.yml'
+  - local: 'benchmarks/lowq2/reconstruction_training/config.yml'
 deploy_results:
  allow_failure: true
  stage: deploy
  needs:
-    - "collect_results:backgrounds"
+    # - "collect_results:backgrounds"
-    - "collect_results:backwards_ecal"
+    # - "collect_results:backwards_ecal"
-    - "collect_results:barrel_ecal"
+    # - "collect_results:barrel_ecal"
-    - "collect_results:barrel_hcal"
+    # - "collect_results:barrel_hcal"
-    - "collect_results:calo_pid"
+    # - "collect_results:calo_pid"
-    - "collect_results:ecal_gaps"
+    # - "collect_results:ecal_gaps"
-    - "collect_results:lfhcal"
+    # - "collect_results:lfhcal"
-    - "collect_results:material_scan"
+    # - "collect_results:material_scan"
-    - "collect_results:pid"
+    # - "collect_results:pid"
-    - "collect_results:tracking_performance"
+    # - "collect_results:tracking_performance"
-    - "collect_results:tracking_performance_campaigns"
+    # - "collect_results:tracking_performance_campaigns"
-    - "collect_results:zdc_sigma"
+    # - "collect_results:zdc_sigma"
-    - "collect_results:zdc_lambda"
+    # - "collect_results:zdc_lambda"
-    - "collect_results:insert_neutron"
+    # - "collect_results:insert_neutron"
-    - "collect_results:tracking_performances_dis"
+    # - "collect_results:tracking_performances_dis"
-    - "collect_results:zdc"
+    # - "collect_results:zdc"
-    - "collect_results:zdc_lyso"
+    # - "collect_results:zdc_lyso"
-    - "collect_results:zdc_neutron"
+    # - "collect_results:zdc_neutron"
-    - "collect_results:insert_muon"
+    # - "collect_results:insert_muon"
-    - "collect_results:insert_tau"
+    # - "collect_results:insert_tau"
-    - "collect_results:zdc_photon"
+    # - "collect_results:zdc_photon"
-    - "collect_results:zdc_pi0"
+    # - "collect_results:zdc_pi0"
-    - "collect_results:femc_electron"
+    # - "collect_results:femc_electron"
-    - "collect_results:femc_photon"
+    # - "collect_results:femc_photon"
-    - "collect_results:femc_pi0"
+    # - "collect_results:femc_pi0"
+    - "collect_results:lowq2_reconstruction_training"
  script:
    - snakemake $SNAKEMAKE_FLAGS --cores 1 results/metadata.json
    - find results -print | sort | tee summary.txt

--- a/benchmarks/lowq2/reconstruction_training/Snakefile
+++ b/benchmarks/lowq2/reconstruction_training/Snakefile
+import os
+# Get the LOWQ2_DATADIR environment variable, or use a default value if it is not set
+outputdir  = os.getenv("LOWQ2_DATADIR", "")
+resultsdir = os.getenv("LOWQ2_RESULTSDIR", "")
 # Creates or copies the feature and target tensors for the lowq2 reconstruction training into a new file.
 rule lowq2_tensor_recon:
    output:
-        "tensors.eicrecon.tree.edm4eic.root",
+        f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
    log:
-        "tensors.eicrecon.tree.edm4eic.root.log",
+        f"{outputdir}tensors.eicrecon.tree.edm4eic.root.log",
    params:
-        input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,3), number=range(0, 20)),
+        input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,2), number=range(0, 1)),
        compact_xml="epic_lowq2.xml",
    shell:  
        """
@@ -18,10 +24,10 @@ rule lowq2_tensor_recon:
 # Trains a regression model to predict the TaggerTrackerTargetTensor from the TaggerTrackerFeatureTensor.
 rule lowq2_reconstruction_training:
    input:
-        data="tensors.eicrecon.tree.edm4eic.root",
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
        script="TaggerRegression.py",
    output:
-        "TestTaggerRegression.onnx",
+        f"{outputdir}TestTaggerTrackerTransportation.onnx",
    shell:
        """
        python {input.script} --dataFiles {input.data} --outModelFile {output}
@@ -30,9 +36,9 @@ rule lowq2_reconstruction_training:
 # Runs the inference with the default model.
 rule lowq2_reconstruction_inference:
    input:
-        data="tensors.eicrecon.tree.edm4eic.root",
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
    output:
-        "TaggerInference.eicrecon.tree.edm4eic.root",
+        f"{outputdir}TaggerInference.eicrecon.tree.edm4eic.root",
    params:
        compact_xml="epic_lowq2.xml",
    shell:
@@ -43,13 +49,13 @@ rule lowq2_reconstruction_inference:
        -Pdd4hep:xml_files={params.compact_xml} \
        """
-# Runs the inference with the new model.
+# Check inference runs with the new model.
 rule lowq2_reconstruction_new_inference:
    input:
-        data="tensors.eicrecon.tree.edm4eic.root",
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
-        model="TestTaggerRegression.onnx",
+        model=f"{outputdir}TestTaggerTrackerTransportation.onnx",
    output:
-        "TaggerInferenceNew.eicrecon.tree.edm4eic.root",
+        f"{outputdir}TaggerInferenceNew.eicrecon.tree.edm4eic.root",
    params:
        compact_xml="epic_lowq2.xml",
    shell:
@@ -61,3 +67,29 @@ rule lowq2_reconstruction_new_inference:
        -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model} \
        -Peicrecon:LogLevel=error \
        """
+# Create plots showing the performance of a model.
+rule lowq2_reconstruction_new_plot:
+    input:
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
+        model=lambda wildcards: f"{outputdir}{wildcards.model}.onnx",
+    output:
+        directory(expand(f"{resultsdir}{{model}}", model="{model}")),
+    shell:
+        """
+        mkdir -p {output}
+        python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output}
+        """
+# Create plots showing the performance of a model.
+rule lowq2_reconstruction_old_plot:
+    input:
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
+        model=lambda wildcards: f"calibrations/onnx/{wildcards.model}.onnx",
+    output:
+        directory(expand(f"{resultsdir}{{model}}", model="{model}")),
+    shell:
+        """
+        mkdir -p {output}
+        python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output}
+        """
\ No newline at end of file
--- a/benchmarks/lowq2/reconstruction_training/TaggerRegression.py
+++ b/benchmarks/lowq2/reconstruction_training/TaggerRegression.py
@@ -38,7 +38,7 @@ val_loader   = DataLoader(val_dataset,   batch_size=batch_size, shuffle=False)
 print(f"Training data: {len(train_input_data)} samples")
-epochs = 100
+epochs = 1000
 model  = trainModel(epochs, train_loader, val_loader)
 # Save the trained model to ONNX format

--- a/benchmarks/lowq2/reconstruction_training/TestModel.py
+++ b/benchmarks/lowq2/reconstruction_training/TestModel.py
+import onnxruntime as ort
+import argparse
+import numpy as np
+from ProcessData import create_arrays
+import matplotlib.pyplot as plt
+from matplotlib.colors import LogNorm
+from scipy.stats import norm
+from scipy.optimize import curve_fit
+# Parse arguments
+parser = argparse.ArgumentParser(description='Train a regression model for the Tagger.')
+parser.add_argument('--modelFile', type=str, default="regression_model.onnx", help='Path to the ONNX model file')
+parser.add_argument('--dataFiles', type=str, nargs='+', help='Path to the data files')
+parser.add_argument('--outDir', type=str, default=".", help='Output directory')
+args = parser.parse_args()
+modelFile     = args.modelFile
+dataFiles     = args.dataFiles
+outDir        = args.outDir
+outGraphFile  = outDir + "/output_vs_target.png"
+outGraphFile2 = outDir + "/output_vs_target2.png"
+outGraphFile3 = outDir + "/output_vs_target3.png"
+outGraphFile4 = outDir + "/output_vs_target4.png"
+input_data, target_data = create_arrays(dataFiles)
+target_data = np.array(target_data)
+# Load the ONNX model
+session = ort.InferenceSession(modelFile)
+# Run the model on the input data
+input_name = session.get_inputs()[0].name
+output_name = session.get_outputs()[0].name
+input_data = np.array(input_data,dtype=np.float32)
+output = session.run([output_name], {input_name: input_data})
+output = np.array(output[0])
+out_theta = np.arctan2(np.sqrt(output[:,0]**2 + output[:,1]**2),output[:,2])
+out_phi = np.arctan2(output[:,1],output[:,0])
+out_mag = np.sqrt(output[:,0]**2 + output[:,1]**2 + output[:,2]**2)
+in_theta = np.arctan2(np.sqrt(target_data[:,0]**2 + target_data[:,1]**2),target_data[:,2])
+in_phi = np.arctan2(target_data[:,1],target_data[:,0])
+in_mag = np.sqrt(target_data[:,0]**2 + target_data[:,1]**2 + target_data[:,2]**2)
+thetadiff = out_theta - in_theta
+phidiff = out_phi - in_phi
+# Move phidiff to within -pi/2 and pi/2
+phidiff = (phidiff + np.pi) % (2 * np.pi) - np.pi
+magdiff = out_mag - in_mag
+diff = (target_data - output)/target_data
+diffrange = [[-5,5],[-5,5],[-0.5,0.5]]
+datarange = [[-0.02,0.02],[-0.02,0.02],[-1,0]]
+# Use the 'seismic' colormap
+cmap = plt.get_cmap('seismic')
+# Creates histograms to compare the target and output data
+fig, axs = plt.subplots(3, 3, figsize=(12, 12))
+for i in range(3):
+    # 2D histograms showing trends in the data
+    axs[0,i].hist2d(target_data[:,i], output[:,i], bins=100, range=[datarange[i],datarange[i]], cmap="seismic", norm=LogNorm(), label="Output vs Target")
+    axs[0,i].set_xlabel(f"Variable {i} Target")
+    axs[0,i].set_ylabel(f"Variable {i} Output")
+    axs[1,i].hist(diff[:,i], bins=100, alpha=0.5, range=diffrange[i], label="Difference")
+    axs[1,i].set_xlabel(f"Variable {i} Difference")
+    axs[1,i].set_ylabel("Counts")
+    axs[2,i].hist2d(target_data[:,i], diff[:,i], bins=100, range=[datarange[i],diffrange[i]], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
+    axs[2,i].set_xlabel(f"Variable {i} Target")
+    axs[2,i].set_ylabel(f"Variable {i} Difference")
+plt.show()
+plt.savefig(outGraphFile)
+# Creates histograms to compare theta, phi and mag target and output data
+fig2, axs2 = plt.subplots(3, 3, figsize=(12, 12))
+thetarange = [np.pi-0.01,np.pi]
+phirange = [-np.pi,np.pi]
+magrange = [0,1]
+thetadiffrange = [-0.01,0.01]
+phidiffrange = [-np.pi,np.pi]
+magdiffrange = [-0.1,0.1]
+# 2D histograms showing trends in the data
+axs2[0,0].hist2d(out_theta, in_theta, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target")
+axs2[0,0].set_xlabel("Theta Target")
+axs2[0,0].set_ylabel("Theta Output")
+axs2[0,1].hist2d(out_phi, in_phi, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target")
+axs2[0,1].set_xlabel("Phi Target")
+axs2[0,1].set_ylabel("Phi Output")
+axs2[0,2].hist2d(out_mag, in_mag, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target")
+axs2[0,2].set_xlabel("Mag Target")
+axs2[0,2].set_ylabel("Mag Output")
+axs2[1,0].hist(thetadiff, bins=100, alpha=0.5, range=thetadiffrange, label="Difference")
+axs2[1,0].set_xlabel("Theta Difference")
+axs2[1,0].set_ylabel("Counts")
+axs2[1,1].hist(phidiff, bins=100, alpha=0.5, range=phidiffrange, label="Difference")
+axs2[1,1].set_xlabel("Phi Difference")
+axs2[1,1].set_ylabel("Counts")
+axs2[1,2].hist(magdiff, bins=100, alpha=0.5, range=magdiffrange, label="Difference")
+axs2[1,2].set_xlabel("Mag Difference")
+axs2[1,2].set_ylabel("Counts")
+axs2[2,0].hist2d(in_theta, thetadiff, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
+axs2[2,0].set_xlabel("Theta Target")
+axs2[2,0].set_ylabel("Theta Difference")
+axs2[2,1].hist2d(in_phi, phidiff, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target")
+axs2[2,1].set_xlabel("Phi Target")
+axs2[2,1].set_ylabel("Phi Difference")
+axs2[2,2].hist2d(in_mag, magdiff, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
+axs2[2,2].set_xlabel("Mag Target")
+axs2[2,2].set_ylabel("Mag Difference")
+plt.show()
+plt.savefig(outGraphFile2)
+# Create histograms where the theta value has been cut at less than 3.14
+fig3, axs3 = plt.subplots(3, 3, figsize=(12, 12))
+out_theta_cut = out_theta[out_theta < 3.14]
+in_theta_cut = in_theta[out_theta < 3.14]
+thetadiff_cut = thetadiff[out_theta < 3.14]
+out_phi_cut = out_phi[out_theta < 3.14]
+in_phi_cut = in_phi[out_theta < 3.14]
+phidiff_cut = phidiff[out_theta < 3.14]
+out_mag_cut = out_mag[out_theta < 3.14]
+in_mag_cut = in_mag[out_theta < 3.14]
+magdiff_cut = magdiff[out_theta < 3.14]
+axs3[0,0].hist2d(out_theta_cut, in_theta_cut, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target")
+axs3[0,0].set_xlabel("Theta Target")
+axs3[0,0].set_ylabel("Theta Output")
+axs3[0,1].hist2d(out_phi_cut, in_phi_cut, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target")
+axs3[0,1].set_xlabel("Phi Target")
+axs3[0,1].set_ylabel("Phi Output")
+axs3[0,2].hist2d(out_mag_cut, in_mag_cut, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target")
+axs3[0,2].set_xlabel("Mag Target")
+axs3[0,2].set_ylabel("Mag Output")
+axs3[1,0].hist(thetadiff_cut, bins=100, alpha=0.5, range=thetadiffrange, label="Difference")
+axs3[1,0].set_xlabel("Theta Difference")
+axs3[1,0].set_ylabel("Counts")
+axs3[1,1].hist(phidiff_cut, bins=100, alpha=0.5, range=phidiffrange, label="Difference")
+axs3[1,1].set_xlabel("Phi Difference")
+axs3[1,1].set_ylabel("Counts")
+axs3[1,2].hist(magdiff_cut, bins=100, alpha=0.5, range=magdiffrange, label="Difference")
+axs3[1,2].set_xlabel("Mag Difference")
+axs3[1,2].set_ylabel("Counts")
+axs3[2,0].hist2d(in_theta_cut, thetadiff_cut, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target")    
+axs3[2,0].set_xlabel("Theta Target")
+axs3[2,0].set_ylabel("Theta Difference")
+axs3[2,1].hist2d(in_phi_cut, phidiff_cut, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target")
+axs3[2,1].set_xlabel("Phi Target")
+axs3[2,1].set_ylabel("Phi Difference")
+axs3[2,2].hist2d(in_mag_cut, magdiff_cut, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
+axs3[2,2].set_xlabel("Mag Target")
+axs3[2,2].set_ylabel("Mag Difference")
+plt.show()
+plt.savefig(outGraphFile3)
+# Create plots where a Gaussian has been fitted to the data
+# Function to fit a Gaussian and plot the results
+def plot_gaussian_fit(ax, data, range, xlabel, ylabel):
+    def gaussian(x, mu, sigma, amplitude):
+        return amplitude * np.exp(-0.5 * ((x - mu) / sigma) ** 2)
+    hist, bin_edges = np.histogram(data, bins=100, range=range, density=True)
+    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
+    popt, _ = curve_fit(gaussian, bin_centers, hist, p0=[0, 0.01, 1])
+    mu, sigma, amplitude = popt
+    print(f"mu={mu}, sigma={sigma}, amplitude={amplitude}")
+    x = np.linspace(range[0], range[1], 100)
+    ax.plot(x, gaussian(x, *popt), 'k', linewidth=2)
+    ax.set_xlabel(xlabel)
+    ax.set_ylabel(ylabel)
+    ax.hist(data, bins=100, alpha=0.5, range=range, edgecolor='black', density=True)
+    ax.legend([f'Fit: $\mu$={mu:.5f}, $\sigma$={sigma:.5f}'])
+# Create histograms with Gaussian fits
+fig4, axs4 = plt.subplots(3, 1, figsize=(8, 12))
+plot_gaussian_fit(axs4[0], thetadiff, thetadiffrange, "Theta Difference", "Density")
+plot_gaussian_fit(axs4[1], phidiff_cut, phidiffrange, "Phi Difference", "Density")
+plot_gaussian_fit(axs4[2], magdiff, magdiffrange, "Mag Difference", "Density")
+plt.show()
+plt.savefig(outGraphFile4)
\ No newline at end of file