Skip to content
Snippets Groups Projects
Commit 4646a89c authored by Simon Gardner's avatar Simon Gardner
Browse files

First go at full workflow (pending other PRs)

parent c5f076c8
Branches
No related tags found
No related merge requests found
...@@ -125,62 +125,64 @@ get_data: ...@@ -125,62 +125,64 @@ get_data:
- runner_system_failure - runner_system_failure
include: include:
- local: 'benchmarks/backgrounds/config.yml' # - local: 'benchmarks/backgrounds/config.yml'
- local: 'benchmarks/backwards_ecal/config.yml' # - local: 'benchmarks/backwards_ecal/config.yml'
- local: 'benchmarks/calo_pid/config.yml' # - local: 'benchmarks/calo_pid/config.yml'
- local: 'benchmarks/ecal_gaps/config.yml' # - local: 'benchmarks/ecal_gaps/config.yml'
- local: 'benchmarks/tracking_detectors/config.yml' # - local: 'benchmarks/tracking_detectors/config.yml'
- local: 'benchmarks/tracking_performances/config.yml' # - local: 'benchmarks/tracking_performances/config.yml'
- local: 'benchmarks/tracking_performances_dis/config.yml' # - local: 'benchmarks/tracking_performances_dis/config.yml'
- local: 'benchmarks/barrel_ecal/config.yml' # - local: 'benchmarks/barrel_ecal/config.yml'
- local: 'benchmarks/barrel_hcal/config.yml' # - local: 'benchmarks/barrel_hcal/config.yml'
- local: 'benchmarks/lfhcal/config.yml' # - local: 'benchmarks/lfhcal/config.yml'
- local: 'benchmarks/zdc/config.yml' # - local: 'benchmarks/zdc/config.yml'
- local: 'benchmarks/zdc_lyso/config.yml' # - local: 'benchmarks/zdc_lyso/config.yml'
- local: 'benchmarks/zdc_neutron/config.yml' # - local: 'benchmarks/zdc_neutron/config.yml'
- local: 'benchmarks/zdc_photon/config.yml' # - local: 'benchmarks/zdc_photon/config.yml'
- local: 'benchmarks/zdc_pi0/config.yml' # - local: 'benchmarks/zdc_pi0/config.yml'
- local: 'benchmarks/material_scan/config.yml' # - local: 'benchmarks/material_scan/config.yml'
- local: 'benchmarks/pid/config.yml' # - local: 'benchmarks/pid/config.yml'
- local: 'benchmarks/timing/config.yml' # - local: 'benchmarks/timing/config.yml'
- local: 'benchmarks/b0_tracker/config.yml' # - local: 'benchmarks/b0_tracker/config.yml'
- local: 'benchmarks/insert_muon/config.yml' # - local: 'benchmarks/insert_muon/config.yml'
- local: 'benchmarks/insert_tau/config.yml' # - local: 'benchmarks/insert_tau/config.yml'
- local: 'benchmarks/zdc_sigma/config.yml' # - local: 'benchmarks/zdc_sigma/config.yml'
- local: 'benchmarks/zdc_lambda/config.yml' # - local: 'benchmarks/zdc_lambda/config.yml'
- local: 'benchmarks/insert_neutron/config.yml' # - local: 'benchmarks/insert_neutron/config.yml'
- local: 'benchmarks/femc_electron/config.yml' # - local: 'benchmarks/femc_electron/config.yml'
- local: 'benchmarks/femc_photon/config.yml' # - local: 'benchmarks/femc_photon/config.yml'
- local: 'benchmarks/femc_pi0/config.yml' # - local: 'benchmarks/femc_pi0/config.yml'
- local: 'benchmarks/lowq2/reconstruction_training/config.yml'
deploy_results: deploy_results:
allow_failure: true allow_failure: true
stage: deploy stage: deploy
needs: needs:
- "collect_results:backgrounds" # - "collect_results:backgrounds"
- "collect_results:backwards_ecal" # - "collect_results:backwards_ecal"
- "collect_results:barrel_ecal" # - "collect_results:barrel_ecal"
- "collect_results:barrel_hcal" # - "collect_results:barrel_hcal"
- "collect_results:calo_pid" # - "collect_results:calo_pid"
- "collect_results:ecal_gaps" # - "collect_results:ecal_gaps"
- "collect_results:lfhcal" # - "collect_results:lfhcal"
- "collect_results:material_scan" # - "collect_results:material_scan"
- "collect_results:pid" # - "collect_results:pid"
- "collect_results:tracking_performance" # - "collect_results:tracking_performance"
- "collect_results:tracking_performance_campaigns" # - "collect_results:tracking_performance_campaigns"
- "collect_results:zdc_sigma" # - "collect_results:zdc_sigma"
- "collect_results:zdc_lambda" # - "collect_results:zdc_lambda"
- "collect_results:insert_neutron" # - "collect_results:insert_neutron"
- "collect_results:tracking_performances_dis" # - "collect_results:tracking_performances_dis"
- "collect_results:zdc" # - "collect_results:zdc"
- "collect_results:zdc_lyso" # - "collect_results:zdc_lyso"
- "collect_results:zdc_neutron" # - "collect_results:zdc_neutron"
- "collect_results:insert_muon" # - "collect_results:insert_muon"
- "collect_results:insert_tau" # - "collect_results:insert_tau"
- "collect_results:zdc_photon" # - "collect_results:zdc_photon"
- "collect_results:zdc_pi0" # - "collect_results:zdc_pi0"
- "collect_results:femc_electron" # - "collect_results:femc_electron"
- "collect_results:femc_photon" # - "collect_results:femc_photon"
- "collect_results:femc_pi0" # - "collect_results:femc_pi0"
- "collect_results:lowq2_reconstruction_training"
script: script:
- snakemake $SNAKEMAKE_FLAGS --cores 1 results/metadata.json - snakemake $SNAKEMAKE_FLAGS --cores 1 results/metadata.json
- find results -print | sort | tee summary.txt - find results -print | sort | tee summary.txt
......
import os
# Get the LOWQ2_DATADIR environment variable, or use a default value if it is not set
outputdir = os.getenv("LOWQ2_DATADIR", "")
resultsdir = os.getenv("LOWQ2_RESULTSDIR", "")
# Creates or copies the feature and target tensors for the lowq2 reconstruction training into a new file. # Creates or copies the feature and target tensors for the lowq2 reconstruction training into a new file.
rule lowq2_tensor_recon: rule lowq2_tensor_recon:
output: output:
"tensors.eicrecon.tree.edm4eic.root", f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
log: log:
"tensors.eicrecon.tree.edm4eic.root.log", f"{outputdir}tensors.eicrecon.tree.edm4eic.root.log",
params: params:
input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,3), number=range(0, 20)), input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,2), number=range(0, 1)),
compact_xml="epic_lowq2.xml", compact_xml="epic_lowq2.xml",
shell: shell:
""" """
...@@ -18,10 +24,10 @@ rule lowq2_tensor_recon: ...@@ -18,10 +24,10 @@ rule lowq2_tensor_recon:
# Trains a regression model to predict the TaggerTrackerTargetTensor from the TaggerTrackerFeatureTensor. # Trains a regression model to predict the TaggerTrackerTargetTensor from the TaggerTrackerFeatureTensor.
rule lowq2_reconstruction_training: rule lowq2_reconstruction_training:
input: input:
data="tensors.eicrecon.tree.edm4eic.root", data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
script="TaggerRegression.py", script="TaggerRegression.py",
output: output:
"TestTaggerRegression.onnx", f"{outputdir}TestTaggerTrackerTransportation.onnx",
shell: shell:
""" """
python {input.script} --dataFiles {input.data} --outModelFile {output} python {input.script} --dataFiles {input.data} --outModelFile {output}
...@@ -30,9 +36,9 @@ rule lowq2_reconstruction_training: ...@@ -30,9 +36,9 @@ rule lowq2_reconstruction_training:
# Runs the inference with the default model. # Runs the inference with the default model.
rule lowq2_reconstruction_inference: rule lowq2_reconstruction_inference:
input: input:
data="tensors.eicrecon.tree.edm4eic.root", data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
output: output:
"TaggerInference.eicrecon.tree.edm4eic.root", f"{outputdir}TaggerInference.eicrecon.tree.edm4eic.root",
params: params:
compact_xml="epic_lowq2.xml", compact_xml="epic_lowq2.xml",
shell: shell:
...@@ -43,13 +49,13 @@ rule lowq2_reconstruction_inference: ...@@ -43,13 +49,13 @@ rule lowq2_reconstruction_inference:
-Pdd4hep:xml_files={params.compact_xml} \ -Pdd4hep:xml_files={params.compact_xml} \
""" """
# Runs the inference with the new model. # Check inference runs with the new model.
rule lowq2_reconstruction_new_inference: rule lowq2_reconstruction_new_inference:
input: input:
data="tensors.eicrecon.tree.edm4eic.root", data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
model="TestTaggerRegression.onnx", model=f"{outputdir}TestTaggerTrackerTransportation.onnx",
output: output:
"TaggerInferenceNew.eicrecon.tree.edm4eic.root", f"{outputdir}TaggerInferenceNew.eicrecon.tree.edm4eic.root",
params: params:
compact_xml="epic_lowq2.xml", compact_xml="epic_lowq2.xml",
shell: shell:
...@@ -61,3 +67,29 @@ rule lowq2_reconstruction_new_inference: ...@@ -61,3 +67,29 @@ rule lowq2_reconstruction_new_inference:
-PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model} \ -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model} \
-Peicrecon:LogLevel=error \ -Peicrecon:LogLevel=error \
""" """
# Create plots showing the performance of a model.
rule lowq2_reconstruction_new_plot:
input:
data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
model=lambda wildcards: f"{outputdir}{wildcards.model}.onnx",
output:
directory(expand(f"{resultsdir}{{model}}", model="{model}")),
shell:
"""
mkdir -p {output}
python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output}
"""
# Create plots showing the performance of a model.
rule lowq2_reconstruction_old_plot:
input:
data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
model=lambda wildcards: f"calibrations/onnx/{wildcards.model}.onnx",
output:
directory(expand(f"{resultsdir}{{model}}", model="{model}")),
shell:
"""
mkdir -p {output}
python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output}
"""
\ No newline at end of file
...@@ -38,7 +38,7 @@ val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False) ...@@ -38,7 +38,7 @@ val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
print(f"Training data: {len(train_input_data)} samples") print(f"Training data: {len(train_input_data)} samples")
epochs = 100 epochs = 1000
model = trainModel(epochs, train_loader, val_loader) model = trainModel(epochs, train_loader, val_loader)
# Save the trained model to ONNX format # Save the trained model to ONNX format
......
import onnxruntime as ort
import argparse
import numpy as np
from ProcessData import create_arrays
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
from scipy.stats import norm
from scipy.optimize import curve_fit
# Parse arguments
parser = argparse.ArgumentParser(description='Train a regression model for the Tagger.')
parser.add_argument('--modelFile', type=str, default="regression_model.onnx", help='Path to the ONNX model file')
parser.add_argument('--dataFiles', type=str, nargs='+', help='Path to the data files')
parser.add_argument('--outDir', type=str, default=".", help='Output directory')
args = parser.parse_args()
modelFile = args.modelFile
dataFiles = args.dataFiles
outDir = args.outDir
outGraphFile = outDir + "/output_vs_target.png"
outGraphFile2 = outDir + "/output_vs_target2.png"
outGraphFile3 = outDir + "/output_vs_target3.png"
outGraphFile4 = outDir + "/output_vs_target4.png"
input_data, target_data = create_arrays(dataFiles)
target_data = np.array(target_data)
# Load the ONNX model
session = ort.InferenceSession(modelFile)
# Run the model on the input data
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name
input_data = np.array(input_data,dtype=np.float32)
output = session.run([output_name], {input_name: input_data})
output = np.array(output[0])
out_theta = np.arctan2(np.sqrt(output[:,0]**2 + output[:,1]**2),output[:,2])
out_phi = np.arctan2(output[:,1],output[:,0])
out_mag = np.sqrt(output[:,0]**2 + output[:,1]**2 + output[:,2]**2)
in_theta = np.arctan2(np.sqrt(target_data[:,0]**2 + target_data[:,1]**2),target_data[:,2])
in_phi = np.arctan2(target_data[:,1],target_data[:,0])
in_mag = np.sqrt(target_data[:,0]**2 + target_data[:,1]**2 + target_data[:,2]**2)
thetadiff = out_theta - in_theta
phidiff = out_phi - in_phi
# Move phidiff to within -pi/2 and pi/2
phidiff = (phidiff + np.pi) % (2 * np.pi) - np.pi
magdiff = out_mag - in_mag
diff = (target_data - output)/target_data
diffrange = [[-5,5],[-5,5],[-0.5,0.5]]
datarange = [[-0.02,0.02],[-0.02,0.02],[-1,0]]
# Use the 'seismic' colormap
cmap = plt.get_cmap('seismic')
# Creates histograms to compare the target and output data
fig, axs = plt.subplots(3, 3, figsize=(12, 12))
for i in range(3):
# 2D histograms showing trends in the data
axs[0,i].hist2d(target_data[:,i], output[:,i], bins=100, range=[datarange[i],datarange[i]], cmap="seismic", norm=LogNorm(), label="Output vs Target")
axs[0,i].set_xlabel(f"Variable {i} Target")
axs[0,i].set_ylabel(f"Variable {i} Output")
axs[1,i].hist(diff[:,i], bins=100, alpha=0.5, range=diffrange[i], label="Difference")
axs[1,i].set_xlabel(f"Variable {i} Difference")
axs[1,i].set_ylabel("Counts")
axs[2,i].hist2d(target_data[:,i], diff[:,i], bins=100, range=[datarange[i],diffrange[i]], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
axs[2,i].set_xlabel(f"Variable {i} Target")
axs[2,i].set_ylabel(f"Variable {i} Difference")
plt.show()
plt.savefig(outGraphFile)
# Creates histograms to compare theta, phi and mag target and output data
fig2, axs2 = plt.subplots(3, 3, figsize=(12, 12))
thetarange = [np.pi-0.01,np.pi]
phirange = [-np.pi,np.pi]
magrange = [0,1]
thetadiffrange = [-0.01,0.01]
phidiffrange = [-np.pi,np.pi]
magdiffrange = [-0.1,0.1]
# 2D histograms showing trends in the data
axs2[0,0].hist2d(out_theta, in_theta, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target")
axs2[0,0].set_xlabel("Theta Target")
axs2[0,0].set_ylabel("Theta Output")
axs2[0,1].hist2d(out_phi, in_phi, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target")
axs2[0,1].set_xlabel("Phi Target")
axs2[0,1].set_ylabel("Phi Output")
axs2[0,2].hist2d(out_mag, in_mag, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target")
axs2[0,2].set_xlabel("Mag Target")
axs2[0,2].set_ylabel("Mag Output")
axs2[1,0].hist(thetadiff, bins=100, alpha=0.5, range=thetadiffrange, label="Difference")
axs2[1,0].set_xlabel("Theta Difference")
axs2[1,0].set_ylabel("Counts")
axs2[1,1].hist(phidiff, bins=100, alpha=0.5, range=phidiffrange, label="Difference")
axs2[1,1].set_xlabel("Phi Difference")
axs2[1,1].set_ylabel("Counts")
axs2[1,2].hist(magdiff, bins=100, alpha=0.5, range=magdiffrange, label="Difference")
axs2[1,2].set_xlabel("Mag Difference")
axs2[1,2].set_ylabel("Counts")
axs2[2,0].hist2d(in_theta, thetadiff, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
axs2[2,0].set_xlabel("Theta Target")
axs2[2,0].set_ylabel("Theta Difference")
axs2[2,1].hist2d(in_phi, phidiff, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target")
axs2[2,1].set_xlabel("Phi Target")
axs2[2,1].set_ylabel("Phi Difference")
axs2[2,2].hist2d(in_mag, magdiff, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
axs2[2,2].set_xlabel("Mag Target")
axs2[2,2].set_ylabel("Mag Difference")
plt.show()
plt.savefig(outGraphFile2)
# Create histograms where the theta value has been cut at less than 3.14
fig3, axs3 = plt.subplots(3, 3, figsize=(12, 12))
out_theta_cut = out_theta[out_theta < 3.14]
in_theta_cut = in_theta[out_theta < 3.14]
thetadiff_cut = thetadiff[out_theta < 3.14]
out_phi_cut = out_phi[out_theta < 3.14]
in_phi_cut = in_phi[out_theta < 3.14]
phidiff_cut = phidiff[out_theta < 3.14]
out_mag_cut = out_mag[out_theta < 3.14]
in_mag_cut = in_mag[out_theta < 3.14]
magdiff_cut = magdiff[out_theta < 3.14]
axs3[0,0].hist2d(out_theta_cut, in_theta_cut, bins=100, range=[thetarange,thetarange], cmap="seismic", norm=LogNorm(), label="Output vs Target")
axs3[0,0].set_xlabel("Theta Target")
axs3[0,0].set_ylabel("Theta Output")
axs3[0,1].hist2d(out_phi_cut, in_phi_cut, bins=100, range=[phirange,phirange], cmap="seismic", label="Output vs Target")
axs3[0,1].set_xlabel("Phi Target")
axs3[0,1].set_ylabel("Phi Output")
axs3[0,2].hist2d(out_mag_cut, in_mag_cut, bins=100, range=[magrange,magrange], cmap="seismic", norm=LogNorm(), label="Output vs Target")
axs3[0,2].set_xlabel("Mag Target")
axs3[0,2].set_ylabel("Mag Output")
axs3[1,0].hist(thetadiff_cut, bins=100, alpha=0.5, range=thetadiffrange, label="Difference")
axs3[1,0].set_xlabel("Theta Difference")
axs3[1,0].set_ylabel("Counts")
axs3[1,1].hist(phidiff_cut, bins=100, alpha=0.5, range=phidiffrange, label="Difference")
axs3[1,1].set_xlabel("Phi Difference")
axs3[1,1].set_ylabel("Counts")
axs3[1,2].hist(magdiff_cut, bins=100, alpha=0.5, range=magdiffrange, label="Difference")
axs3[1,2].set_xlabel("Mag Difference")
axs3[1,2].set_ylabel("Counts")
axs3[2,0].hist2d(in_theta_cut, thetadiff_cut, bins=100, range=[thetarange,thetadiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
axs3[2,0].set_xlabel("Theta Target")
axs3[2,0].set_ylabel("Theta Difference")
axs3[2,1].hist2d(in_phi_cut, phidiff_cut, bins=100, range=[phirange,phidiffrange], cmap="seismic", label="Difference vs Target")
axs3[2,1].set_xlabel("Phi Target")
axs3[2,1].set_ylabel("Phi Difference")
axs3[2,2].hist2d(in_mag_cut, magdiff_cut, bins=100, range=[magrange,magdiffrange], cmap="seismic", norm=LogNorm(), label="Difference vs Target")
axs3[2,2].set_xlabel("Mag Target")
axs3[2,2].set_ylabel("Mag Difference")
plt.show()
plt.savefig(outGraphFile3)
# Create plots where a Gaussian has been fitted to the data
# Function to fit a Gaussian and plot the results
def plot_gaussian_fit(ax, data, range, xlabel, ylabel):
def gaussian(x, mu, sigma, amplitude):
return amplitude * np.exp(-0.5 * ((x - mu) / sigma) ** 2)
hist, bin_edges = np.histogram(data, bins=100, range=range, density=True)
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2
popt, _ = curve_fit(gaussian, bin_centers, hist, p0=[0, 0.01, 1])
mu, sigma, amplitude = popt
print(f"mu={mu}, sigma={sigma}, amplitude={amplitude}")
x = np.linspace(range[0], range[1], 100)
ax.plot(x, gaussian(x, *popt), 'k', linewidth=2)
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.hist(data, bins=100, alpha=0.5, range=range, edgecolor='black', density=True)
ax.legend([f'Fit: $\mu$={mu:.5f}, $\sigma$={sigma:.5f}'])
# Create histograms with Gaussian fits
fig4, axs4 = plt.subplots(3, 1, figsize=(8, 12))
plot_gaussian_fit(axs4[0], thetadiff, thetadiffrange, "Theta Difference", "Density")
plot_gaussian_fit(axs4[1], phidiff_cut, phidiffrange, "Phi Difference", "Density")
plot_gaussian_fit(axs4[2], magdiff, magdiffrange, "Mag Difference", "Density")
plt.show()
plt.savefig(outGraphFile4)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment