Skip to content
Snippets Groups Projects
Commit 74ecae32 authored by Robinpreet Dhillon, committed by Wouter Deconinck
Browse files

Run ML training on 100 events for every pipeline (manual for 100k)

parent cccbed9b
Branches
No related tags found
1 merge request: !236 Run ML training on 100 events for every pipeline (manual for 100k)
......@@ -15,8 +15,6 @@
#include "eicd/CalorimeterHitData.h"
#include "eicd/ClusterCollection.h"
#include "eicd/ClusterData.h"
#include "eicd/ClusterLayerData.h"
#include "eicd/RawCalorimeterHitCollection.h"
#include "eicd/RawCalorimeterHitData.h"
......@@ -229,7 +227,7 @@ void emcal_barrel_pion_rejection_analysis(
// EcalBarrelImagingClustersLayers Functions////////////////////////////////////////////////////////////////////////////////
// Number of hits
auto nhitsClusLayerImg = [] (const std::vector<eicd::ClusterLayerData>& evt) {
auto nhitsClusLayerImg = [] (const std::vector<eicd::ClusterData>& evt) {
int nhitsTot = 0;
for (const auto &i : evt){
nhitsTot += i.nhits;
......@@ -238,10 +236,10 @@ void emcal_barrel_pion_rejection_analysis(
};
// Number of clusters
auto nClusLayerImg = [] (const std::vector<eicd::ClusterLayerData>& evt) {return (int) evt.size(); };
auto nClusLayerImg = [] (const std::vector<eicd::ClusterData>& evt) {return (int) evt.size(); };
// Energy deposition in cluster [GeV]
auto EClusLayerImg = [](const std::vector<eicd::ClusterLayerData>& evt) {
auto EClusLayerImg = [](const std::vector<eicd::ClusterData>& evt) {
double total_edep = 0.0;
for (const auto& i: evt){
total_edep += i.energy;
......@@ -250,7 +248,7 @@ void emcal_barrel_pion_rejection_analysis(
};
// Max Energy deposition in cluster [GeV]
auto EClusLayerMaxImg = [](const std::vector<eicd::ClusterLayerData>& evt) {
auto EClusLayerMaxImg = [](const std::vector<eicd::ClusterData>& evt) {
double max = 0.0;
for (const auto& i: evt){
if (i.energy > max){max = i.energy;}
......@@ -259,7 +257,7 @@ void emcal_barrel_pion_rejection_analysis(
};
// Second-highest energy deposition in cluster [GeV]
auto EClusLayerMax2Img = [](const std::vector<eicd::ClusterLayerData>& evt) {
auto EClusLayerMax2Img = [](const std::vector<eicd::ClusterData>& evt) {
double max1 = 0.0;
double max2 = 0.0;
for (const auto& i: evt){
......
ml_shower:tagging_epi :
ml_shower:tagging_epimuphka_100:
extends: .rec_benchmark
stage: benchmarks1
script:
- pwd
- ls -l
- python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_epimuphka_100 -n 100 --particles "electron,pion-,muon,photon,kaon-"
--pmin 0.5 --pmax 10
ml_shower:tagging_epimuphka:
extends: .rec_benchmark
when: manual
stage: benchmarks1
script:
- python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_epi -n 100000
--particles "electron,pion-,pion-" --pmin 0.5 --pmax 10
- ls -hal
- python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_epimuphka -n 10000 --particles "electron,pion-,muon,photon,kaon-"
--pmin 0.5 --pmax 10
ml_shower:tagging_e:
extends: .rec_benchmark
when: manual
stage: benchmarks1
script:
- python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_e -n 100000
- python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_e -n 100
--particles "electron" --pmin 0.5 --pmax 10
ml_shower:tagging_pi:
......@@ -19,17 +29,25 @@ ml_shower:tagging_pi :
when: manual
stage: benchmarks1
script:
- python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_pi -n 100000
- python3 benchmarks/imaging_shower_ML/sim_rec_tag.py -t imcal_pi -n 100
--particles "pion-" --pmin 0.5 --pmax 10
ml_shower:training_100:
extends: .rec_benchmark
stage: process
needs: ["ml_shower:tagging_epimuphka_100"]#, "ml_shower:tagging_e", "ml_shower:tagging_pi"]
script:
- pip install tensorflow particle
- python3 benchmarks/imaging_shower_ML/scripts/ml_training.py -t imcal_epimuphka_100 --pmin 0.5 --pmax 10
ml_shower:training:
extends: .rec_benchmark
when: manual
stage: process
needs: ["ml_shower:tagging_epi", "ml_shower:tagging_e", "ml_shower:tagging_pi"]
when: manual
needs: ["ml_shower:tagging_epimuphka"]#, "ml_shower:tagging_e", "ml_shower:tagging_pi"]
script:
- ls -lrth
# TODO
- pip install tensorflow particle
- python3 benchmarks/imaging_shower_ML/scripts/ml_training.py -t imcal_epimuphka --pmin 0.5 --pmax 10
ml_shower:test:
extends: .rec_benchmark
......
......@@ -60,13 +60,13 @@ becal_img_reco = CalHitReco('becal_img_reco',
**becal_img_daq)
becal_img_merger = MLDataMerger('becal_img_merger',
inputHitCollection=becal_img_reco.outputHitCollection,
outputHitCollection='EcalBarrelImagingHitsSeg',
inputHits=becal_img_reco.outputHitCollection,
outputHits='EcalBarrelImagingHitsSeg',
etaSize=0.001,
phiSize=0.001)
becal_img_sorter = MLDataSorter('becal_img_sorter',
inputHitCollection=becal_img_merger.outputHitCollection,
inputHitCollection=becal_img_merger.outputHits,
outputHitCollection='EcalBarrelImagingHitsML',
numberOfLayers=kwargs['img_nlayers'],
numberOfHits=kwargs['nhits'])
......@@ -117,12 +117,14 @@ becal_scfi_sorter = MLDataSorter('becal_scfi_sorter',
# combine layers
becal_combiner = MLDataCombiner('becal_combiner',
inputHitCollection1=becal_img_sorter.outputHitCollection,
inputHitCollection2=becal_scfi_sorter.outputHitCollection,
outputHitCollection='EcalBarrelHitsCombinedML',
inputHits1=becal_img_sorter.outputHitCollection,
inputHits2=becal_scfi_sorter.outputHitCollection,
outputHits='EcalBarrelHitsCombinedML',
layerIncrement=100,
rule=kwargs['combine'])
podout.outputCommands = [
# 'keep *',
'drop *',
......
......@@ -3,15 +3,23 @@ import pandas as pd
import numpy as np
from collections import OrderedDict
from scipy.interpolate import CubicSpline
# import tensorflow as tf
import uproot as uproot
import matplotlib.pyplot as plt
import logging
import sys
import subprocess
import argparse
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib as mpl
from matplotlib import pyplot as plt
from matplotlib.ticker import MultipleLocator, FixedLocator, MaxNLocator
from matplotlib.colors import LogNorm
from particle import Particle
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# 5 layers 0-1-2-6-9
# 6 layers 0-1-2-3-4-6-9
......@@ -36,20 +44,25 @@ def draw_layer_edep(edep, label=''):
axis.set_title(label, fontsize=26)
return figure, axis
parser = argparse.ArgumentParser()
parser.add_argument('-t', '--nametag', type=str, default='IMCAL_ML', help='Name tag for output files.')
parser.add_argument('--pmin', type=float, default=0.5, help='Minimum momentum of particles.')
parser.add_argument('--pmax', type=float, default=10, help='Maximum momentum of particles.')
args = parser.parse_args()
kwargs = vars(args)
train_model = True
do_epcut = False
nametag = 'real_imcal'
partag = 'epimuphka'
# partag = 'epi'
prange = (2.0, 2.0)
etag = '{:.1f}_{:.1f}_combined'.format(*prange)
#nametag = 'imcal'
#partag = 'epimuphka'
#prange = (0.5, 10.0)
#etag = '{:.1f}_{:.1f}_combined'.format(*prange)
epoch = 20
# data_shape = (20, 20, 3)
data_shape = (29, 20, 5)
data_dir = r'D:\Data\EIC\ImagingCalorimetry\{name}_{part}_{etag}'.format(name=nametag, part=partag, etag=etag)
out_dir = r'C:\Users\cpeng\OneDrive\EIC\ImagingCalorimeter\AstroPix\hybrid\{name}_{part}_{etag}'\
.format(name=nametag, part=partag, etag=etag)
#data_dir = r'sim_output/tag_data/{name}_{part}_{etag}'.format(name=nametag, part=partag, etag=etag)
#out_dir = r'sim_output/tag_data/output/{name}_{part}_{etag}'.format(name=nametag, part=partag, etag=etag)
data_dir = os.path.join('sim_output/tag_data/', '{nametag}_{pmin}_{pmax}_combined'.format(**kwargs))
out_dir = os.path.join('sim_output/tag_data/output/', '{nametag}_{pmin}_{pmax}_combined'.format(**kwargs))
raw_data = np.load(os.path.join(data_dir, 'data.npy'))
raw_tags = np.load(os.path.join(data_dir, 'tag.npy'))
edeps = np.load(os.path.join(data_dir, 'edep.npy'))
......@@ -107,12 +120,12 @@ if do_epcut:
ax.text(0.2, 0.9, '$e$ eff. = {:.2f}%\n$\pi$ rej. = {:.2f}%'.format(ep_eff*100., ep_rej*100.),
fontsize=24, transform=ax.transAxes, ha='left', va='top')
ax.set_title('$E/p > {:.2f}$% @ {:d} - {:d} X$_0$'.format(epcut*100., begin_layer, end_layer), fontsize=24)
fig.savefig(os.path.join(out_dir, 'pre_epcut.png'))
fig.savefig(os.path.join('results/ml/models/emcal/', 'pre_epcut.png'))
fig, _ = draw_layer_edep(edeps[emask], 'Cumulative Edep over layer for electrons')
fig.savefig(os.path.join(out_dir, 'cum_edep_el.png'))
fig.savefig(os.path.join('results/ml/models/emcal/', 'cum_edep_el.png'))
fig, _ = draw_layer_edep(edeps[pimask], 'Cumulative Edep over layer for pions')
fig.savefig(os.path.join(out_dir, 'cum_edep_pi.png'))
fig.savefig(os.path.join('results/ml/models/emcal/', 'cum_edep_pi.png'))
data = raw_data[epmask]
tags = raw_tags[epmask]
......@@ -140,19 +153,19 @@ if train_model:
w_valid = weights[id_valid]
model = keras.Sequential([
keras.layers.Conv2D(64, (3, 3), padding='same', activation='selu', input_shape=data_shape),
keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=data_shape),
# keras.layers.MaxPooling2D((2, 2), strides=2),
keras.layers.Dropout(0.25),
keras.layers.Conv2D(128, (2, 2), padding='same', activation='selu'),
keras.layers.Conv2D(128, (2, 2), padding='same', activation='relu'),
# keras.layers.MaxPooling2D((2, 2), strides=2),
keras.layers.Dropout(0.3),
keras.layers.Conv2D(64, (2, 2), padding='same', activation='selu'),
keras.layers.Conv2D(64, (2, 2), padding='same', activation='relu'),
# keras.layers.MaxPooling2D((2, 2), strides=2),
keras.layers.Flatten(),
keras.layers.Dense(128, activation='selu'),
keras.layers.Dense(128, activation='relu'),
keras.layers.Dropout(0.25),
keras.layers.Dense(32, activation='selu'),
keras.layers.Dense(32, activation='relu'),
keras.layers.Dense(len(labels), activation='softmax')
])
......@@ -161,9 +174,15 @@ if train_model:
metrics=['accuracy'])
history = model.fit(x_train, y_train,
epochs=epoch, sample_weight=w_train, validation_data=(x_valid, y_valid, w_valid))
model.save(os.path.join(out_dir, 'model_epch{:d}'.format(epoch)))
model.save(os.path.join('results/ml/models/emcal/', 'model_epch{:d}'.format(epoch)))
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open(os.path.join('results/ml/models/emcal/', 'lite_model_{nametag}.tflite'.format(**kwargs)), 'wb') as f:
f.write(tflite_model)
else:
model = keras.models.load_model(os.path.join(out_dir, 'model_epch{:d}'.format(epoch)))
model = keras.models.load_model(os.path.join('results/ml/models/emcal/', 'model_epch{:d}'.format(epoch)))
# ---- general ----
# x_test = data
# y_test = pid
......@@ -186,7 +205,7 @@ if len(labels) == 2:
# --- pion - electron ----
pion_prob_cut = 0.5
pid_probs = np.zeros(shape=(2, 2))
fig, ax = plt.subplots(1, 1, figsize=(12, 9), dpi=160)
fig, ax = plt.subplots(1, 1, figsize=(8, 6), dpi=160)
ax.set_yscale('log')
ax.set_ylabel('Counts', fontsize=24)
ax.set_xlabel(r'$P_{\pi}$', fontsize=24)
......@@ -201,8 +220,8 @@ if len(labels) == 2:
ax.text(0.55, 0.9, '$e$ eff. = {:.2f}%\n$\pi$ rej. = {:.2f}%'.format(pid_probs[0][0]*100., pid_probs[1][1]*100.),
fontsize=24, transform=ax.transAxes, ha='left', va='top')
ax.legend(fontsize=24, loc='lower left')
fig.savefig(os.path.join(out_dir, 'pid_tag_hist.png'))
fig.savefig(os.path.join(out_dir, 'pid_tag_hist.pdf'))
fig.savefig(os.path.join('results/ml/models/emcal/', 'pid_tag_hist.png'))
fig.savefig(os.path.join('results/ml/models/emcal/', 'pid_tag_hist.pdf'))
# --- pion - electron ----
result_text = open(os.path.join(out_dir, 'rejection_result.txt'), 'w')
lines = [
......@@ -222,7 +241,7 @@ if len(labels) == 2:
else:
# --- multi-particles ---
pid_probs = np.zeros(shape=(len(labels), len(labels)))
fig, ax = plt.subplots(1, 1, figsize=(8, 8), dpi=160, gridspec_kw={'left': 0.15, 'right': 0.95})
fig, ax = plt.subplots(1, 1, figsize=(12, 8), dpi=160, gridspec_kw={'left': 0.15, 'right': 0.95})
ax.set_yscale('log')
ax.set_ylabel('Counts', fontsize=26)
ax.set_xlabel(r'Likelihood of the Corresponding Label', fontsize=26)
......@@ -237,8 +256,8 @@ else:
pid_probs[i][j] = np.sum(pred_labels == j)/float(np.sum(mask))
ax.legend(fontsize=26, loc='upper center', ncol=3, handletextpad=0.3, columnspacing=0.6)
fig.savefig(os.path.join(out_dir, 'pid_tag_hist.png'))
fig.savefig(os.path.join(out_dir, 'pid_tag_hist.pdf'))
fig.savefig(os.path.join('results/ml/models/emcal/', 'pid_tag_hist.png'))
fig.savefig(os.path.join('results/ml/models/emcal/', 'pid_tag_hist.pdf'))
# --- multi-particles ---
fig, ax = plt.subplots(1, 1, figsize=(8, 8), dpi=160, gridspec_kw={'left': 0.15, 'right': 0.95})
......@@ -260,5 +279,5 @@ for i in range(pid_probs.shape[0]):
text = ax.text(j, i, '{:.2f}%'.format(pid_probs[i, j]*100.), ha='center', va='center',
color=color, fontsize=28 - len(labels)*2)
ax.grid('-', color='k', which='minor')
fig.savefig(os.path.join(out_dir, 'pid_tag.png'))
fig.savefig(os.path.join(out_dir, 'pid_tag.pdf'))
fig.savefig(os.path.join('results/ml/models/emcal/', 'pid_tag.png'))
fig.savefig(os.path.join('results/ml/models/emcal/', 'pid_tag.pdf'))
......@@ -145,7 +145,8 @@ if __name__ == '__main__':
event_ids = df['event'].unique()
data = df.set_index('event')[featcols].clip(0, 1).values.reshape([len(event_ids)] + dshape)
tags = dfm.loc[event_ids, ['pdgID', 'p', 'pT', 'eta', 'phi', 'mass']]
tags = dfm.loc[event_ids, ['PDG', 'p', 'pT', 'eta', 'phi', 'mass']]
# also save Edep per layer
# merge the sandwich layers
df['layer'] = df['layer'] % 100
......
......@@ -30,13 +30,13 @@ parser.add_argument('--physics-list', type=str, default='FTFP_BERT', help='Path
args = parser.parse_args()
kwargs = vars(args)
for mdir in ['gen_data', 'sim_data', 'rec_data', 'tag_data']:
for mdir in ['sim_output/gen_data', 'sim_output/sim_data', 'sim_output/rec_data', 'sim_output/tag_data']:
os.makedirs(mdir, exist_ok=True)
gen_file = os.path.join('gen_data', '{nametag}_{pmin}_{pmax}.hepmc'.format(**kwargs))
sim_file = os.path.join('sim_data', '{nametag}_{pmin}_{pmax}.edm4hep.root'.format(**kwargs))
rec_file = os.path.join('rec_data', '{nametag}_{pmin}_{pmax}.root'.format(**kwargs))
tag_dir = os.path.join('tag_data', '{nametag}_{pmin}_{pmax}'.format(**kwargs))
gen_file = os.path.join('sim_output/gen_data', '{nametag}_{pmin}_{pmax}.hepmc'.format(**kwargs))
sim_file = os.path.join('sim_output/sim_data', '{nametag}_{pmin}_{pmax}.edm4hep.root'.format(**kwargs))
rec_file = os.path.join('sim_output/rec_data', '{nametag}_{pmin}_{pmax}.root'.format(**kwargs))
tag_dir = os.path.join('sim_output/tag_data', '{nametag}_{pmin}_{pmax}'.format(**kwargs))
procs = [p.strip() for p in args.process.split(',')]
sdir = os.path.dirname(os.path.realpath(__file__))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment