diff --git a/benchmarks/sampling_ecal/prof_emcal_barrel_electrons.sh b/benchmarks/sampling_ecal/prof_emcal_barrel_electrons.sh index 2cb1f5440dcf6bb58b79ccd9fc68ab1482b578d5..b4b6f4fd525bc9528b814b4bdc9c5969fe249f48 100644 --- a/benchmarks/sampling_ecal/prof_emcal_barrel_electrons.sh +++ b/benchmarks/sampling_ecal/prof_emcal_barrel_electrons.sh @@ -1,30 +1,35 @@ # Directory for plots mkdir -p results +# CB_EMCAL_OPTION_DIR=${JUGGLER_INSTALL_PREFIX}/Examples/options +# CB_EMCAL_SCRIPT_DIR=${JUGGLER_INSTALL_PREFIX}/Examples/scripts/sampling_calorimeter +CB_EMCAL_OPTION_DIR=benchmarks/sampling_ecal/options +CB_EMCAL_SCRIPT_DIR=benchmarks/sampling_ecal/scripts + # Run Juggler xenv -x ${JUGGLER_INSTALL_PREFIX}/Juggler.xenv \ - gaudirun.py benchmarks/sampling_ecal/options/sampling_cluster3d.py + gaudirun.py ${CB_EMCAL_OPTION_DIR}/sampling_cluster3d.py if [[ "$?" -ne "0" ]] ; then echo "ERROR running juggler" exit 1 fi # Run analysis script -python ${JUGGLER_INSTALL_PREFIX}/Examples/scripts/sampling_calorimeter/draw_cluster_layers.py \ +python ${CB_EMCAL_SCRIPT_DIR}/draw_cluster_layers.py \ ${CB_EMCAL_REC_FILE} -e 0 --topo-size=1.0 --compact=${CB_EMCAL_COMPACT_PATH} -o results if [[ "$?" -ne "0" ]] ; then echo "ERROR running analysis script: draw_cluster_layers" exit 1 fi -python ${JUGGLER_INSTALL_PREFIX}/Examples/scripts/sampling_calorimeter/draw_cluster.py \ +python ${CB_EMCAL_SCRIPT_DIR}/sampling_calorimeter/draw_cluster.py \ ${CB_EMCAL_REC_FILE} -e 0 --topo-size=2.0 --compact=${CB_EMCAL_COMPACT_PATH} -o results if [[ "$?" -ne "0" ]] ; then echo "ERROR running analysis script: draw_cluster" exit 1 fi -python ${JUGGLER_INSTALL_PREFIX}/Examples/scripts/sampling_calorimeter/energy_profile.py \ +python ${CB_EMCAL_SCRIPT_DIR}/sampling_calorimeter/energy_profile.py \ ${CB_EMCAL_REC_FILE} --type=EM --energy=${CB_EMCAL_ENERGY} --save=results/profile.csv --color=royalblue if [[ "$?" -ne "0" ]] ; then echo "ERROR running analysis script: energy_profile" @@ -32,11 +37,11 @@ if [[ "$?" -ne "0" ]] ; then fi -root_filesize=$(stat --format=%s "${JUGGLER_REC_FILE}") -if [[ "${JUGGLER_N_EVENTS}" -lt "500" ]] ; then +root_filesize=$(stat --format=%s "${CB_EMCAL_REC_FILE}") +if [[ "${CB_EMCAL_NUMEV}" -lt "500" ]] ; then # file must be less than 10 MB to upload - if [[ "${root_filesize}" -lt "10000000" ]] ; then - cp ${JUGGLER_REC_FILE} results/. + if [[ "${root_filesize}" -lt "10000000" ]] ; the + cp ${CB_EMCAL_REC_FILE} results/. fi fi diff --git a/benchmarks/sampling_ecal/scripts/draw_cluster.py b/benchmarks/sampling_ecal/scripts/draw_cluster.py new file mode 100644 index 0000000000000000000000000000000000000000..50043f01c1c56f86447aabd78049e9d36fad4028 --- /dev/null +++ b/benchmarks/sampling_ecal/scripts/draw_cluster.py @@ -0,0 +1,207 @@ +''' + A script to visualize the cluster + It reads the output from the Juggler component ImagingClusterReco, which is supposed to be clusters of hits after + digitization, reconstruction, and clustering + + Author: Chao Peng (ANL) + Date: 04/30/2021 +''' + +import os +import numpy as np +import pandas as pd +import argparse +import matplotlib +from matplotlib import cm +from matplotlib import pyplot as plt +from matplotlib.ticker import MultipleLocator +from matplotlib.collections import PatchCollection +from matplotlib.patches import Rectangle +from mpl_toolkits.axes_grid1 import make_axes_locatable +from utils import * +import sys + + +# draw cluster in a 3d axis, expect a numpy array of (nhits, 4) shape with each row contains (x, y, z, E) +# note z and x axes are switched +def draw_hits3d(axis, data, cmap, units=('mm', 'mm', 'mm', 'MeV'), fontsize=24, **kwargs): + # normalize energy to get colors + x, y, z, edep = np.transpose(data) + cvals = edep - min(edep) / (max(edep) - min(edep)) + cvals[np.isnan(cvals)] = 1.0 + colors = cmap(cvals) + + # hits + axis.scatter(z, y, x, c=colors, marker='o', **kwargs) + axis.tick_params(labelsize=fontsize) + axis.set_zlabel('x ({})'.format(units[2]), fontsize=fontsize + 2, labelpad=fontsize) + axis.set_ylabel('y ({})'.format(units[1]), fontsize=fontsize + 2, labelpad=fontsize) + axis.set_xlabel('z ({})'.format(units[0]), fontsize=fontsize + 2, labelpad=fontsize) + cb = plt.colorbar(cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin=min(edep), vmax=max(edep)), cmap=cmap), + ax=axis, shrink=0.85) + cb.ax.tick_params(labelsize=fontsize) + cb.ax.get_yaxis().labelpad = fontsize + cb.ax.set_ylabel('Energy Deposit ({})'.format(units[3]), rotation=90, fontsize=fontsize + 4) + return axis + + +# draw a cylinder in 3d axes +# note z and x axes are switched +def draw_cylinder3d(axis, r, z, order=['x', 'y', 'z'], rsteps=500, zsteps=500, **kwargs): + x = np.linspace(-r, r, rsteps) + z = np.linspace(-z, z, zsteps) + Xc, Zc = np.meshgrid(x, z) + Yc = np.sqrt(r**2 - Xc**2) + + axis.plot_surface(Zc, Yc, Xc, alpha=0.1, **kwargs) + axis.plot_surface(Zc, -Yc, Xc, alpha=0.1, **kwargs) + return axis + + +# fit the track of cluster and draw the fit +def draw_track_fit(axis, dfh, length=200, stop_layer=8, scat_kw=dict(), line_kw=dict()): + dfh = dfh[dfh['layer'] <= stop_layer] + data = dfh.groupby('layer')[['z', 'y','x']].mean().values + # data = dfh[['z', 'y', 'x']].values + # ax.scatter(*data.T, **scat_kw) + datamean = data.mean(axis=0) + uu, dd, vv = np.linalg.svd(data - datamean) + linepts = vv[0] * np.mgrid[-length:length:2j][:, np.newaxis] + linepts += datamean + axis.plot3D(*linepts.T, 'k:') + return axis + + +# color map +def draw_heatmap(axis, x, y, weights, bins=1000, cmin=0., cmap=plt.get_cmap('rainbow'), pc_kw=dict()): + w, xedg, yedg = np.histogram2d(x, y, weights=weights, bins=bins) + xsz = np.mean(np.diff(xedg)) + ysz = np.mean(np.diff(yedg)) + wmin, wmax = w.min(), w.max() + recs, clrs = [], [] + for i in np.arange(len(xedg) - 1): + for j in np.arange(len(yedg) - 1): + if w[i][j] > cmin: + recs.append(Rectangle((xedg[i], yedg[j]), xsz, ysz)) + clrs.append(cmap((w[i][j] - wmin) / (wmax - wmin))) + axis.add_collection(PatchCollection(recs, facecolor=clrs, **pc_kw)) + axis.set_xlim(xedg[0], xedg[-1]) + axis.set_ylim(yedg[0], yedg[-1]) + return axis, cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin=wmin, vmax=wmax), cmap=cmap) + + +# execute this script +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Visualize the cluster from analysis') + parser.add_argument('file', type=str, help='path to root file') + parser.add_argument('-e', type=int, default=0, dest='iev', help='event number to plot') + parser.add_argument('-c', type=int, default=0, dest='icl', help='cluster number to plot') + parser.add_argument('-s', type=int, default=8, dest='stop', help='stop layer for track fit') + parser.add_argument('-o', type=str, default='./plots', dest='outdir', help='output directory') + parser.add_argument('--compact', type=str, default='', dest='compact', help='compact file') + parser.add_argument('-m', '--macros', type=str, default='rootlogon.C', dest='macros', + help='root macros to load (accept multiple paths separated by \",\")') + parser.add_argument('-b', '--branch-name', type=str, default='EcalBarrelClustersLayers', dest='branch', + help='branch name in the root file (outputLayerCollection from ImagingClusterReco)') + parser.add_argument('--topo-size', type=float, default=2.0, dest='topo_size', + help='bin size for projection plot (mrad)') + parser.add_argument('--topo-range', type=float, default=50.0, dest='topo_range', + help='half range for projection plot (mrad)') + args = parser.parse_args() + + + # we can read these values from xml file + desc = compact_constants(args.compact, ['cb_ECal_RMin', 'cb_ECal_ReadoutLayerThickness', + 'cb_ECal_ReadoutLayerNumber', 'cb_ECal_Length']) + if not len(desc): + # or define Ecal shapes + rmin, thickness, length = 890, 20*(10. + 1.65), 860*2+500 + else: + # convert cm to mm + rmin = desc[0]*10. + thickness = desc[1]*desc[2]*10. + length = desc[3]*10. + + + # read data + load_root_macros(args.macros) + df = get_hits_data(args.file, args.iev, branch=args.branch) + dfmcp = get_mcp_data(args.file, args.iev, 'mcparticles2') + vec = dfmcp.loc[dfmcp['status'] == 24578, ['px', 'py', 'pz']].iloc[0].values + vec = vec/np.linalg.norm(vec) + df = df[df['cluster'] == args.icl] + if not len(df): + print("Error: do not find any hits for cluster {:d} in event {:d}".format(args.icl, args.iev)) + exit(-1) + # particle line from (0, 0, 0) to the inner Ecal surface + length = rmin/np.sqrt(vec[0]**2 + vec[1]**2) + pline = np.transpose(vec*np.mgrid[0:length:2j][:, np.newaxis]) + + + os.makedirs(args.outdir, exist_ok=True) + # cluster plot + cmap = truncate_colormap(plt.get_cmap('jet'), 0.1, 0.9) + fig = plt.figure(figsize=(20, 16), dpi=160) + ax = fig.add_subplot(111, projection='3d') + # draw particle line + ax.plot(*pline[[2, 1]], '--', zs=pline[0], color='green') + # draw hits + draw_hits3d(ax, df[['x', 'y', 'z', 'edep']].values, cmap, s=5.0) + draw_cylinder3d(ax, rmin, length, rstride=10, cstride=10, color='royalblue') + draw_cylinder3d(ax, rmin + thickness, length, rstride=10, cstride=10, color='forestgreen') + ax.set_zlim(-(rmin + thickness), rmin + thickness) + ax.set_ylim(-(rmin + thickness), rmin + thickness) + ax.set_xlim(-length, length) + fig.tight_layout() + fig.savefig(os.path.join(args.outdir, 'e{}_cluster.png'.format(args.iev))) + + + # zoomed-in cluster plot + fig = plt.figure(figsize=(20, 16), dpi=160) + ax = fig.add_subplot(111, projection='3d') + # draw particle line + ax.plot(*pline[[2, 1]], '--', zs=pline[0], color='green') + # draw hits + draw_hits3d(ax, df[['x', 'y', 'z', 'edep']].values, cmap, s=20.0) + draw_track_fit(ax, df, stop_layer=args.stop, + scat_kw=dict(color='k', s=50.0), line_kw=dict(linestyle=':', color='k', lw=3)) + # view range + center = (length + thickness/2.)*vec + ranges = np.vstack([center - thickness/4., center + thickness/4.]).T + ax.set_zlim(*ranges[0]) + ax.set_ylim(*ranges[1]) + ax.set_xlim(*ranges[2]) + + fig.tight_layout() + fig.savefig(os.path.join(args.outdir, 'e{}_cluster_zoom.png'.format(args.iev))) + + + # projection plot + # convert to polar coordinates (mrad), and stack all r values + df['r'] = np.sqrt(df['x'].values**2 + df['y'].values**2 + df['z'].values**2) + df['phi'] = np.arctan2(df['y'].values, df['x'].values)*1000. + df['theta'] = np.arccos(df['z'].values/df['r'].values)*1000. + + # convert to mrad + vecp = np.asarray([np.arccos(vec[2]), np.arctan2(vec[1], vec[0])])*1000. + phi_rg = np.asarray([vecp[1] - args.topo_range, vecp[1] + args.topo_range]) + th_rg = np.asarray([vecp[0] - args.topo_range, vecp[0] + args.topo_range]) + + fig, axs = plt.subplots(1, 2, figsize=(17, 16), dpi=160, gridspec_kw={'wspace':0., 'width_ratios': [16, 1]}) + ax, sm = draw_heatmap(axs[0], df['theta'].values, df['phi'].values, weights=df['edep'].values, + bins=(np.arange(*th_rg, step=args.topo_size), np.arange(*phi_rg, step=args.topo_size)), + cmap=cmap, cmin=0., pc_kw=dict(alpha=0.8, edgecolor='k')) + + ax.set_ylabel(r'$\phi$ (mrad)', fontsize=28) + ax.set_xlabel(r'$\theta$ (mrad)', fontsize=28) + ax.tick_params(labelsize=24) + ax.xaxis.set_minor_locator(MultipleLocator(5)) + ax.yaxis.set_minor_locator(MultipleLocator(5)) + ax.grid(linestyle=':', which='both') + ax.set_axisbelow(True) + cb = plt.colorbar(sm, cax=axs[1], shrink=0.85) + cb.ax.tick_params(labelsize=24) + cb.ax.get_yaxis().labelpad = 24 + cb.ax.set_ylabel('Energy Deposit (MeV)', rotation=90, fontsize=28) + fig.savefig(os.path.join(args.outdir, 'e{}_topo.png'.format(args.iev))) + diff --git a/benchmarks/sampling_ecal/scripts/draw_cluster_layers.py b/benchmarks/sampling_ecal/scripts/draw_cluster_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..3c545f7952be50793334b82b7f5beeb9e806904b --- /dev/null +++ b/benchmarks/sampling_ecal/scripts/draw_cluster_layers.py @@ -0,0 +1,180 @@ +''' + A script to visualize the cluster layer-by-layer + It reads the output from the Juggler component ImagingClusterReco, which is supposed to be clusters of hits after + digitization, reconstruction, and clustering + + Author: Chao Peng (ANL) + Date: 04/30/2021 +''' + +import os +import numpy as np +import pandas as pd +import argparse +import matplotlib +from matplotlib import cm +from matplotlib import pyplot as plt +from matplotlib.ticker import MultipleLocator +from matplotlib.collections import PatchCollection +from matplotlib.patches import Rectangle +from mpl_toolkits.axes_grid1 import make_axes_locatable +from matplotlib.backends.backend_pdf import PdfPages +from utils import * +import sys +import imageio + + +# color map +def draw_heatmap(axis, x, y, weights, bins=1000, vmin=None, vmax=None, cmap=plt.get_cmap('rainbow'), pc_kw=dict()): + w, xedg, yedg = np.histogram2d(x, y, weights=weights, bins=bins) + xsz = np.mean(np.diff(xedg)) + ysz = np.mean(np.diff(yedg)) + if vmin == None: + vmin = w.min() + if vmax == None: + vmax = w.max() + recs, clrs = [], [] + for i in np.arange(len(xedg) - 1): + for j in np.arange(len(yedg) - 1): + if w[i][j] > vmin: + recs.append(Rectangle((xedg[i], yedg[j]), xsz, ysz)) + clrs.append(cmap((w[i][j] - vmin) / (vmax - vmin))) + axis.add_collection(PatchCollection(recs, facecolor=clrs, **pc_kw)) + axis.set_xlim(xedg[0], xedg[-1]) + axis.set_ylim(yedg[0], yedg[-1]) + return axis, cm.ScalarMappable(norm=matplotlib.colors.Normalize(vmin=vmin, vmax=vmax), cmap=cmap) + + +# execute this script +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Visualize the cluster (layer-wise) from analysis') + parser.add_argument('file', type=str, help='path to root file') + parser.add_argument('-e', type=int, default=0, dest='iev', help='event number to plot') + parser.add_argument('-c', type=int, default=0, dest='icl', help='cluster number to plot') + parser.add_argument('-s', type=int, default=8, dest='stop', help='stop layer for track fit') + parser.add_argument('-o', type=str, default='./plots', dest='outdir', help='output directory') + parser.add_argument('-d', type=float, default=1.0, dest='dura', help='duration of gif') + parser.add_argument('--compact', type=str, default='', dest='compact', help='compact file') + parser.add_argument('-m', '--macros', type=str, default='rootlogon.C', dest='macros', + help='root macros to load (accept multiple paths separated by \",\")') + parser.add_argument('-b', '--branch-name', type=str, default='EcalBarrelClustersLayers', dest='branch', + help='branch name in the root file (outputLayerCollection from ImagingClusterReco)') + parser.add_argument('--topo-size', type=float, default=2.0, dest='topo_size', + help='bin size for projection plot (mrad)') + parser.add_argument('--topo-range', type=float, default=50.0, dest='topo_range', + help='half range for projection plot (mrad)') + args = parser.parse_args() + + + # we can read these values from xml file + desc = compact_constants(args.compact, ['cb_ECal_RMin', 'cb_ECal_ReadoutLayerThickness', + 'cb_ECal_ReadoutLayerNumber', 'cb_ECal_Length']) + if not len(desc): + # or define Ecal shapes + rmin, thickness, length = 890, 20*(10. + 1.65), 860*2+500 + else: + # convert cm to mm + rmin = desc[0]*10. + thickness = desc[1]*desc[2]*10. + length = desc[3]*10. + + + # read data + load_root_macros(args.macros) + df = get_hits_data(args.file, args.iev, args.branch) + dfmcp = get_mcp_data(args.file, args.iev, 'mcparticles2') + vec = dfmcp.loc[dfmcp['status'] == 24578, ['px', 'py', 'pz']].iloc[0].values + vec = vec/np.linalg.norm(vec) + df = df[df['cluster'] == args.icl] + # convert to polar coordinates (mrad), and stack all r values + df['r'] = np.sqrt(df['x'].values**2 + df['y'].values**2 + df['z'].values**2) + df['phi'] = np.arctan2(df['y'].values, df['x'].values)*1000. + df['theta'] = np.arccos(df['z'].values/df['r'].values)*1000. + if not len(df): + print("Error: do not find any hits for cluster {:d} in event {:d}".format(args.icl, args.iev)) + exit(-1) + # particle line from (0, 0, 0) to the inner Ecal surface + length = rmin/np.sqrt(vec[0]**2 + vec[1]**2) + pline = np.transpose(vec*np.mgrid[0:length:2j][:, np.newaxis]) + cmap = truncate_colormap(plt.get_cmap('jet'), 0.1, 0.9) + + + # convert truth to mrad + vecp = np.asarray([np.arccos(vec[2]), np.arctan2(vec[1], vec[0])])*1000. + phi_rg = np.asarray([vecp[1] - args.topo_range, vecp[1] + args.topo_range]) + th_rg = np.asarray([vecp[0] - args.topo_range, vecp[0] + args.topo_range]) + + os.makedirs(args.outdir, exist_ok=True) + # cluster plot by layers (rebinned) + pdf = PdfPages(os.path.join(args.outdir, 'e{}_c{}_layers.pdf'.format(args.iev, args.icl))) + bpos, bprops = (0.5, 0.95), dict(boxstyle='round', facecolor='wheat', alpha=0.2) + frames = [] + for i in np.arange(1, df['layer'].max() + 1, dtype=int): + data = df[df['layer'] == i] + fig, axs = plt.subplots(1, 2, figsize=(17, 16), dpi=160, gridspec_kw={'wspace':0., 'width_ratios': [16, 1]}) + ax, sm = draw_heatmap(axs[0], data['theta'].values, data['phi'].values, weights=data['edep'].values, + bins=(np.arange(*th_rg, step=args.topo_size), np.arange(*phi_rg, step=args.topo_size)), + cmap=cmap, vmin=0.0, vmax=1.0, pc_kw=dict(alpha=0.8, edgecolor='k')) + ax.set_ylabel(r'$\phi$ (mrad)', fontsize=28) + ax.set_xlabel(r'$\theta$ (mrad)', fontsize=28) + ax.tick_params(labelsize=24) + ax.xaxis.set_minor_locator(MultipleLocator(5)) + ax.yaxis.set_minor_locator(MultipleLocator(5)) + ax.grid(linestyle=':', which='both') + ax.set_axisbelow(True) + ax.text(*bpos, 'Layer {:d}'.format(i), transform=ax.transAxes, + fontsize=26, va='top', ha='center', bbox=bprops) + cb = plt.colorbar(sm, cax=axs[1], shrink=0.85) + cb.ax.tick_params(labelsize=24) + cb.ax.get_yaxis().labelpad = 24 + cb.ax.set_ylabel('Energy Deposit (MeV)', rotation=90, fontsize=28) + pdf.savefig(fig) + # gif frames + fig.savefig('ltmp.png') + plt.close(fig) + frames.append(imageio.imread('ltmp.png')) + pdf.close() + os.remove('ltmp.png') + + # build GIF + imageio.mimsave(os.path.join(args.outdir, 'e{:d}_c{:d}_layers.gif'.format(args.iev, args.icl)), + frames, 'GIF', duration=args.dura) + + + # cluster plot by layers (scatters) + pdf = PdfPages(os.path.join(args.outdir, 'e{}_c{}_layers2.pdf'.format(args.iev, args.icl))) + bpos, bprops = (0.5, 0.95), dict(boxstyle='round', facecolor='wheat', alpha=0.2) + frames = [] + for i in np.arange(1, df['layer'].max() + 1, dtype=int): + data = df[df['layer'] == i] + fig, axs = plt.subplots(1, 2, figsize=(17, 16), dpi=160, gridspec_kw={'wspace':0., 'width_ratios': [16, 1]}) + ax = axs[0] + colors = cmap(data['edep']/1.0) + ax.scatter(data['theta'].values, data['phi'].values, c=colors, marker='s', s=15.0) + ax.set_xlim(*th_rg) + ax.set_ylim(*phi_rg) + ax.set_ylabel(r'$\phi$ (mrad)', fontsize=28) + ax.set_xlabel(r'$\theta$ (mrad)', fontsize=28) + ax.tick_params(labelsize=24) + ax.xaxis.set_minor_locator(MultipleLocator(5)) + ax.yaxis.set_minor_locator(MultipleLocator(5)) + ax.grid(linestyle=':', which='both') + ax.set_axisbelow(True) + ax.text(*bpos, 'Layer {:d}'.format(i), transform=ax.transAxes, + fontsize=26, va='top', ha='center', bbox=bprops) + cb = plt.colorbar(sm, cax=axs[1], shrink=0.85) + cb.ax.tick_params(labelsize=24) + cb.ax.get_yaxis().labelpad = 24 + cb.ax.set_ylabel('Energy Deposit (MeV)', rotation=90, fontsize=28) + pdf.savefig(fig) + # gif frames + fig.savefig('ltmp.png') + plt.close(fig) + frames.append(imageio.imread('ltmp.png')) + pdf.close() + os.remove('ltmp.png') + + # build GIF + imageio.mimsave(os.path.join(args.outdir, 'e{:d}_c{:d}_layers2.gif'.format(args.iev, args.icl)), + frames, 'GIF', duration=args.dura) + diff --git a/benchmarks/sampling_ecal/scripts/energy_profile.py b/benchmarks/sampling_ecal/scripts/energy_profile.py new file mode 100644 index 0000000000000000000000000000000000000000..b0667f9bfef8867e3a965856d8078a2345b33922 --- /dev/null +++ b/benchmarks/sampling_ecal/scripts/energy_profile.py @@ -0,0 +1,129 @@ +''' + A script to generate the energy profile (layer-wise) + It reads the output from the Juggler component ImagingClusterReco, which is supposed to be clusters of hits after + digitization, reconstruction, and clustering + + Author: Chao Peng (ANL) + Date: 04/30/2021 +''' + +import os +import numpy as np +import pandas as pd +import ROOT +from ROOT import gROOT, gInterpreter +import argparse +import matplotlib +from matplotlib import pyplot as plt +from matplotlib.ticker import MultipleLocator, MaxNLocator +import sys +from utils import * + + +def find_start_layer(grp, min_edep=0.5): + ids, edeps = grp.sort_values('layer')[['layer', 'edep']].values.T + edeps = np.cumsum(edeps) + return min(ids[edeps > min_edep]) if sum(edeps > min_edep) > 0 else 20 + + +# execute this script +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Generate energy profiles') + parser.add_argument('file', type=str, help='path to root file') + parser.add_argument('--plot-dir', type=str, default='./plots', dest='outdir', help='output directory') + parser.add_argument('--type', type=str, default='unknown', dest='type', help='profile type (used in save)') + parser.add_argument('--energy', type=float, default=5., dest='energy', help='incident particle energy (GeV)') + parser.add_argument('--save', type=str, default='', dest='save', help='path to save profile') + parser.add_argument('--color', type=str, default='royalblue', dest='color', help='colors for bar plots') + parser.add_argument('-b', '--branch-name', type=str, default='EcalBarrelClustersLayers', dest='branch', + help='branch name in the root file (outputLayerCollection from ImagingClusterReco)') + parser.add_argument('-m', '--macros', type=str, default='rootlogon.C', dest='macros', + help='root macros to load (accept multiple paths separated by \",\")') + args = parser.parse_args() + + load_root_macros(args.macros) + # prepare data + dfe = get_layers_data(args.file, branch=args.branch) + dfe.loc[:, 'total_edep'] = dfe.groupby(['event', 'cluster'])['edep'].transform('sum') + # dfe.loc[:, 'efrac'] = dfe['edep']/dfe['total_edep'] + dfe.loc[:, 'efrac'] = dfe['edep']/(args.energy*1000.) + dfe = dfe[dfe['cluster'] == 0] + + os.makedirs(args.outdir, exist_ok=True) + + slayer = dfe.groupby('event').apply(lambda x: find_start_layer(x, 1.0)).astype(int) + dfe = dfe.merge(slayer.to_frame(name='slayer'), on='event') + # dfe.loc[:, 'eff_layer'] = dfe['layer'] - dfe['slayer'] + # prof = dfe[dfe['eff_layer'] > 0].groupby('eff_layer')['edep'].describe() + prof = dfe.groupby('layer')['efrac'].describe() + + # print(prof['mean']) + # plot profiles + bpos, bprops = (0.5, 0.95), dict(boxstyle='round', facecolor='wheat', alpha=0.3) + fig, ax = plt.subplots(figsize=(16, 9), dpi=160) + nev = len(dfe['event'].unique()) + ax.hist(dfe.groupby('event')['slayer'].min(), weights=[1/float(nev)]*nev, + ec='black', bins=np.arange(0.5, 10.5, step=1.0)) + ax.xaxis.set_major_locator(MaxNLocator(integer=True)) + ax.xaxis.set_minor_locator(MultipleLocator(1)) + ax.yaxis.set_minor_locator(MultipleLocator(1)) + ax.grid(linestyle=':', which='both') + ax.tick_params(labelsize=24) + ax.set_xlabel('Start Layer', fontsize=26) + ax.set_ylabel('Normalized Counts', fontsize=26) + ax.text(*bpos, 'Mininum Edep\n' + '{:.1f} MeV'.format(1.0), + transform=ax.transAxes, fontsize=26, verticalalignment='top', bbox=bprops) + fig.savefig(os.path.join(args.outdir, 'edep_start_{}_{}.png'.format(args.type, int(args.energy))) + + + fig, ax = plt.subplots(figsize=(16, 9), dpi=160) + ax.plot(prof.index, prof['mean'].values*100., color=args.color, lw=2) + # ax.fill_between(prof.index, prof['25%'], prof['75%'], color=args.color, alpha=0.3) + ax.fill_between(prof.index, (prof['mean'] - prof['std'])*100., (prof['mean'] + prof['std'])*100., + color=args.color, alpha=0.3) + ax.xaxis.set_major_locator(MaxNLocator(integer=True)) + ax.xaxis.set_minor_locator(MultipleLocator(1)) + ax.yaxis.set_minor_locator(MultipleLocator(1)) + ax.grid(linestyle=':', which='both') + ax.tick_params(labelsize=24) + ax.set_xlabel('Layer', fontsize=26) + ax.set_ylabel('Energy Deposit Percentage', fontsize=26) + fig.savefig(os.path.join(args.outdir, 'efrac_{}_{}.png'.format(args.type, int(args.energy))) + + layers = np.asarray([ + [1, 5, 8,], + [10, 15, 20], + ]) + + fig, ax = plt.subplots(*layers.shape, figsize=(16, 9), dpi=160, sharex='col', sharey='all', + gridspec_kw=dict(hspace=0.05, wspace=0.05)) + + for ax, layer in zip(ax.flat, layers.flatten()): + data = dfe[dfe['layer'] == layer] + ax.hist(data['efrac'].values*100., weights=[1/float(len(data))]*len(data), bins=np.linspace(0, 30, 60), + ec='black', color=args.color) + ax.tick_params(labelsize=24) + ax.xaxis.set_minor_locator(MultipleLocator(1)) + ax.grid(linestyle=':', which='both') + # ax.set_xlabel('Energy Deposit (MeV)', fontsize=26) + # ax.set_ylabel('Normalized Counts', fontsize=26) + mean, std = data.describe().loc[['mean', 'std'], 'edep'].values + label = 'Layer {}'.format(layer) + # + '\n' + r'$\mu = {:.3f}$ MeV'.format(mean) + # + '\n' + r'$\sigma = {:.3f}$ MeV'.format(std) + ax.text(*bpos, label, transform=ax.transAxes, fontsize=26, verticalalignment='top', bbox=bprops) + ax.set_axisbelow(True) + ax.set_yscale('log') + fig.text(0.5, 0.02, 'Energy Deposit Percentage', fontsize=26, ha='center') + fig.text(0.02, 0.5, 'Normalized Counts', fontsize=26, va='center', rotation=90) + fig.savefig(os.path.join(args.outdir, 'efrac_layers_{}_{}.png'.format(args.type, int(args.energy)))) + + + if args.save: + prof.loc[:, 'energy'] = args.energy*1000. + prof.loc[:, 'type'] = args.type + if os.path.exists(args.save): + prev = pd.read_csv(args.save).set_index('layer', drop=True) + prof = pd.concat([prof, prev]) + prof.to_csv(args.save) + diff --git a/benchmarks/sampling_ecal/scripts/epi_separation.py b/benchmarks/sampling_ecal/scripts/epi_separation.py new file mode 100644 index 0000000000000000000000000000000000000000..e552f21a8c4c7afdbb0189d09a3db3470d4f3433 --- /dev/null +++ b/benchmarks/sampling_ecal/scripts/epi_separation.py @@ -0,0 +1,100 @@ +''' + A script to use the energy profile for e-pi separation + It reads the output from the Juggler component ImagingClusterReco, which is supposed to be clusters of hits after + digitization, reconstruction, and clustering + + Author: Chao Peng (ANL) + Date: 04/30/2021 +''' + +import os +import numpy as np +import pandas as pd +import ROOT +from ROOT import gROOT, gInterpreter +import argparse +import matplotlib +from matplotlib import pyplot as plt +from matplotlib.ticker import MultipleLocator, MaxNLocator +import sys +from utils import * + + +def prepare_data(path, **kwargs): + tmp = get_layers_data(path, **kwargs) + tmp.loc[:, 'total_edep'] = tmp.groupby(['event', 'cluster'])['edep'].transform('sum') + tmp.loc[:, 'efrac'] = tmp['edep']/tmp['total_edep'] + # tmp = tmp[tmp['cluster'] == 0] + return tmp + + +def calc_chi2(grp, pr, lmin=5, lmax=12): + grp2 = grp[(grp['layer'] >= lmin) & (grp['layer'] <= lmax)] + mean, std = pr.loc[grp2['layer'], ['mean', 'std']].values.T*pr['energy'].mean() + edeps = grp2['edep'].values + return np.sqrt(np.sum((edeps - mean)**2/std**2)/float(len(edeps))) + + +# execute this script +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='epi_separation') + parser.add_argument('efile', type=str, help='path to root file (electrons)') + parser.add_argument('pifile', type=str, help='path to root file (pions)') + parser.add_argument('--prof', type=str, default='profile.csv', help='path to electron profile') + parser.add_argument('--plot-dir', type=str, default='./plots', dest='outdir', help='output directory') + parser.add_argument('-b', '--branch-name', type=str, default='EcalBarrelClustersLayers', dest='branch', + help='branch name in the root file (outputLayerCollection from ImagingClusterReco)') + parser.add_argument('-m', '--macros', type=str, default='rootlogon.C', dest='macros', + help='root macros to load (accept multiple paths separated by \",\")') + args = parser.parse_args() + + os.makedirs(args.outdir, exist_ok=True) + + + load_root_macros(args.macros) + # prepare data + dfe = prepare_data(args.efile, branch=args.branch) + dfpi = prepare_data(args.pifile, branch=args.branch) + + colors = ['royalblue', 'indianred', 'limegreen'] + prof = pd.read_csv(args.prof).set_index('layer', drop=True) + + # profile comparison + fig, ax = plt.subplots(figsize=(16, 9), dpi=160) + for title, color in zip(sorted(prof['type'].unique()), colors): + pr = prof[prof['type'] == title] + ax.plot(pr.index, pr['mean'], color=color, lw=2) + ax.fill_between(pr.index, (pr['mean'] - pr['std']), (pr['mean'] + pr['std']), + color=color, alpha=0.3, label=title) + ax.xaxis.set_major_locator(MaxNLocator(integer=True)) + ax.xaxis.set_minor_locator(MultipleLocator(1)) + ax.yaxis.set_minor_locator(MultipleLocator(1)) + ax.grid(linestyle=':', which='both') + ax.tick_params(labelsize=24) + ax.set_xlabel('Layer', fontsize=26) + ax.set_ylabel('Energy Deposit Percentage', fontsize=26) + ax.legend(fontsize=26, loc='upper left') + fig.savefig(os.path.join(args.outdir, 'compare_prof.png')) + + + # check profile + layer_range = (4, 12) + pre = prof[prof['type'].str.lower() == 'em'] + fig, ax = plt.subplots(figsize=(16, 9), dpi=160) + chi2 = dfe.groupby(['event', 'cluster']).apply(lambda x: calc_chi2(x, pre, *layer_range)) + ax.hist(chi2, bins=np.linspace(0, 5, 200), weights=[1/float(len(chi2))]*chi2, + ec='royalblue', color='royalblue', alpha=0.5, label='electrons') + chi2 = dfpi.groupby(['event', 'cluster']).apply(lambda x: calc_chi2(x, pre, *layer_range)) + # print(chi2[chi2 < 0.7]) + ax.hist(chi2, bins=np.linspace(0, 5, 200), weights=[1/float(len(chi2))]*chi2, + ec='indianred', color='indianred', alpha=0.5, label='pions') + ax.grid(linestyle=':', which='major') + ax.set_axisbelow(True) + ax.tick_params(labelsize=24) + ax.set_xlabel(r'$\chi^2$', fontsize=26) + ax.set_ylabel('Normalized Counts', fontsize=26) + # ax.set_yscale('log') + # ax.set_ylim(1e-3, 1.) + ax.legend(title=r'$\chi^2$ for $E_{{dep}}$ in layer {:d} - {:d}'.format(*layer_range), title_fontsize=28, fontsize=26) + fig.savefig(os.path.join(args.outdir, 'efrac_chi2.png')) + diff --git a/benchmarks/sampling_ecal/scripts/utils.py b/benchmarks/sampling_ecal/scripts/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..2e81c5088b6e25b4222f92feec933bf2ac1e0e1a --- /dev/null +++ b/benchmarks/sampling_ecal/scripts/utils.py @@ -0,0 +1,124 @@ +''' + A utility script to help the analysis of imaging calorimeter data + + Author: Chao Peng (ANL) + Date: 04/30/2021 +''' + +import os +import ROOT +import numpy as np +import pandas as pd +import matplotlib +import DDG4 +from ROOT import gROOT, gInterpreter + + +# helper function to truncate color map (for a better view from the rainbow colormap) +def truncate_colormap(cmap, minval=0.0, maxval=1.0, n=100): + new_cmap = matplotlib.colors.LinearSegmentedColormap.from_list( + 'trunc({n},{a:.2f},{b:.2f})'.format(n=cmap.name, a=minval, b=maxval), + cmap(np.linspace(minval, maxval, n))) + return new_cmap + + +# load root macros, input is an argument string +def load_root_macros(arg_macros): + for path in arg_macros.split(','): + path = path.strip() + if os.path.exists(path): + gROOT.Macro(path) + else: + print('\"{}\" does not exist, skip loading it.'.format(path)) + + +# read mc particles from root file +def get_mcp_data(path, evnums=None, branch='mcparticles2'): + f = ROOT.TFile(path) + events = f.events + if evnums is None: + evnums = np.arange(events.GetEntries()) + elif isinstance(evnums, int): + evnums = [evnums] + + dbuf = np.zeros(shape=(2000*len(evnums), 6)) + idb = 0 + for iev in evnums: + if iev >= events.GetEntries(): + print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1)) + continue + + events.GetEntry(iev) + # extract full mc particle data + for part in getattr(events, branch): + dbuf[idb] = (iev, part.psx, part.psy, part.psz, part.pdgID, part.status) + idb += 1 + return pd.DataFrame(data=dbuf[:idb], columns=['event', 'px', 'py', 'pz', 'pid', 'status']) + + +# read hits data from root file +def get_hits_data(path, evnums=None, branch='EcalBarrelClustersLayers'): + f = ROOT.TFile(path) + events = f.events + if evnums is None: + evnums = np.arange(events.GetEntries()) + elif isinstance(evnums, int): + evnums = [evnums] + + dbuf = np.zeros(shape=(2000*len(evnums), 7)) + idb = 0 + for iev in evnums: + if iev >= events.GetEntries(): + print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1)) + continue + + events.GetEntry(iev) + for layer in getattr(events, branch): + for k, hit in enumerate(layer.hits): + if k < layer.nhits: + dbuf[idb] = (iev, layer.clusterID, layer.layerID, hit.x, hit.y, hit.z, hit.E) + idb += 1 + + return pd.DataFrame(data=dbuf[:idb], columns=['event', 'cluster', 'layer', 'x', 'y', 'z', 'edep']) + + +# read layers data from root file +def get_layers_data(path, evnums=None, branch="EcalBarrelClustersLayers"): + f = ROOT.TFile(path) + events = f.events + if evnums is None: + evnums = np.arange(events.GetEntries()) + elif isinstance(evnums, int): + evnums = [evnums] + + dbuf = np.zeros(shape=(2000*len(evnums), 7)) + idb = 0 + for iev in evnums: + if iev >= events.GetEntries(): + print('Error: event {:d} is out of range (0 - {:d})'.format(iev, events.GetEntries() - 1)) + continue + + events.GetEntry(iev) + for layer in getattr(events, branch): + dbuf[idb] = (iev, layer.clusterID, layer.layerID, + layer.position.x, layer.position.y, layer.position.z, layer.edep) + idb += 1 + + return pd.DataFrame(data=dbuf[:idb], columns=['event', 'cluster', 'layer', 'x', 'y', 'z', 'edep']) + + +def compact_constants(path, names): + if not os.path.exists(path): + print('Cannot find compact file \"{}\".'.format(path)) + return [] + kernel = DDG4.Kernel() + description = kernel.detectorDescription() + kernel.loadGeometry("file:{}".format(path)) + try: + vals = [description.constantAsDouble(n) for n in names] + except: + print('Fail to extract values from {}, return empty.'.format(names)) + vals = [] + kernel.terminate() + return vals +