From 77dc1849334d5cf81ca204e7333f1df041cf6c74 Mon Sep 17 00:00:00 2001
From: Chao Peng <cpeng@anl.gov>
Date: Sat, 18 Apr 2020 13:26:46 -0500
Subject: [PATCH] add waveform analysis scripts

---
 scripts/mode1_analysis/analyze_waveform.py    |  96 ++++++++++++
 .../mode1_analysis/draw_waveform_events.py    | 141 ++++++++++++++++++
 src/esb_analyze.cpp                           |   1 -
 3 files changed, 237 insertions(+), 1 deletion(-)
 create mode 100644 scripts/mode1_analysis/analyze_waveform.py
 create mode 100644 scripts/mode1_analysis/draw_waveform_events.py

diff --git a/scripts/mode1_analysis/analyze_waveform.py b/scripts/mode1_analysis/analyze_waveform.py
new file mode 100644
index 0000000..7a90455
--- /dev/null
+++ b/scripts/mode1_analysis/analyze_waveform.py
@@ -0,0 +1,96 @@
+import ROOT
+import os
+import numpy as np
+import json
+import pandas as pd
+import argparse
+from matplotlib import pyplot as plt
+from scipy import signal
+from collections.abc import Iterable
+
+
+# fit pedestal
+def fit_pedestal(bk, outlier_thres=2.0, min_lengths=10):
+    ped_mean = np.average(bk)
+    # no need to calculate error
+    ped_err = np.std(bk)
+    if np.isclose(ped_err, 0.):
+        return ped_mean, 0.02*ped_mean
+    # exclude outliers
+    errs = np.abs(bk - ped_mean)/ped_err
+    outliers = errs > outlier_thres
+    if sum(outliers) > 0 and sum(~outliers) > min_lengths:
+        return fit_pedestal(bk[~outliers], outlier_thres, min_lengths)
+    return ped_mean, ped_err
+
+
+def get_peak(vals, thres=8.0, twindow=(0, 192), zero_thres=5.0):
+    if len(vals) < 1:
+        return 0, 0, 0, 0
+    # find peaks
+    peaks, peak_props = signal.find_peaks(vals, prominence=thres)
+    # fit pedestal
+    peak_intervals = pd.IntervalIndex.from_arrays(peak_props['left_bases'], peak_props['right_bases'])
+    ped_mean, ped_err = fit_pedestal(np.array(vals)[[~peak_intervals.contains(x).any() for x in np.arange(len(vals))]])
+    # check peak
+    peak, pos = 0, 0
+    for p, lside, rside in zip(peaks, peak_props['left_bases'], peak_props['right_bases']):
+        if p > twindow[1] or p < twindow[0]:
+            continue
+        height = vals[p] - ped_mean
+        if height < zero_thres*ped_err:
+            continue
+        width = rside - lside
+        l, r = max(0, lside - width), min(len(vals), rside + width)
+        valley = min(vals[l:r]) - ped_mean
+        if valley < 0 and abs(valley) >= height:
+            continue
+        if height > peak:
+            peak = height
+            pos = p
+    return peak, pos, ped_mean, ped_err
+
+
+def get_channels(json_path):
+    dbf = open(json_path)
+    db = json.load(dbf)
+    channels = dict()
+    for mod, mod_prop in db.items():
+        for ch in mod_prop['channels']:
+            channels[ch['name']] = ch['type']
+    return channels
+
+
+parser = argparse.ArgumentParser('Raw waveform analysis')
+
+parser.add_argument('root_file', help='a root file of waveform data')
+parser.add_argument('module_file', help='a json database of modules and channels')
+parser.add_argument('output_file', help='path to the output csv file')
+parser.add_argument('--calo-thres', dest='cal_thres', help='threshold of the calorimeter sum for the event, default 200.', type=float, default=200)
+parser.add_argument('--peak-thres', dest='pthres', help='sigma threshold for finding the peak, default 8', type=float, default=15)
+parser.add_argument('--peak-min', dest='pmin', help='lower limit of the peak adc value, default 10', type=int, default=10)
+parser.add_argument('--peak-max', dest='pmax', help='upper limit of the peak adc value, default 8000', type=int, default=8000)
+parser.add_argument('--time-min', dest='tmin', help='lower limit of the time window, default 0', type=int, default=0)
+parser.add_argument('--time-max', dest='tmax', help='upper limit of the time window, default 192 (max)', type=int, default=192)
+
+args = parser.parse_args()
+
+
+f = ROOT.TFile.Open(args.root_file, 'read')
+ch = get_channels(args.module_file)
+res = pd.DataFrame(columns=[c + '_peak' for c in list(ch)] + [c + '_pos' for c in list(ch)])
+
+count = 0
+for iev, ev in enumerate(f.EvTree):
+    if iev > 10000:
+        break
+    if iev % 100 == 0:
+        print('processed {}'.format(iev), end='\r')
+    for c, t in ch.items():
+        branch = ev.__getattr__(c + '_raw')
+        samples = np.ndarray((len(branch), ), dtype=np.int32, buffer=branch)
+        peak, pos, _ , _ = get_peak(samples, args.pthres, (args.tmin, args.tmax))
+        res.loc[iev, c + '_peak'] = peak
+        res.loc[iev, c + '_pos'] = pos
+
+res.to_csv(args.output_file)
diff --git a/scripts/mode1_analysis/draw_waveform_events.py b/scripts/mode1_analysis/draw_waveform_events.py
new file mode 100644
index 0000000..ea9f264
--- /dev/null
+++ b/scripts/mode1_analysis/draw_waveform_events.py
@@ -0,0 +1,141 @@
+import ROOT
+import os
+import numpy as np
+import pandas as pd
+import argparse
+from matplotlib import pyplot as plt
+from scipy import signal
+from collections.abc import Iterable
+
+
+
+# fit pedestal
+def fit_pedestal(bk, outlier_thres=2.0, min_lengths=10):
+    ped_mean = np.average(bk)
+    # no need to calculate error
+    if len(bk) < 5:
+        return ped_mean, 0.02*ped_mean
+    ped_err = np.std(bk)
+    # exclude outliers
+    errs = np.abs(bk - ped_mean)/ped_err
+    outliers = errs > outlier_thres
+    if sum(outliers) > 0 and sum(~outliers) > min_lengths:
+        return fit_pedestal(bk[~outliers], outlier_thres, min_lengths)
+    return ped_mean, ped_err
+
+
+def get_peak(vals, thres=8.0, twindow=(0, 192), zero_thres=5.0):
+    if len(vals) < 1:
+        return 0, 0, 0, 0
+    # find peaks
+    peaks, peak_props = signal.find_peaks(vals, prominence=thres)
+    # fit pedestal
+    peak_intervals = pd.IntervalIndex.from_arrays(peak_props['left_bases'], peak_props['right_bases'])
+    ped_mean, ped_err = fit_pedestal(np.array(vals)[[~peak_intervals.contains(x).any() for x in np.arange(len(vals))]])
+    # check peak
+    peak, pos = 0, 0
+    for p, lside, rside in zip(peaks, peak_props['left_bases'], peak_props['right_bases']):
+        if p > twindow[1] or p < twindow[0]:
+            continue
+        height = vals[p] - ped_mean
+        if height < zero_thres*ped_err:
+            continue
+        width = rside - lside
+        l, r = max(0, lside - width), min(len(vals), rside + width)
+        valley = min(vals[l:r]) - ped_mean
+        if valley < 0 and abs(valley) >= height:
+            continue
+        if height > peak:
+            peak = height
+            pos = p
+    return peak, pos, ped_mean, ped_err
+
+
+parser = argparse.ArgumentParser('Raw waveform analysis')
+
+parser.add_argument('root_file', help='a root file of waveform data')
+parser.add_argument('output_dir', help='Output directory')
+parser.add_argument('--max-plots', dest='max_plots', help='number of events to plot', type=int, default=10)
+parser.add_argument('--peak-thres', dest='pthres', help='sigma threshold for finding the peak, default 8', type=float, default=15)
+parser.add_argument('--peak-min', dest='pmin', help='lower limit of the peak adc value, default 10', type=int, default=10)
+parser.add_argument('--peak-max', dest='pmax', help='upper limit of the peak adc value, default 8000', type=int, default=8000)
+parser.add_argument('--time-min', dest='tmin', help='lower limit of the time window, default 0', type=int, default=0)
+parser.add_argument('--time-max', dest='tmax', help='upper limit of the time window, default 192 (max)', type=int, default=192)
+
+args = parser.parse_args()
+
+
+f = ROOT.TFile.Open(args.root_file, 'read')
+
+# alignment of channels
+figsize = (16, 16)
+nrows, ncols = 4, 4
+ch = [
+    'Cer11_5', 'Cer12_5', 'Cer13_5', 'Cer14_5',
+    'Cer21_5', 'Cer22_5', 'Cer23_5', 'Cer24_5',
+    'Cer31_5', 'Cer32_5', 'Cer33_5', 'Cer34_5',
+    'Cer41_5', 'Cer42_5', 'Cer43_5', 'Cer44_5',
+]
+
+pmt = [
+    'A', 'A', 'A', 'A',
+    'B', 'B', 'B', 'B',
+    'C', 'C', 'C', 'C',
+    'D', 'D', 'D', 'D',
+]
+
+pmt_colors = {
+    'A': ('lightcoral', 0.05),
+    'B': ('lightgreen', 0.05),
+    'C': ('moccasin', 0.05),
+    'D': ('lightskyblue', 0.05),
+}
+
+
+count = 0
+for iev, ev in enumerate(f.EvTree):
+    samples = dict()
+    pprops = dict()
+    psum = 0
+    fire = 0
+    for c in ch:
+        branch = ev.__getattr__(c + '_raw')
+        samples[c] = np.ndarray((len(branch), ), dtype=np.int32, buffer=branch)
+        peak, pos, ped_mean, ped_err = get_peak(samples[c], args.pthres, (args.tmin, args.tmax))
+        pprops[c] = (peak, pos, ped_mean, ped_err)
+        if peak > args.pmin and peak < args.pmax:
+            psum += peak
+            fire += 1
+
+    # plot
+    if fire > 0:
+        fig, axs = plt.subplots(nrows, ncols, figsize=figsize)
+        for (i, ax) in enumerate(axs.flat):
+            box = dict(boxstyle='round', facecolor='white', alpha=0.5)
+            ax.text(0.68, 0.83, ch[i], transform=ax.transAxes, fontsize=16, verticalalignment='top', bbox=box)
+            ax.patch.set_facecolor(pmt_colors[pmt[i]][0])
+            ax.patch.set_alpha(pmt_colors[pmt[i]][1])
+
+            sp = samples[ch[i]]
+            if len(sp) > 0:
+                pp = pprops[ch[i]]
+                ax.plot(np.arange(len(sp)), sp, label='Waveform Data')
+                ax.axhline(y=pp[2], color='red', label='Pedestal Mean')
+                xr = ax.get_xlim()
+                ax.fill_between(xr, pp[2]-pp[3], pp[2]+pp[3], color='red', alpha=0.25, label='Pedestal Error')
+                ax.set_xlim(xr[0], xr[1])
+                ax.axvline(x=args.tmin, color='k', linestyle=':', label='Time Window')
+                ax.axvline(x=args.tmax, color='k', linestyle=':')
+                if (pp[0] > args.pmin):
+                    ax.plot(pp[1], pp[0] + pp[2], 'kv', label='Peak')
+                lines, labels = ax.get_legend_handles_labels()
+
+        fig.tight_layout(rect=(0.03, 0.03, 0.99, 0.95))
+        fig.text(0.5, 0.02, 'Sample', ha='center', fontsize=18)
+        fig.text(0.02, 0.5, 'ADC Value', ha='center', rotation=90, fontsize=18)
+        fig.legend(lines, labels, loc = 'upper center', ncol=5, fontsize=18)
+        fig.savefig(os.path.join(args.output_dir, 'sample_ev{}.png'.format(iev)))
+        count += 1
+        if count > args.max_plots:
+            break
+
diff --git a/src/esb_analyze.cpp b/src/esb_analyze.cpp
index 6936802..a00ce4d 100644
--- a/src/esb_analyze.cpp
+++ b/src/esb_analyze.cpp
@@ -162,7 +162,6 @@ int main(int argc, char* argv[])
 
 std::vector<Module> read_modules(const std::string &path)
 {
-
     // read in file
     std::string buffer = ConfigParser::file_to_string(path);
 
-- 
GitLab