From 04151d5e3dc6849f65f43bc2b17767f2509e2219 Mon Sep 17 00:00:00 2001
From: Wouter Deconinck <wouter.deconinck@umanitoba.ca>
Date: Wed, 28 Jul 2021 05:48:25 +0000
Subject: [PATCH] Part5 analysis docs

---
 src/config/sidebar.yml                    |   6 +-
 src/docs/index.mdx                        |   3 +
 src/docs/part5/reconstruction_analysis.md | 304 ++++++++++++++++++++++
 3 files changed, 311 insertions(+), 2 deletions(-)
 create mode 100644 src/docs/part5/reconstruction_analysis.md

diff --git a/src/config/sidebar.yml b/src/config/sidebar.yml
index a7dfe19..7b481e2 100644
--- a/src/config/sidebar.yml
+++ b/src/config/sidebar.yml
@@ -24,8 +24,10 @@
       link: '/part2/adding_detectors'
     - label: 'Part 3: Digitization and Reconstruction'
       link: '/part3/running_juggler'
-        #    - label: 'Part 4: Sensitive Detectors'
-        #      link: '/part4/sensitive_detector'
+#    - label: 'Part 4: Sensitive Detectors'
+#      link: '/part4/sensitive_detector'
+    - label: 'Part 5: Analysis'
+      link: '/part5/reconstruction_analysis'
 - label: References
   items:
     - label: 'DD4hep Reference Documentation'
diff --git a/src/docs/index.mdx b/src/docs/index.mdx
index a37f91d..2677951 100644
--- a/src/docs/index.mdx
+++ b/src/docs/index.mdx
@@ -21,6 +21,9 @@ together to get some results and serve as a jumping off point for further develo
 
 - [Part 3](part3/running_juggler) - Running Juggler  reconstruction
 
+- Part 4 - Creating sensitive detectors (under development)
+
+- [Part 5](part5/reconstruction_analysis) - Analysis of the reconstruction output
 
 ## Background Discussion
 
diff --git a/src/docs/part5/reconstruction_analysis.md b/src/docs/part5/reconstruction_analysis.md
new file mode 100644
index 0000000..774b9cb
--- /dev/null
+++ b/src/docs/part5/reconstruction_analysis.md
@@ -0,0 +1,304 @@
+---
+title: 'Reconstruction Analysis'
+---
+
+This part of the tutorial outlines how reconstruction output from full simulations can be used for analysis by physics working groups.
+
+At the end of this tutorial you should able to:
+- determine the location of relevant full simulation reconstruction files,
+- understand the file structure of the full simulation reconstruction output,
+- use one of several approaches to perform low-level analysis on the reconstruction output.
+
+## Location of full simulation reconstruction output
+
+Full simulation outputs are stored on one of several locations:
+- S3: https://dtn01.sdcc.bnl.gov:9000/minio/eictest/ATHENA/ (web interface)
+- xrootd: root://sci-xrootd.jlab.org//osgpool/eic/ATHENA/ (no preview currently available)
+
+Data at these location is organized in a predicatable structure:
+- `EVGEN/` contains all initial generated events (in hepmc3 or other format),
+- `FULL/` contains the raw full simulation output without any reconstruction,
+- `RECO/` contains the reconstruction output.
+
+Under the different top-level directories, you can find the different physics processes:
+- `SINGLE/` contains single particle initial states,
+- `DIS/` contains DIS events.
+
+For details on accessing these locations, please refer to https://doc.athena-eic.org/en/latest/howto/.
+
+For the purpose of this tutorial we will use the S3 interface since it does not rely on a mirroring process at Jefferson Lab.
+
+## File structure of the full simulation reconstruction output
+
+Each reconstruction output file has essentially the same structure, defined by the EIC Data Model. This structure can be retrieved with `rootls`, e.g.
+```console
+root -l s3https://dtn01.sdcc.bnl.gov:9000/eictest/ATHENA/RECO/SINGLE/pi+/1GeV/45to135deg/pi+_1GeV_45to135deg.0001.root
+```
+which produces the following output, two trees:
+```console
+events  metadata
+```
+
+The `events` tree is of course what we are interested in. We can explore its top-level structure as follows. We first start a ROOT session:
+```console
+root -l s3https://dtn01.sdcc.bnl.gov:9000/eictest/ATHENA/RECO/SINGLE/pi+/1GeV/45to135deg/pi+_1GeV_45to135deg.0001.root
+```
+and then list the top-level branches in the `events` tree:
+```console
+events->Print("toponly")
+```
+This will display a large number of branches (and their size):
+```console
+******************************************************************************
+*Tree    :events    : Events tree                                            *
+*Entries :  1000002 : Total =     34023140832 bytes  File  Size = 8907188433 *
+*        :          : Tree compression factor =   3.82                       *
+******************************************************************************
+branch: mcparticles2         471802624
+branch: TrackerBarrelHits2   184032064
+branch: ReconstructedParticles   2106267
+branch: ReconstructedParticles#0   1447954
+branch: ReconstructedParticles#1   1447954
+branch: ReconstructedParticles#2   1447954
+branch: ReconstructedParticles#3   1447954
+branch: EcalBarrelHitsSimpleDigi  93024680
+branch: EcalBarrelHitsSimpleReco 142564938
+branch: EcalBarrelHitsSimpleReco#0   6351262
+branch: EcalEndcapNHitsDigi   17873069
+branch: EcalEndcapNHitsReco    9750180
+branch: EcalEndcapNHitsReco#0   2294175
+branch: EcalEndcapNClusterHits   3686937
+branch: EcalEndcapNClusterHits#0   1594591
+branch: EcalEndcapNClusters    4128541
+branch: EcalEndcapNClusters#0   1444440
+branch: EcalEndcapNClusters#1   1444440
+branch: EcalEndcapPHitsDigi   43298624
+branch: EcalEndcapPHitsReco   51470328
+branch: EcalEndcapPHitsReco#0   3366251
+branch: EcalEndcapPHitsRecoXY  48939215
+branch: EcalEndcapPHitsRecoXY#0   3383190
+branch: EcalEndcapPClusterHits   6539903
+branch: EcalEndcapPClusterHits#0   1633199
+branch: EcalEndcapPClusters    4494705
+branch: EcalEndcapPClusters#0   1444440
+branch: EcalEndcapPClusters#1   1444440
+branch: EcalBarrelHitsDigi    68681726
+branch: EcalBarrelHitsReco   441682325
+branch: EcalBarrelClusterHits  22471306
+branch: EcalBarrelLayers       7351806
+branch: EcalBarrelLayers#0     1441221
+branch: EcalBarrelClusters     7131448
+branch: EcalBarrelClusters#0   1443270
+branch: EcalBarrelClusters#1   1443270
+branch: EcalBarrelScFiHitsDigi 1636334484
+branch: EcalBarrelScFiHitsReco 3932534929
+branch: EcalBarrelScFiHitsReco#0  14507111
+branch: EcalBarrelScFiGridReco 815861620
+branch: EcalBarrelScFiGridReco#0   9424066
+branch: EcalBarrelScFiClusterHits 396488274
+branch: EcalBarrelScFiClusterHits#0   6066163
+branch: EcalBarrelScFiClusters  41044730
+branch: EcalBarrelScFiClusters#0   1447954
+branch: EcalBarrelScFiClusters#1   1447954
+branch: HcalBarrelHitsDigi    13328585
+branch: HcalBarrelHitsReco    15260047
+branch: HcalBarrelHitsReco#0   2425669
+branch: HcalBarrelHitsRecoXY  13434597
+branch: HcalBarrelHitsRecoXY#0   2410125
+branch: HcalBarrelClusterHits   2352864
+branch: HcalBarrelClusterHits#0   1451387
+branch: HcalBarrelClusters     2687875
+branch: HcalBarrelClusters#0   1443270
+branch: HcalBarrelClusters#1   1443270
+branch: HcalElectronEndcapHitsDigi   5707858
+branch: HcalElectronEndcapHitsReco   8514858
+branch: HcalElectronEndcapHitsReco#0   1928251
+branch: HcalElectronEndcapHitsRecoXY   7773148
+branch: HcalElectronEndcapHitsRecoXY#0   1922562
+branch: HcalElectronEndcapClusterHits   2375296
+branch: HcalElectronEndcapClusterHits#0   1459130
+branch: HcalElectronEndcapClusters   2726638
+branch: HcalElectronEndcapClusters#0   1452637
+branch: HcalElectronEndcapClusters#1   1452637
+branch: HcalHadronEndcapHitsDigi   4112230
+branch: HcalHadronEndcapHitsReco   5003312
+branch: HcalHadronEndcapHitsReco#0   1689929
+branch: HcalHadronEndcapHitsRecoXY   4723184
+branch: HcalHadronEndcapHitsRecoXY#0   1689633
+branch: HcalHadronEndcapClusterHits   2328711
+branch: HcalHadronEndcapClusterHits#0   1454456
+branch: HcalHadronEndcapClusters   2690491
+branch: HcalHadronEndcapClusters#0   1450880
+branch: HcalHadronEndcapClusters#1   1450880
+branch: TrackerBarrelRawHits  24139635
+branch: TrackerEndcapRawHits   3166129
+branch: VertexBarrelRawHits   29242781
+branch: VertexEndcapRawHits    2219822
+branch: TrackerBarrelRecHits  64929921
+branch: TrackerEndcapRecHits   6218655
+branch: VertexBarrelRecHits   75481339
+branch: VertexEndcapRecHits    3798116
+branch: ReconstructedParticlesInitFromTruth   1958375
+branch: outputTrackParameters   2035687
+branch: ForwardRICHHits2      20341908
+branch: ForwardRICHHitsDigi    1500666
+branch: ForwardRICHHitsReco    1858812
+branch: EcalEndcapNHits        2255311
+branch: EcalEndcapPHits        2255311
+branch: EcalBarrelHits         2248873
+branch: EcalBarrelScFiHits     2272009
+branch: HcalBarrelHits         2248873
+branch: HcalHadronEndcapHits   2280802
+branch: HcalElectronEndcapHits   2291933
+branch: TrackerEndcapHits      2558412
+branch: TrackerBarrelHits      2558412
+branch: VertexBarrelHits       2554303
+branch: VertexEndcapHits       2554303
+branch: ForwardRICHHits        2544344
+```
+
+During the development of the reconstruction, there are more branches enabled here than are strictly necessary (e.g. simulated and digitized hits, intermediate reconstruction parameters). These are all available for analysis (with fixed interfaces). In this tutorial we will focus on a few branches in particular:
+- ReconstructedParticles: contains the results from track finding and fitting,
+- EcalBarrelClusters: contains the results from the barrel Ecal cluster finding,
+- EcalBarrelScFiClusters: contains the results from the barrel Ecal ScFi cluster finding.
+
+We can inspect each of these three branches in more detail (some information removed for formatting)
+```console
+root [14] events->Print("ReconstructedParticles*")
+******************************************************************************
+*Tree    :events    : Events tree                                            *
+*Entries :  1000002 : Total =     34023140832 bytes  File  Size = 8907188433 *
+*        :          : Tree compression factor =   3.82                       *
+******************************************************************************
+*Br    0 :ReconstructedParticles : Int_t ReconstructedParticles_             *
+*Br    1 :ReconstructedParticles.pid : Long64_t pid[ReconstructedParticles_] *
+*Br    2 :ReconstructedParticles.energy :                                    *
+*         | Double_t energy[ReconstructedParticles_]                         *
+*Br    3 :ReconstructedParticles.p.x : Double_t x[ReconstructedParticles_]   *
+*Br    4 :ReconstructedParticles.p.y : Double_t y[ReconstructedParticles_]   *
+*Br    5 :ReconstructedParticles.p.z : Double_t z[ReconstructedParticles_]   *
+*Br    6 :ReconstructedParticles.charge :                                    *
+*         | Double_t charge[ReconstructedParticles_]                         *
+*Br    7 :ReconstructedParticles.mass :                                      *
+*         | Double_t mass[ReconstructedParticles_]                           *
+*Br    8 :ReconstructedParticles.clusters_begin :                            *
+*         | UInt_t clusters_begin[ReconstructedParticles_]                   *
+*Br    9 :ReconstructedParticles.clusters_end :                              *
+*         | UInt_t clusters_end[ReconstructedParticles_]                     *
+*Br   10 :ReconstructedParticles.tracks_begin :                              *
+*         | UInt_t tracks_begin[ReconstructedParticles_]                     *
+*Br   11 :ReconstructedParticles.tracks_end :                                *
+*         | UInt_t tracks_end[ReconstructedParticles_]                       *
+*Br   12 :ReconstructedParticles.particles_begin :                           *
+*         | UInt_t particles_begin[ReconstructedParticles_]                  *
+*Br   13 :ReconstructedParticles.particles_end :                             *
+*         | UInt_t particles_end[ReconstructedParticles_]                    *
+*............................................................................*
+
+root [6] events->Print("EcalBarrelClusters*")
+******************************************************************************
+*Tree    :events    : Events tree                                            *
+*Entries :   500002 : Total =     22026619265 bytes  File  Size = 6339571464 *
+*        :          : Tree compression factor =   3.47                       *
+******************************************************************************
+*Br    0 :EcalBarrelClusters : Int_t EcalBarrelClusters_                     *
+*Br    1 :EcalBarrelClusters.clusterID : Int_t clusterID[EcalBarrelClusters_]*
+*Br    2 :EcalBarrelClusters.nhits : Int_t nhits[EcalBarrelClusters_]        *
+*Br    3 :EcalBarrelClusters.energy : Double_t energy[EcalBarrelClusters_]   *
+*Br    4 :EcalBarrelClusters.edep : Double_t edep[EcalBarrelClusters_]       *
+*Br    5 :EcalBarrelClusters.radius : Double_t radius[EcalBarrelClusters_]   *
+*Br    6 :EcalBarrelClusters.skewness :                                      *
+*         | Double_t skewness[EcalBarrelClusters_]                           *
+*Br    7 :EcalBarrelClusters.leakcorr :                                      *
+*         | Double_t leakcorr[EcalBarrelClusters_]                           *
+*Br    8 :EcalBarrelClusters.eta : Double_t eta[EcalBarrelClusters_]         *
+*Br    9 :EcalBarrelClusters.position.x : Double_t x[EcalBarrelClusters_]    *
+*Br   10 :EcalBarrelClusters.position.y : Double_t y[EcalBarrelClusters_]    *
+*Br   11 :EcalBarrelClusters.position.z : Double_t z[EcalBarrelClusters_]    *
+*Br   12 :EcalBarrelClusters.polar.r : Double_t r[EcalBarrelClusters_]       *
+*Br   13 :EcalBarrelClusters.polar.theta :                                   *
+*         | Double_t theta[EcalBarrelClusters_]                              *
+*Br   14 :EcalBarrelClusters.polar.phi : Double_t phi[EcalBarrelClusters_]   *
+*Br   15 :EcalBarrelClusters.cl_theta :                                      *
+*         | Double_t cl_theta[EcalBarrelClusters_]                           *
+*Br   16 :EcalBarrelClusters.cl_phi : Double_t cl_phi[EcalBarrelClusters_]   *
+*Br   17 :EcalBarrelClusters.hits_begin :                                    *
+*         | UInt_t hits_begin[EcalBarrelClusters_]                           *
+*Br   18 :EcalBarrelClusters.hits_end : UInt_t hits_end[EcalBarrelClusters_] *
+*Br   19 :EcalBarrelClusters.layers_begin :                                  *
+*         | UInt_t layers_begin[EcalBarrelClusters_]                         *
+*Br   20 :EcalBarrelClusters.layers_end :                                    *
+*         | UInt_t layers_end[EcalBarrelClusters_]                           *
+*............................................................................*
+```
+
+The ReconstructedParticles branch contains the momentum, e.g. `ReconstructedParticles.p.x` for the x-component, and references to other entities related to this particle (clusters, tracks, particles).
+
+Note: Due to conditions that are currently being addressed, some momentum vectors use `p.x` whereas others use `p.px`.
+
+## Analysis of full simulation reconstruction output with traditional ROOT commands
+
+After opening the reconstruction output file, let's make some pretty plots. We start with:
+```console
+root -l s3https://dtn01.sdcc.bnl.gov:9000/eictest/ATHENA/RECO/SINGLE/pi+/1GeV/45to135deg/pi+_1GeV_45to135deg.0001.root
+```
+and in the ROOT session:
+```console
+root [1] .ls
+TNetXNGFile**           root://sci-xrootd.jlab.org//osgpool/eic/ATHENA/RECO/SINGLE/pi+/1GeV/45to135deg/pi+_1GeV_45to135deg.0001.root    data file
+ TNetXNGFile*           root://sci-xrootd.jlab.org//osgpool/eic/ATHENA/RECO/SINGLE/pi+/1GeV/45to135deg/pi+_1GeV_45to135deg.0001.root    data file
+  KEY: TTree    events;1        Events tree
+  KEY: TTree    metadata;1      Metadata tree
+events->Draw("ReconstructedParticlesInitFromTruth.p.px")
+events->Draw("EcalBarrelClusters.polar.phi:EcalBarrelClusters.polar.theta", "EcalBarrelClusters.edep", "colz")
+```
+These types of plots will likely be limited to simple data inspection.
+
+## Analysis of full simulation reconstruction output with RDataFrame commands
+
+For a more advanced analysis, you can take advantage of the RDataFrame features, such as in this (shortened) DIS example. The [original](https://eicweb.phy.anl.gov/EIC/benchmarks/physics_benchmarks/-/blob/master/benchmarks/dis/analysis/dis_electrons.cxx) is used in our CI system and determines DIS parameters for every change to the detector geometry, simulation, digitization, or reconstruction.
+```console
+auto momenta_from_reconstruction(const std::vector<eic::ReconstructedParticleData>& parts) {
+  std::vector<ROOT::Math::PxPyPzEVector> momenta{parts.size()};
+  std::transform(parts.begin(), parts.end(), momenta.begin(), [](const auto& part) {
+    return ROOT::Math::PxPyPzEVector{part.p.x, part.p.y, part.p.z, part.energy};
+  });
+  return momenta;
+}
+
+auto convertMtoE(const std::vector<ROOT::Math::PxPyPzMVector>& mom) {
+  std::vector<ROOT::Math::PxPyPzEVector> momenta{mom.size()};
+  std::transform(mom.begin(), mom.end(), momenta.begin(), [](const auto& part) {
+    return ROOT::Math::PxPyPzEVector{part.Px(), part.Py(), part.Pz(), part.energy()};
+  });
+  return momenta;
+}
+
+bool sort_mom_bool(ROOT::Math::PxPyPzEVector &mom1, ROOT::Math::PxPyPzEVector &mom2) {
+  return  mom1.energy() > mom2.energy(); 
+}
+
+auto sort_momenta(const std::vector<ROOT::Math::PxPyPzEVector>& mom) {
+  std::vector <ROOT::Math::PxPyPzEVector> sort_mom = mom;
+  sort(sort_mom.begin(), sort_mom.end(), sort_mom_bool);
+  return sort_mom;
+}
+
+auto Q2(const std::vector<ROOT::Math::PxPyPzEVector>& mom) {
+  std::vector<double> Q2Vec(mom.size() );
+  ROOT::Math::PxPyPzEVector beamMom = {0, 0, 5, 5};
+  std::transform(mom.begin(), mom.end(), Q2Vec.begin(), [beamMom](const auto& part) {
+    return -(part - beamMom).M2();
+  });
+  return Q2Vec;
+}
+
+ROOT::RDataFrame d("events", "s3https://dtn01.sdcc.bnl.gov:9000/eictest/ATHENA/RECO//DIS/crossDivNrgCrab/DIS_NC_Q2gt10_crossDivNrgCrab_25mRad_5x41_v2.root");
+
+auto d0 = d.Define("p", momenta_from_reconstruction, {"ReconstructedParticles"}).Define("Q2", Q2, {"p"});
+
+auto h_Q2_sim = d0.Histo1D({"h_Q2_sim", "; GeV; counts", 100, -5, 25}, "Q2");
+auto& h1_Q2_sim = *h_Q2_sim;
+h1_Q2_sim.DrawClone("hist");
+
+```
-- 
GitLab