diff --git a/CMakeLists.txt b/CMakeLists.txt index d8d743f7ff65307c10d1849c2413193aa1ea2f80..a105413e8fdaee4db21193cb5c82b7e586f0ed25 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,6 +49,21 @@ if(NOT CMAKE_BUILD_TYPE MATCHES Release) add_compile_options(-Wall -Wextra -Werror) endif() +# Following check is SYCL Implementation specific (Intel OneAPI) +# OneAPI sets an env variable after its environment is initialized, +# which we use to detect presence of the DPCPP SYCL compiler +option(USE_SYCL "Enable SYCL support" OFF) +if(USE_SYCL) + if(DEFINED ENV{SETVARS_COMPLETED}) + add_compile_definitions(USE_SYCL) + find_package(IntelDPCPP REQUIRED) # Requires CMAKE_PREFIX_PATH setup by environment + elseif(EXISTS "/opt/intel/oneapi/compiler/latest/linux/bin/dpcpp") + message(FATAL_ERROR "OneAPI environment not initalized! source /opt/intel/oneapi/setvars.sh before proceeding") + else() + message(WARNING "OneAPI environment not found, not using SYCL") + endif() +endif() + find_package(Microsoft.GSL CONFIG) find_package(EDM4EIC REQUIRED) diff --git a/JugReco/CMakeLists.txt b/JugReco/CMakeLists.txt index 748d0b2ee7652e1f19675a59c36c278b50ba34ec..c6aa4df4a6e89f618fa197119fa4b77797e40e4c 100644 --- a/JugReco/CMakeLists.txt +++ b/JugReco/CMakeLists.txt @@ -23,4 +23,8 @@ target_include_directories(JugRecoPlugins PUBLIC $ $) +if(USE_SYCL) + target_include_directories(JugRecoPlugins PUBLIC ${SYCL_INCLUDE_DIR}) +endif() + target_compile_options(JugRecoPlugins PRIVATE -Wno-suggest-override) diff --git a/JugReco/src/components/CalorimeterIslandCluster.cpp b/JugReco/src/components/CalorimeterIslandCluster.cpp index 3a3ab6d9687ace50c157df4c09b78b84f5841161..5ef67a8cd289c0e5d8f63de4a20e4874d3e4d6c1 100644 --- a/JugReco/src/components/CalorimeterIslandCluster.cpp +++ b/JugReco/src/components/CalorimeterIslandCluster.cpp @@ -16,6 +16,7 @@ #include #include "fmt/format.h" +#include "fmt/ranges.h" #include "Gaudi/Property.h" #include "GaudiAlg/GaudiAlgorithm.h" @@ -40,6 +41,11 @@ #include "edm4hep/Vector3f.h" #include "edm4hep/Vector2f.h" +#ifdef USE_SYCL + #include + #include +#endif + #if defined __has_include # if __has_include ("edm4eic/Vector3f.h") # include "edm4eic/Vector3f.h" @@ -117,9 +123,324 @@ static std::map& lpos, + const sycl::accessor& gpos, + const sycl::accessor& dims, + const sycl::accessor& sectors, + const sycl::accessor& secDist, + const sycl::accessor& neighDist, + sycl::id<1> hit1, int hit2, int hitDist_type) { + + if(sectors[hit1] != sectors[hit2]) { + auto delta = gpos[hit1] - gpos[hit2]; + return (sycl::length(delta) <= secDist[0]); + } else { + auto delta = lpos[hit1] - lpos[hit2]; + switch(hitDist_type){ + case 0: { + // LocalXY + return (delta.x() <= neighDist[0] && delta.y() <= neighDist[1]); + break; + } + case 1: { + // LocalXZ + return (delta.x() <= neighDist[0] && delta.z() <= neighDist[1]); + break; + } + case 2: { + // LocalYZ + return (delta.y() <= neighDist[0] && delta.z() <= neighDist[1]); + break; + } + case 3: { + // dimScaleLocalXY + auto dimsum = dims[hit1] + dims[hit2]; + return ((2*delta.x()/dimsum.x()) <= neighDist[0] && (2*delta.y()/dimsum.y()) <= neighDist[1]); + break; + } + case 4: { + // globalRPhi + double r = sycl::length(gpos[hit1]) - sycl::length(gpos[hit2]); + double phi = sycl::atan2(gpos[hit1].y(), gpos[hit1].x()) - sycl::atan2(gpos[hit2].y(), gpos[hit2].x()); + return (r <= neighDist[0] && phi <= neighDist[1]); + break; + } + case 5: { + // globalEtaPhi + double ang_polar = sycl::atan2(sycl::hypot(gpos[hit1].x(), gpos[hit1].y()), gpos[hit1].z()); + double eta = -sycl::log(sycl::tan(0.5 * ang_polar)); + double phi = sycl::atan2(gpos[hit1].y(), gpos[hit1].x()) - sycl::atan2(gpos[hit2].y(), gpos[hit2].x()); + return (eta <= neighDist[0] && phi <= neighDist[1]); + } + } + } + + } + + void parallel_group(std::vector>>& groups, + const CaloHitCollection& hits, std::array& neighbourDist, + double minClusterHitEdep, double sectorDist) { + // Corner Cases + if(hits.size() <= 0) return; + + // Host memory + // + // Get location data from hits + std::vector sectors; + std::vector energy; + std::vector> lpos, gpos, dims; + + // Neighbour Indices + std::vector nidx (hits.size()); + + // Can't filter out hits here as index numbers will not match + for(size_t i = 0; i < hits.size(); i++) { + lpos.emplace_back(hits[i].getLocal().x, hits[i].getLocal().y, hits[i].getLocal().z); + gpos.emplace_back(hits[i].getPosition().x, hits[i].getPosition().y, hits[i].getPosition().z); + dims.emplace_back(hits[i].getDimension().x, hits[i].getDimension().y, hits[i].getDimension().z); + energy.emplace_back(hits[i].getEnergy()); + sectors.emplace_back(hits[i].getSector()); + } + + { + // Device memory + sycl::buffer nidx_buf (nidx.data(), sycl::range<1>(nidx.size())); + sycl::buffer minClusterHitEdep_buf (&minClusterHitEdep, sycl::range<1>(1)); + sycl::buffer energy_buf (energy.data(), sycl::range<1>(energy.size())); + + sycl::buffer lpos_buf (lpos.data(), sycl::range<1>(lpos.size())); + sycl::buffer gpos_buf (gpos.data(), sycl::range<1>(gpos.size())); + sycl::buffer dims_buf (dims.data(), sycl::range<1>(dims.size())); + + sycl::buffer sectors_buf (sectors.data(), sycl::range<1>(sectors.size())); + sycl::buffer sectorDist_buf (§orDist, sycl::range<1>(1)); + sycl::buffer neighbourDist_buf (neighbourDist.data(), sycl::range<1>(neighbourDist.size())); + + try { + // Initalize Neighbour Indices + queue.submit([&](sycl::handler& h) { + auto nidx_acc = nidx_buf.get_access(h); + h.parallel_for(sycl::range<1>(hits.size()), [=](sycl::id<1> idx) { + nidx_acc[idx] = idx; + }); + }); + + /** + * @brief Assign current vertex id (idx) to neighbour if idx < id + * held at neighbour index. Emulates sequential assignment of + * clusters by DFS in parallel. + */ + queue.submit([&](sycl::handler& h) { + auto nidx_acc = nidx_buf.get_access(h); + auto minClusterHitEdep_acc = minClusterHitEdep_buf.get_access(h); + auto energy_acc = energy_buf.get_access(h); + + int hitsDist_type = static_cast(sHitsDist); + auto lpos_acc = lpos_buf.get_access(h); + auto gpos_acc = gpos_buf.get_access(h); + auto dims_acc = dims_buf.get_access(h); + + auto sectors_acc = sectors_buf.get_access(h); + auto sectorDist_acc = sectorDist_buf.get_access(h); + auto neighbourDist_acc = neighbourDist_buf.get_access(h); + + h.parallel_for(sycl::range<1>(hits.size()), [=](sycl::id<1> idx) { + // not a qualified hit - skip + if(energy_acc[idx] < minClusterHitEdep_acc[0]){ + return; + } + + for(size_t i = 0; i < nidx_acc.size(); i++) { + + if(energy_acc[i] < minClusterHitEdep_acc[0]) continue; + + if(!Jug::Reco::is_neighbour(lpos_acc,gpos_acc,dims_acc,sectors_acc, + sectorDist_acc,neighbourDist_acc,idx,i,hitsDist_type)){ + continue; + } + + // Atomic exchange of min element between current neighbour index nidx_acc[i] and idx + nidx_acc[i].fetch_min(idx); + + } + + }); + }).wait_and_throw(); + } catch(sycl::exception e) { + std::cerr << "Caught SYCL Exception: " << e.what() << "\n"; + } + + } // Sync Device and Host memory + + if(is_debug){ + fmt::print("Parallel grouping results are:\n{}\n",fmt::join(nidx," ")); + } + + // Emplace index array into groups for further processing + std::unordered_map>> grp_map; + for(size_t i = 0; i < hits.size(); i++) { + grp_map[nidx[i]].emplace_back(i, hits[i]); + } + for(auto i : grp_map) { + groups.emplace_back(i.second); + } + + } + + std::vector + parallel_find_maxima(const std::vector>& group, + std::array& neighbourDist, + double minClusterCenterEdep, double sectorDist, bool global = false) { + std::vector maxima; + + if (group.empty()) { + return maxima; + } + + if (global) { + int mpos = 0; + for (size_t i = 0; i < group.size(); ++i) { + if (group[mpos].second.getEnergy() < group[i].second.getEnergy()) { + mpos = i; + } + } + if (group[mpos].second.getEnergy() >= minClusterCenterEdep) { + maxima.push_back(group[mpos].second); + } + return maxima; + } + + // Prep Host memory for device offload + std::vector energy; + std::vector max_idx (group.size()); + + // Get location data from hits + std::vector sectors; + std::vector> lpos, gpos, dims; + + for (const auto& [idx, hit] : group){ + lpos.emplace_back(hit.getLocal().x, hit.getLocal().y, hit.getLocal().z); + gpos.emplace_back(hit.getPosition().x, hit.getPosition().y, hit.getPosition().z); + dims.emplace_back(hit.getDimension().x, hit.getDimension().y, hit.getDimension().z); + energy.push_back(hit.getEnergy()); + sectors.push_back(hit.getSector()); + } + + // Device memory + { + + sycl::buffer energy_buf (energy.data(), sycl::range<1>(energy.size())); + sycl::buffer max_idx_buf (max_idx.data(), sycl::range<1>(max_idx.size())); + sycl::buffer minClusterCenterEdep_buf (&minClusterCenterEdep, sycl::range<1>(1)); + + int hitsDist_type = static_cast(sHitsDist); + sycl::buffer lpos_buf (lpos.data(), sycl::range<1>(lpos.size())); + sycl::buffer gpos_buf (gpos.data(), sycl::range<1>(gpos.size())); + sycl::buffer dims_buf (dims.data(), sycl::range<1>(dims.size())); + + sycl::buffer sectors_buf (sectors.data(), sycl::range<1>(sectors.size())); + sycl::buffer sectorDist_buf (§orDist, sycl::range<1>(1)); + sycl::buffer neighbourDist_buf (neighbourDist.data(), sycl::range<1>(neighbourDist.size())); + + try { + queue.submit([&](sycl::handler& h){ + auto energy_acc = energy_buf.get_access(h); + auto max_idx_acc = max_idx_buf.get_access(h); + auto minClusterCenterEdep_acc = minClusterCenterEdep_buf.get_access(h); + + auto lpos_acc = lpos_buf.get_access(h); + auto gpos_acc = gpos_buf.get_access(h); + auto dims_acc = dims_buf.get_access(h); + + auto sectors_acc = sectors_buf.get_access(h); + auto sectorDist_acc = sectorDist_buf.get_access(h); + auto neighbourDist_acc = neighbourDist_buf.get_access(h); + + h.parallel_for(sycl::range<1>(group.size()), [=](sycl::id<1> idx){ + // not a qualified hit - skip + if(energy_acc[idx] < minClusterCenterEdep_acc[0]){ + return; + } + + bool is_max = true; + + for(size_t i = 0; i < energy_acc.size(); i++){ + if(idx == i){ + continue; + } + + if(energy_acc[i] > energy_acc[idx]){ + if(Jug::Reco::is_neighbour(lpos_acc,gpos_acc,dims_acc,sectors_acc, + sectorDist_acc,neighbourDist_acc,idx,i,hitsDist_type)){ + is_max = false; + break; + } + + } + } + + if(is_max){ + max_idx_acc[idx] = 1; + } + + }); + }).wait_and_throw(); + + } catch(sycl::exception e) { + std::cerr << "Caught SYLC Exception: " << e.what() << "\n"; + } + + } // Sync Device memory to Host + + // Convert maxima index array to hit vector for further processing + for(size_t i = 0; i < max_idx.size(); i++){ + if(max_idx[i] == 1){ + if(is_debug){ + std::cout << "Found Maxima at: " << group[i].first << "\n"; + } + maxima.push_back(group[i].second); + } + } + + return maxima; + } + +#endif + /** * Island Clustering Algorithm for Calorimeter Blocks. * @@ -189,6 +510,24 @@ public: neighbourDist[i] = uprop.value()[i] / units[i]; } hitsDist = method; + + #ifdef USE_SYCL + if(uprop.name() == "localDistXY") + sHitsDist = sycl_hits_dist::localXY; + if(uprop.name() == "localDistXZ") + sHitsDist = sycl_hits_dist::localXZ; + if(uprop.name() == "localDistYZ") + sHitsDist = sycl_hits_dist::localYZ; + if(uprop.name() == "dimScaledLocalDistXY") + sHitsDist = sycl_hits_dist::dimScaleLocalXY; + if(uprop.name() == "globalDistRPhi") + sHitsDist = sycl_hits_dist::globalRPhi; + if(uprop.name() == "globalDistEtaPhi") + sHitsDist = sycl_hits_dist::globalEtaPhi; + + if(msgLevel(MSG::DEBUG)) is_debug = true; + #endif + info() << fmt::format("Clustering uses {} with distances <= [{}]", uprop.name(), fmt::join(neighbourDist, ",")) << endmsg; } @@ -229,6 +568,20 @@ public: // group neighboring hits std::vector>> groups; + #ifdef USE_SYCL + for (size_t i = 0; i < hits.size(); ++i) { + if (msgLevel(MSG::DEBUG)) { + const auto& hit = hits[i]; + debug() << fmt::format("hit {:d}: energy = {:.4f} MeV, local = ({:.4f}, {:.4f}) mm, " + "global=({:.4f}, {:.4f}, {:.4f}) mm", + i, hit.getEnergy() * 1000., hit.getLocal().x, hit.getLocal().y, hit.getPosition().x, + hit.getPosition().y, hit.getPosition().z) + << endmsg; + } + } + + parallel_group(groups, hits, neighbourDist, minClusterHitEdep, sectorDist); + #else std::vector visits(hits.size(), false); for (size_t i = 0; i < hits.size(); ++i) { if (msgLevel(MSG::DEBUG)) { @@ -247,12 +600,17 @@ public: // create a new group, and group all the neighboring hits dfs_group(groups.back(), i, hits, visits); } + #endif for (auto& group : groups) { if (group.empty()) { continue; } + #ifdef USE_SYCL + auto maxima = parallel_find_maxima(group, neighbourDist, minClusterCenterEdep, sectorDist, !m_splitCluster.value()); + #else auto maxima = find_maxima(group, !m_splitCluster.value()); + #endif split_group(group, maxima, proto); if (msgLevel(MSG::DEBUG)) { debug() << "hits in a group: " << group.size() << ", " @@ -296,7 +654,7 @@ private: } } - // find local maxima that above a certain threshold + // find local maxima that are above a certain threshold std::vector find_maxima(const std::vector>& group, bool global = false) const {