From 83aff4ce6dbbdce870693df57a105a84c0a61594 Mon Sep 17 00:00:00 2001 From: Ben Prather Date: Mon, 26 Aug 2024 07:43:53 -0600 Subject: [PATCH] Add `par_reduce_inner` functions (#1147) * Add par_reduce_inner functions * Formatting, changelog * Be explicit that existing inner reductions are TeamThreadRange --- CHANGELOG.md | 1 + src/kokkos_abstraction.hpp | 59 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b3fa70d917e..a29012d8ffb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Current develop ### Added (new features/APIs/variables/...) +- [[PR 1147]](https://github.com/parthenon-hpc-lab/parthenon/pull/1147) Add `par_reduce_inner` functions - [[PR 1159]](https://github.com/parthenon-hpc-lab/parthenon/pull/1159) Add additional timestep controllers in parthenon/time. - [[PR 1148]](https://github.com/parthenon-hpc-lab/parthenon/pull/1148) Add `GetPackDimension` to `StateDescriptor` for calculating pack sizes before `Mesh` initialization - [[PR 1143]](https://github.com/parthenon-hpc-lab/parthenon/pull/1143) Add tensor indices to VariableState, add radiation constant to constants, add TypeLists, allow for arbitrary containers for solvers diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index ca8c59ffe12e..8fa89f82e95e 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -976,6 +976,65 @@ KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(team_mbr_t team_member, Args &&.. 
par_for_inner(DEFAULT_INNER_LOOP_PATTERN, team_member, std::forward(args)...);
 }
 
+// Inner reduction loops
+//
+// Each overload below flattens an inclusive index box into a single linear range,
+// hands that range to Kokkos::TeamThreadRange, and runs a Kokkos::parallel_reduce
+// over it. The leading unnamed InnerLoopPatternTTR argument is used only to select
+// these overloads.
+
+/// Reduction over the inclusive 3D index box [kl, ku] x [jl, ju] x [il, iu].
+///
+/// @param team_member passed through to Kokkos::TeamThreadRange
+/// @param kl, ku      inclusive bounds of the slowest-varying index
+/// @param jl, ju      inclusive bounds of the middle index
+/// @param il, iu      inclusive bounds of the fastest-varying index
+/// @param function    callable invoked as function(k, j, i, lreduce), where lreduce
+///                    is a `typename T::value_type &`
+/// @param reduction   passed as the last argument of Kokkos::parallel_reduce; T must
+///                    provide a nested `value_type`
+template <typename Function, typename T>
+KOKKOS_FORCEINLINE_FUNCTION void
+par_reduce_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int kl, const int ku,
+                 const int jl, const int ju, const int il, const int iu,
+                 const Function &function, T reduction) {
+  const int Nk = ku - kl + 1;
+  const int Nj = ju - jl + 1;
+  const int Ni = iu - il + 1;
+  const int NjNi = Nj * Ni;
+  // An inverted range gives a negative extent. Treat any non-positive extent as an
+  // empty box so that two negative extents cannot multiply into a positive trip
+  // count and call `function` with indices outside the requested bounds.
+  const int NkNjNi = (Nk > 0 && Nj > 0 && Ni > 0) ? Nk * NjNi : 0;
+  Kokkos::parallel_reduce(
+      Kokkos::TeamThreadRange(team_member, NkNjNi),
+      [&](const int &idx, typename T::value_type &lreduce) {
+        // Unflatten idx = k * NjNi + j * Ni + i (zero-based), then shift each index
+        // to the caller's lower bound.
+        int k = idx / NjNi;
+        int j = (idx - k * NjNi) / Ni;
+        int i = idx - k * NjNi - j * Ni;
+        k += kl;
+        j += jl;
+        i += il;
+        function(k, j, i, lreduce);
+      },
+      reduction);
+}
+
+/// Reduction over the inclusive 2D index box [jl, ju] x [il, iu].
+///
+/// @param team_member passed through to Kokkos::TeamThreadRange
+/// @param jl, ju      inclusive bounds of the slower-varying index
+/// @param il, iu      inclusive bounds of the faster-varying index
+/// @param function    callable invoked as function(j, i, lreduce), where lreduce is a
+///                    `typename T::value_type &`
+/// @param reduction   passed as the last argument of Kokkos::parallel_reduce; T must
+///                    provide a nested `value_type`
+template <typename Function, typename T>
+KOKKOS_FORCEINLINE_FUNCTION void
+par_reduce_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int jl, const int ju,
+                 const int il, const int iu, const Function &function, T reduction) {
+  const int Nj = ju - jl + 1;
+  const int Ni = iu - il + 1;
+  // Same guard as the 3D overload: a non-positive extent means an empty box, and two
+  // negative extents must not combine into a positive trip count.
+  const int NjNi = (Nj > 0 && Ni > 0) ? Nj * Ni : 0;
+  Kokkos::parallel_reduce(
+      Kokkos::TeamThreadRange(team_member, NjNi),
+      [&](const int &idx, typename T::value_type &lreduce) {
+        // Unflatten idx = j * Ni + i (zero-based), then shift to the lower bounds.
+        int j = idx / Ni;
+        int i = idx - j * Ni;
+        j += jl;
+        i += il;
+        function(j, i, lreduce);
+      },
+      reduction);
+}
+
+/// Reduction over the inclusive 1D index range [il, iu].
+///
+/// @param team_member passed through to Kokkos::TeamThreadRange
+/// @param il, iu      inclusive bounds of the index
+/// @param function    callable invoked as function(i, lreduce), where lreduce is a
+///                    `typename T::value_type &`
+/// @param reduction   passed as the last argument of Kokkos::parallel_reduce; T must
+///                    provide a nested `value_type`
+template <typename Function, typename T>
+KOKKOS_FORCEINLINE_FUNCTION void
+par_reduce_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int il, const int iu,
+                 const Function &function, T reduction) {
+  const int Ni = iu - il + 1;
+  Kokkos::parallel_reduce(
+      Kokkos::TeamThreadRange(team_member, Ni),
+      [&](const int &idx, typename T::value_type &lreduce) {
+        // idx is zero-based; shift it to the caller's lower bound.
+        int i = idx;
+        i += il;
+        function(i, lreduce);
+      },
+      reduction);
+}
+
 // reused from kokoks/core/perf_test/PerfTest_ExecSpacePartitioning.cpp
 // commit a0d011fb30022362c61b3bb000ae3de6906cb6a7
 template