From 7529be3f4790424b13d94ac55e349527396044b1 Mon Sep 17 00:00:00 2001 From: Ben Prather Date: Mon, 26 Aug 2024 04:23:36 -0600 Subject: [PATCH 1/2] Update driver.rst (#1158) --- doc/sphinx/src/driver.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/sphinx/src/driver.rst b/doc/sphinx/src/driver.rst index a7c400bc9086..2587a806c070 100644 --- a/doc/sphinx/src/driver.rst +++ b/doc/sphinx/src/driver.rst @@ -32,7 +32,9 @@ The ``EvolutionDriver`` class derives from ``Driver``, defining the loop, including periodic outputs. It has a single pure virtual member function called ``Step`` which a derived class must define and which -will be called during each pass of the loop above. +will be called during each pass of the loop above. The +``SetGlobalTimeStep`` and ``OutputCycleDiagnostics`` functions have +default implementations, but can be overridden for flexibility. MultiStageDriver ---------------- From 83aff4ce6dbbdce870693df57a105a84c0a61594 Mon Sep 17 00:00:00 2001 From: Ben Prather Date: Mon, 26 Aug 2024 07:43:53 -0600 Subject: [PATCH 2/2] Add `par_reduce_inner` functions (#1147) * Add par_reduce_inner functions * Formatting, changelog * Be explicit that existing inner reductions are TeamThreadRange --- CHANGELOG.md | 1 + src/kokkos_abstraction.hpp | 59 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b3fa70d917e..a29012d8ffb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Current develop ### Added (new features/APIs/variables/...) +- [[PR 1147]](https://github.com/parthenon-hpc-lab/parthenon/pull/1147) Add `par_reduce_inner` functions - [[PR 1159]](https://github.com/parthenon-hpc-lab/parthenon/pull/1159) Add additional timestep controllers in parthenon/time. 
- [[PR 1148]](https://github.com/parthenon-hpc-lab/parthenon/pull/1148) Add `GetPackDimension` to `StateDescriptor` for calculating pack sizes before `Mesh` initialization - [[PR 1143]](https://github.com/parthenon-hpc-lab/parthenon/pull/1143) Add tensor indices to VariableState, add radiation constant to constants, add TypeLists, allow for arbitrary containers for solvers diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index ca8c59ffe12e..8fa89f82e95e 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -976,6 +976,65 @@ KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(team_mbr_t team_member, Args &&.. par_for_inner(DEFAULT_INNER_LOOP_PATTERN, team_member, std::forward(args)...); } +// Inner reduction loops +template <typename Function, typename T> /* 3D (k, j, i) overload; bounds are inclusive */ +KOKKOS_FORCEINLINE_FUNCTION void +par_reduce_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int kl, const int ku, + const int jl, const int ju, const int il, const int iu, + const Function &function, T reduction) { + const int Nk = ku - kl + 1; + const int Nj = ju - jl + 1; + const int Ni = iu - il + 1; + const int NkNjNi = Nk * Nj * Ni; + const int NjNi = Nj * Ni; + Kokkos::parallel_reduce( /* one flat TeamThreadRange over all Nk*Nj*Ni points */ + Kokkos::TeamThreadRange(team_member, NkNjNi), + [&](const int &idx, typename T::value_type &lreduce) { + int k = idx / NjNi; /* decode flat idx: k slowest, i fastest */ + int j = (idx - k * NjNi) / Ni; + int i = idx - k * NjNi - j * Ni; + k += kl; /* shift zero-based offsets back to the caller's index range */ + j += jl; + i += il; + function(k, j, i, lreduce); + }, + reduction); +} + +template <typename Function, typename T> /* 2D (j, i) overload; bounds are inclusive */ +KOKKOS_FORCEINLINE_FUNCTION void +par_reduce_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int jl, const int ju, + const int il, const int iu, const Function &function, T reduction) { + const int Nj = ju - jl + 1; + const int Ni = iu - il + 1; + const int NjNi = Nj * Ni; + Kokkos::parallel_reduce( /* one flat TeamThreadRange over all Nj*Ni points */ + Kokkos::TeamThreadRange(team_member, NjNi), + [&](const int &idx, typename T::value_type &lreduce) { + int j = idx / Ni; /* decode flat idx: j slowest, i fastest */ + int i = idx - j * Ni; + j += jl; /* shift zero-based offsets back to the caller's index range */ + i += il; + function(j, i, lreduce); + }, + reduction); +} + 
+template <typename Function, typename T> /* 1D (i) overload; bounds are inclusive */ +KOKKOS_FORCEINLINE_FUNCTION void +par_reduce_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int il, const int iu, + const Function &function, T reduction) { + const int Ni = iu - il + 1; + Kokkos::parallel_reduce( /* TeamThreadRange over the Ni points of the range */ + Kokkos::TeamThreadRange(team_member, Ni), + [&](const int &idx, typename T::value_type &lreduce) { + int i = idx; + i += il; /* shift zero-based offset back to the caller's index range */ + function(i, lreduce); + }, + reduction); +} + // reused from kokoks/core/perf_test/PerfTest_ExecSpacePartitioning.cpp // commit a0d011fb30022362c61b3bb000ae3de6906cb6a7 template