From 6995d119e36a994493658ccba91ef0735ff5d655 Mon Sep 17 00:00:00 2001 From: adam reyes Date: Sat, 22 Jun 2024 18:00:22 +0200 Subject: [PATCH 01/12] wrap 3D flat loop abstractions --- src/kokkos_abstraction.hpp | 54 ++++++++++++++++++++++++++------- tst/unit/kokkos_abstraction.cpp | 35 +++++++++++++++++++++ 2 files changed, 78 insertions(+), 11 deletions(-) diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index 2d74ce00932a..c44ea6c39a77 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -21,12 +21,14 @@ #define KOKKOS_ABSTRACTION_HPP_ #include +#include #include #include #include #include +#include "Kokkos_Macros.hpp" #include "basic_types.hpp" #include "config.hpp" #include "parthenon_array_generic.hpp" @@ -35,6 +37,7 @@ #include "utils/multi_pointer.hpp" #include "utils/object_pool.hpp" + namespace parthenon { #ifdef KOKKOS_ENABLE_CUDA_UVM @@ -258,6 +261,37 @@ par_dispatch(LoopPatternMDRange, const std::string &name, DevExecSpace exec_spac function, std::forward(args)...); } +template class Functor; + +template +class Functor +{ + using F = std::function; + F m_f; +public: + Functor( F function, + int _NjNi, int _Ni, int _kl, int _jl, int _il ) + : m_f(function), NjNi(_NjNi), Ni(_Ni), kl(_kl), jl(_jl), il(_il) {} + KOKKOS_INLINE_FUNCTION + void operator()(const int &idx, Args... args) const { + int k = idx / NjNi; + int j = (idx - k * NjNi) / Ni; + int i = idx - k * NjNi - j * Ni; + k += kl; + j += jl; + i += il; + m_f(k, j, i, std::forward(args)...); + } + int NjNi, Ni, kl, jl, il; +}; + +template +auto MakeFunctor(F &function, const int &NjNi, const int &Ni, + const int &kl, const int &jl, const int &il) { + return Functor(function, NjNi, Ni, kl, jl, il); +} + + // 3D loop using Kokkos 1D Range template inline typename std::enable_if::type @@ -270,17 +304,10 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp const int Ni = iu - il + 1; const int NkNjNi = Nk * Nj * Ni; const int NjNi = Nj * Ni; + kokkos_dispatch( tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi), - KOKKOS_LAMBDA(const int &idx) { - int k = idx / NjNi; - int j = (idx - k * NjNi) / Ni; - int i = idx - k * NjNi - j * Ni; - k += kl; - j += jl; - i += il; - function(k, j, i); - }, + MakeFunctor(function, NjNi, Ni, kl, jl, il), std::forward(args)...); } @@ -649,8 +676,13 @@ inline void par_dispatch(LoopPatternSimdFor, const std::string &name, template inline void par_dispatch(const std::string &name, Args &&...args) { - par_dispatch(DEFAULT_LOOP_PATTERN, name, DevExecSpace(), - std::forward(args)...); + if constexpr (std::is_same::value) { + par_dispatch(DEFAULT_LOOP_PATTERN, name, DevExecSpace(), + std::forward(args)...); + } else { + par_dispatch(loop_pattern_mdrange_tag, name, DevExecSpace(), + std::forward(args)...); + } } template diff --git a/tst/unit/kokkos_abstraction.cpp b/tst/unit/kokkos_abstraction.cpp index 0adc61c441b0..82a2e174f494 100644 --- a/tst/unit/kokkos_abstraction.cpp +++ b/tst/unit/kokkos_abstraction.cpp @@ -500,6 +500,33 @@ bool test_wrapper_reduce_1d(T loop_pattern, DevExecSpace exec_space) { return total == test_tot; } +template +bool test_wrapper_reduce_3d(T loop_pattern, DevExecSpace exec_space) { + constexpr int N = 10; + parthenon::IndexRange r{0, N - 1}; + parthenon::ParArray3D buffer("Testing buffer", N, N, N); + // Initialize data + parthenon::par_for( + loop_pattern, "Initialize parallel reduce array", exec_space, 0, N-1, 0, N-1, 0, N-1, + KOKKOS_LAMBDA(const int k, const int j, const int i) { buffer(k,j,i) = i+j+k; }); + int max = 0; + for (int k = 0; k < N; ++k) { + for (int j = 0; j < N; ++j) { + for (int i = 0; i < N; ++i) { + max = std::max(max, i+j+k); + } + } + } + int test_max = 0; + parthenon::par_reduce( + loop_pattern, "Max via par reduce", exec_space, + 0, N-1, 0, N-1, 0, N-1, + KOKKOS_LAMBDA(const int k, const int j, const int i, int &t) { + t = i+j+k; + }, Kokkos::Max(test_max)); + return max == test_max; +} + TEST_CASE("Parallel reduce", "[par_reduce]") { auto default_exec_space = DevExecSpace(); REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_flatrange_tag, @@ -508,4 +535,12 @@ TEST_CASE("Parallel reduce", "[par_reduce]") { REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_simdfor_tag, default_exec_space) == true); } + REQUIRE(test_wrapper_reduce_3d(parthenon::loop_pattern_flatrange_tag, + default_exec_space) == true); + /* REQUIRE(test_wrapper_reduce_3d(parthenon::LoopPatternMDRange(), */ + /* default_exec_space) == true); */ + /* if constexpr (std::is_same::value) { */ + /* REQUIRE(test_wrapper_reduce_3d(parthenon::loop_pattern_simdfor_tag, */ + /* default_exec_space) == true); */ + /* } */ } From 2e61847f65828eedc35ec3cdb836826c7bed7f0f Mon Sep 17 00:00:00 2001 From: adam reyes Date: Sat, 22 Jun 2024 20:49:14 +0200 Subject: [PATCH 02/12] add 4D loop and test --- src/kokkos_abstraction.hpp | 128 ++++++++++++++++++-------------- tst/unit/kokkos_abstraction.cpp | 59 +++++++++++---- 2 files changed, 116 insertions(+), 71 deletions(-) diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index c44ea6c39a77..b4971f2080cd 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -261,36 +261,42 @@ par_dispatch(LoopPatternMDRange, const std::string &name, DevExecSpace exec_spac function, std::forward(args)...); } -template class Functor; - -template -class Functor -{ - using F = std::function; - F m_f; -public: - Functor( F function, - int _NjNi, int _Ni, int _kl, int _jl, int _il ) - : m_f(function), NjNi(_NjNi), Ni(_Ni), kl(_kl), jl(_jl), il(_il) {} - KOKKOS_INLINE_FUNCTION - void operator()(const int &idx, Args... args) const { - int k = idx / NjNi; - int j = (idx - k * NjNi) / Ni; - int i = idx - k * NjNi - j * Ni; - k += kl; - j += jl; - i += il; - m_f(k, j, i, std::forward(args)...); - } - int NjNi, Ni, kl, jl, il; -}; +template class FlatFunctor; template -auto MakeFunctor(F &function, const int &NjNi, const int &Ni, - const int &kl, const int &jl, const int &il) { - return Functor(function, NjNi, Ni, kl, jl, il); +auto MakeFlatFunctor(F &function) { + return FlatFunctor(); } +template +class FlatFunctor +{ + public: + FlatFunctor(){}; + template + inline void operator()(Tag tag, const std::string &name, + DevExecSpace exec_space, const int kl, const int ku, + const int jl, const int ju, const int il, const int iu, + const F &function, Args ...args) const { + const int Nk = ku - kl + 1; + const int Nj = ju - jl + 1; + const int Ni = iu - il + 1; + const int NkNjNi = Nk * Nj * Ni; + const int NjNi = Nj * Ni; + kokkos_dispatch( + tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi), + KOKKOS_LAMBDA(const int &idx, FArgs ...fargs) { + int k = idx / NjNi; + int j = (idx - k * NjNi) / Ni; + int i = idx - k * NjNi - j * Ni; + k += kl; + j += jl; + i += il; + function(k, j, i, std::forward(fargs)...); + }, + std::forward(args)...); + } +}; // 3D loop using Kokkos 1D Range template @@ -299,16 +305,8 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp const int kl, const int ku, const int jl, const int ju, const int il, const int iu, const Function &function, Args &&...args) { Tag tag; - const int Nk = ku - kl + 1; - const int Nj = ju - jl + 1; - const int Ni = iu - il + 1; - const int NkNjNi = Nk * Nj * Ni; - const int NjNi = Nj * Ni; - - kokkos_dispatch( - tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi), - MakeFunctor(function, NjNi, Ni, kl, jl, il), - std::forward(args)...); + const auto func = MakeFlatFunctor(function); + func(tag, name, exec_space, kl, ku, jl, ju, il, iu, function, std::forward(args)...); } // 3D loop using MDRange loops @@ -399,6 +397,40 @@ inline void par_dispatch(LoopPatternSimdFor, const std::string &name, function(k, j, i); } +template +class FlatFunctor +{ + public: + FlatFunctor(){}; + template + inline void operator()(Tag tag, const std::string &name, + DevExecSpace exec_space, const int nl, const int nu, + const int kl, const int ku, const int jl, const int ju, + const int il, const int iu, const F &function, Args ...args) const { + const int Nn = nu - nl + 1; + const int Nk = ku - kl + 1; + const int Nj = ju - jl + 1; + const int Ni = iu - il + 1; + const int NnNkNjNi = Nn * Nk * Nj * Ni; + const int NkNjNi = Nk * Nj * Ni; + const int NjNi = Nj * Ni; + kokkos_dispatch( + tag, name, Kokkos::RangePolicy<>(exec_space, 0, NnNkNjNi), + KOKKOS_LAMBDA(const int &idx, FArgs ...fargs) { + int n = idx / NkNjNi; + int k = (idx - n * NkNjNi) / NjNi; + int j = (idx - n * NkNjNi - k * NjNi) / Ni; + int i = idx - n * NkNjNi - k * NjNi - j * Ni; + n += nl; + k += kl; + j += jl; + i += il; + function(n, k, j, i, std::forward(fargs)...); + }, + std::forward(args)...); + } +}; + // 4D loop using Kokkos 1D Range template inline typename std::enable_if::type @@ -407,27 +439,9 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp const int ju, const int il, const int iu, const Function &function, Args &&...args) { Tag tag; - const int Nn = nu - nl + 1; - const int Nk = ku - kl + 1; - const int Nj = ju - jl + 1; - const int Ni = iu - il + 1; - const int NnNkNjNi = Nn * Nk * Nj * Ni; - const int NkNjNi = Nk * Nj * Ni; - const int NjNi = Nj * Ni; - kokkos_dispatch( - tag, name, Kokkos::RangePolicy<>(exec_space, 0, NnNkNjNi), - KOKKOS_LAMBDA(const int &idx) { - int n = idx / NkNjNi; - int k = (idx - n * NkNjNi) / NjNi; - int j = (idx - n * NkNjNi - k * NjNi) / Ni; - int i = idx - n * NkNjNi - k * NjNi - j * Ni; - n += nl; - k += kl; - j += jl; - i += il; - function(n, k, j, i); - }, - std::forward(args)...); + const auto func = MakeFlatFunctor(function); + func(tag, name, exec_space, nl, nu, kl, ku, jl, ju, il, iu, + function, std::forward(args)...); } // 4D loop using MDRange loops diff --git a/tst/unit/kokkos_abstraction.cpp b/tst/unit/kokkos_abstraction.cpp index 82a2e174f494..539951a29e13 100644 --- a/tst/unit/kokkos_abstraction.cpp +++ b/tst/unit/kokkos_abstraction.cpp @@ -503,44 +503,75 @@ bool test_wrapper_reduce_1d(T loop_pattern, DevExecSpace exec_space) { template bool test_wrapper_reduce_3d(T loop_pattern, DevExecSpace exec_space) { constexpr int N = 10; - parthenon::IndexRange r{0, N - 1}; parthenon::ParArray3D buffer("Testing buffer", N, N, N); // Initialize data parthenon::par_for( loop_pattern, "Initialize parallel reduce array", exec_space, 0, N-1, 0, N-1, 0, N-1, KOKKOS_LAMBDA(const int k, const int j, const int i) { buffer(k,j,i) = i+j+k; }); - int max = 0; + int tot = 0; for (int k = 0; k < N; ++k) { for (int j = 0; j < N; ++j) { for (int i = 0; i < N; ++i) { - max = std::max(max, i+j+k); + tot += i+j+k; } } } - int test_max = 0; + int test_tot = 0; parthenon::par_reduce( - loop_pattern, "Max via par reduce", exec_space, + loop_pattern, "Sum via par reduce", exec_space, 0, N-1, 0, N-1, 0, N-1, KOKKOS_LAMBDA(const int k, const int j, const int i, int &t) { - t = i+j+k; - }, Kokkos::Max(test_max)); - return max == test_max; + t += i+j+k; + }, Kokkos::Sum(test_tot)); + return tot == test_tot; +} + +template +bool test_wrapper_reduce_4d(T loop_pattern, DevExecSpace exec_space) { + constexpr int N = 10; + parthenon::ParArray4D buffer("Testing buffer", N, N, N, N); + // Initialize data + parthenon::par_for( + loop_pattern, "Initialize parallel reduce array", exec_space, 0, N-1, 0, N-1, 0, N-1, 0, N-1, + KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) { buffer(n,k,j,i) = i+j+k+n; }); + int tot = 0; + for (int n = 0; n < N; ++n) { + for (int k = 0; k < N; ++k) { + for (int j = 0; j < N; ++j) { + for (int i = 0; i < N; ++i) { + tot += i+j+k+n; + } + } + } + } + int test_tot = 0; + parthenon::par_reduce( + loop_pattern, "Sum via par reduce", exec_space, + 0, N-1, 0, N-1, 0, N-1, 0, N-1, + KOKKOS_LAMBDA(const int n, const int k, const int j, const int i, int &t) { + t += i+j+k+n; + }, Kokkos::Sum(test_tot)); + return tot == test_tot; } TEST_CASE("Parallel reduce", "[par_reduce]") { auto default_exec_space = DevExecSpace(); + SECTION("1D loops") { REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_flatrange_tag, default_exec_space) == true); if constexpr (std::is_same::value) { REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_simdfor_tag, default_exec_space) == true); } + } + + SECTION("3D loops") { REQUIRE(test_wrapper_reduce_3d(parthenon::loop_pattern_flatrange_tag, default_exec_space) == true); - /* REQUIRE(test_wrapper_reduce_3d(parthenon::LoopPatternMDRange(), */ - /* default_exec_space) == true); */ - /* if constexpr (std::is_same::value) { */ - /* REQUIRE(test_wrapper_reduce_3d(parthenon::loop_pattern_simdfor_tag, */ - /* default_exec_space) == true); */ - /* } */ + } + + SECTION("4D loops") { + REQUIRE(test_wrapper_reduce_4d(parthenon::loop_pattern_flatrange_tag, + default_exec_space) == true); + } } From 054ca0ef9e8519fa9fec9540f4511a1feca81ab9 Mon Sep 17 00:00:00 2001 From: Cloud User Date: Sat, 22 Jun 2024 20:12:22 +0000 Subject: [PATCH 03/12] add specialization for const int & --- tst/unit/kokkos_abstraction.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tst/unit/kokkos_abstraction.cpp b/tst/unit/kokkos_abstraction.cpp index 539951a29e13..015f1bcb8242 100644 --- a/tst/unit/kokkos_abstraction.cpp +++ b/tst/unit/kokkos_abstraction.cpp @@ -568,10 +568,14 @@ TEST_CASE("Parallel reduce", "[par_reduce]") { SECTION("3D loops") { REQUIRE(test_wrapper_reduce_3d(parthenon::loop_pattern_flatrange_tag, default_exec_space) == true); + REQUIRE(test_wrapper_reduce_3d(parthenon::loop_pattern_mdrange_tag, + default_exec_space) == true); } SECTION("4D loops") { REQUIRE(test_wrapper_reduce_4d(parthenon::loop_pattern_flatrange_tag, default_exec_space) == true); + REQUIRE(test_wrapper_reduce_4d(parthenon::loop_pattern_mdrange_tag, + default_exec_space) == true); } } From 4c83c4cc3a392bfae7fb99551a210c00a1356ef4 Mon Sep 17 00:00:00 2001 From: Cloud User Date: Sat, 22 Jun 2024 20:12:46 +0000 Subject: [PATCH 04/12] added mdrange loops to par_reduce tests --- src/kokkos_abstraction.hpp | 65 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index b4971f2080cd..30b84dc9564f 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -268,6 +268,36 @@ auto MakeFlatFunctor(F &function) { return FlatFunctor(); } +template +class FlatFunctor +{ + public: + FlatFunctor(){}; + template + inline void operator()(Tag tag, const std::string &name, + DevExecSpace exec_space, const int kl, const int ku, + const int jl, const int ju, const int il, const int iu, + const F &function, Args ...args) const { + const int Nk = ku - kl + 1; + const int Nj = ju - jl + 1; + const int Ni = iu - il + 1; + const int NkNjNi = Nk * Nj * Ni; + const int NjNi = Nj * Ni; + kokkos_dispatch( + tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi), + KOKKOS_LAMBDA(const int &idx, FArgs ...fargs) { + int k = idx / NjNi; + int j = (idx - k * NjNi) / Ni; + int i = idx - k * NjNi - j * Ni; + k += kl; + j += jl; + i += il; + function(k, j, i, std::forward(fargs)...); + }, + std::forward(args)...); + } +}; + template class FlatFunctor { @@ -397,8 +427,43 @@ inline void par_dispatch(LoopPatternSimdFor, const std::string &name, function(k, j, i); } + template class FlatFunctor +{ + public: + FlatFunctor(){}; + template + inline void operator()(Tag tag, const std::string &name, + DevExecSpace exec_space, const int nl, const int nu, + const int kl, const int ku, const int jl, const int ju, + const int il, const int iu, const F &function, Args ...args) const { + const int Nn = nu - nl + 1; + const int Nk = ku - kl + 1; + const int Nj = ju - jl + 1; + const int Ni = iu - il + 1; + const int NnNkNjNi = Nn * Nk * Nj * Ni; + const int NkNjNi = Nk * Nj * Ni; + const int NjNi = Nj * Ni; + kokkos_dispatch( + tag, name, Kokkos::RangePolicy<>(exec_space, 0, NnNkNjNi), + KOKKOS_LAMBDA(const int &idx, FArgs ...fargs) { + int n = idx / NkNjNi; + int k = (idx - n * NkNjNi) / NjNi; + int j = (idx - n * NkNjNi - k * NjNi) / Ni; + int i = idx - n * NkNjNi - k * NjNi - j * Ni; + n += nl; + k += kl; + j += jl; + i += il; + function(n, k, j, i, std::forward(fargs)...); + }, + std::forward(args)...); + } +}; + +template +class FlatFunctor { public: FlatFunctor(){}; From 0730429877b72c75ff10ad8853604eee12010961 Mon Sep 17 00:00:00 2001 From: adam reyes Date: Sat, 22 Jun 2024 23:31:32 +0200 Subject: [PATCH 05/12] refactor flatloop specialization --- src/kokkos_abstraction.hpp | 142 ++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 81 deletions(-) diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index 30b84dc9564f..fc183baec7f5 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -268,33 +268,44 @@ auto MakeFlatFunctor(F &function) { return FlatFunctor(); } +template +struct FlatLoop3D { + FlatLoop3D(){}; + template + inline void operator()(Tag tag, const F &function, const std::string &name, + DevExecSpace exec_space, const int kl, const int ku, + const int jl, const int ju, const int il, const int iu, + Args ...args) const { + const int Nk = ku - kl + 1; + const int Nj = ju - jl + 1; + const int Ni = iu - il + 1; + const int NkNjNi = Nk * Nj * Ni; + const int NjNi = Nj * Ni; + kokkos_dispatch( + tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi), + KOKKOS_LAMBDA(const int &idx, FArgs ...fargs) { + int k = idx / NjNi; + int j = (idx - k * NjNi) / Ni; + int i = idx - k * NjNi - j * Ni; + k += kl; + j += jl; + i += il; + function(k, j, i, std::forward(fargs)...); + }, + std::forward(args)...); + } +}; + template class FlatFunctor { public: FlatFunctor(){}; template - inline void operator()(Tag tag, const std::string &name, - DevExecSpace exec_space, const int kl, const int ku, - const int jl, const int ju, const int il, const int iu, - const F &function, Args ...args) const { - const int Nk = ku - kl + 1; - const int Nj = ju - jl + 1; - const int Ni = iu - il + 1; - const int NkNjNi = Nk * Nj * Ni; - const int NjNi = Nj * Ni; - kokkos_dispatch( - tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi), - KOKKOS_LAMBDA(const int &idx, FArgs ...fargs) { - int k = idx / NjNi; - int j = (idx - k * NjNi) / Ni; - int i = idx - k * NjNi - j * Ni; - k += kl; - j += jl; - i += il; - function(k, j, i, std::forward(fargs)...); - }, - std::forward(args)...); + inline void operator()(Tag tag, const F &function, Args ...args) const { + + const FlatLoop3D flat3D; + flat3D(tag, function, std::forward(args)...); } }; @@ -304,27 +315,9 @@ class FlatFunctor public: FlatFunctor(){}; template - inline void operator()(Tag tag, const std::string &name, - DevExecSpace exec_space, const int kl, const int ku, - const int jl, const int ju, const int il, const int iu, - const F &function, Args ...args) const { - const int Nk = ku - kl + 1; - const int Nj = ju - jl + 1; - const int Ni = iu - il + 1; - const int NkNjNi = Nk * Nj * Ni; - const int NjNi = Nj * Ni; - kokkos_dispatch( - tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi), - KOKKOS_LAMBDA(const int &idx, FArgs ...fargs) { - int k = idx / NjNi; - int j = (idx - k * NjNi) / Ni; - int i = idx - k * NjNi - j * Ni; - k += kl; - j += jl; - i += il; - function(k, j, i, std::forward(fargs)...); - }, - std::forward(args)...); + inline void operator()(Tag tag, const F &function, Args ...args) const { + const FlatLoop3D flat3D; + flat3D(tag, function, std::forward(args)...); } }; @@ -336,7 +329,7 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp const int iu, const Function &function, Args &&...args) { Tag tag; const auto func = MakeFlatFunctor(function); - func(tag, name, exec_space, kl, ku, jl, ju, il, iu, function, std::forward(args)...); + func(tag, function, name, exec_space, kl, ku, jl, ju, il, iu, std::forward(args)...); } // 3D loop using MDRange loops @@ -427,17 +420,14 @@ inline void par_dispatch(LoopPatternSimdFor, const std::string &name, function(k, j, i); } - -template -class FlatFunctor -{ - public: - FlatFunctor(){}; - template - inline void operator()(Tag tag, const std::string &name, - DevExecSpace exec_space, const int nl, const int nu, - const int kl, const int ku, const int jl, const int ju, - const int il, const int iu, const F &function, Args ...args) const { +template +struct FlatLoop4D { + FlatLoop4D(){}; + template + inline void operator()(Tag tag, const F &function, const std::string &name, + DevExecSpace exec_space, const int nl, const int nu, + const int kl, const int ku, const int jl, const int ju, + const int il, const int iu, Args ...args) const { const int Nn = nu - nl + 1; const int Nk = ku - kl + 1; const int Nj = ju - jl + 1; @@ -459,6 +449,18 @@ class FlatFunctor(fargs)...); }, std::forward(args)...); + } +}; + +template +class FlatFunctor +{ + public: + FlatFunctor(){}; + template + inline void operator()(Tag tag, const F &function, Args ...args) const { + const FlatLoop4D flat4D; + flat4D(tag, function, std::forward(args)...); } }; @@ -468,31 +470,9 @@ class FlatFunctor - inline void operator()(Tag tag, const std::string &name, - DevExecSpace exec_space, const int nl, const int nu, - const int kl, const int ku, const int jl, const int ju, - const int il, const int iu, const F &function, Args ...args) const { - const int Nn = nu - nl + 1; - const int Nk = ku - kl + 1; - const int Nj = ju - jl + 1; - const int Ni = iu - il + 1; - const int NnNkNjNi = Nn * Nk * Nj * Ni; - const int NkNjNi = Nk * Nj * Ni; - const int NjNi = Nj * Ni; - kokkos_dispatch( - tag, name, Kokkos::RangePolicy<>(exec_space, 0, NnNkNjNi), - KOKKOS_LAMBDA(const int &idx, FArgs ...fargs) { - int n = idx / NkNjNi; - int k = (idx - n * NkNjNi) / NjNi; - int j = (idx - n * NkNjNi - k * NjNi) / Ni; - int i = idx - n * NkNjNi - k * NjNi - j * Ni; - n += nl; - k += kl; - j += jl; - i += il; - function(n, k, j, i, std::forward(fargs)...); - }, - std::forward(args)...); + inline void operator()(Tag tag, const F &function, Args ...args) const { + const FlatLoop4D flat4D; + flat4D(tag, function, std::forward(args)...); } }; @@ -505,8 +485,8 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp Args &&...args) { Tag tag; const auto func = MakeFlatFunctor(function); - func(tag, name, exec_space, nl, nu, kl, ku, jl, ju, il, iu, - function, std::forward(args)...); + func(tag, function, name, exec_space, nl, nu, kl, ku, jl, ju, il, iu, + std::forward(args)...); } // 4D loop using MDRange loops From adb15dd5d71d9e88a712fb375eec905e2909a4c4 Mon Sep 17 00:00:00 2001 From: adam reyes Date: Sun, 23 Jun 2024 14:05:30 +0200 Subject: [PATCH 06/12] clean up --- src/kokkos_abstraction.hpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index fc183baec7f5..7338e43d4567 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -21,14 +21,12 @@ #define KOKKOS_ABSTRACTION_HPP_ #include -#include #include #include #include #include -#include "Kokkos_Macros.hpp" #include "basic_types.hpp" #include "config.hpp" #include "parthenon_array_generic.hpp" @@ -37,7 +35,6 @@ #include "utils/multi_pointer.hpp" #include "utils/object_pool.hpp" - namespace parthenon { #ifdef KOKKOS_ENABLE_CUDA_UVM @@ -735,13 +732,8 @@ inline void par_dispatch(LoopPatternSimdFor, const std::string &name, template inline void par_dispatch(const std::string &name, Args &&...args) { - if constexpr (std::is_same::value) { - par_dispatch(DEFAULT_LOOP_PATTERN, name, DevExecSpace(), - std::forward(args)...); - } else { - par_dispatch(loop_pattern_mdrange_tag, name, DevExecSpace(), - std::forward(args)...); - } + par_dispatch(DEFAULT_LOOP_PATTERN, name, DevExecSpace(), + std::forward(args)...); } template From 5db0d34954901ec5193d6a538aeddbd941473f08 Mon Sep 17 00:00:00 2001 From: adam reyes Date: Sun, 23 Jun 2024 14:10:54 +0200 Subject: [PATCH 07/12] formatting --- src/kokkos_abstraction.hpp | 199 ++++++++++++++++---------------- tst/unit/kokkos_abstraction.cpp | 91 ++++++++------- 2 files changed, 146 insertions(+), 144 deletions(-) diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index 7338e43d4567..6032d3cc817b 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -258,64 +258,63 @@ par_dispatch(LoopPatternMDRange, const std::string &name, DevExecSpace exec_spac function, std::forward(args)...); } -template class FlatFunctor; +template +class FlatFunctor; -template +template auto MakeFlatFunctor(F &function) { - return FlatFunctor(); + return FlatFunctor(); } -template +template struct FlatLoop3D { - FlatLoop3D(){}; - template - inline void operator()(Tag tag, const F &function, const std::string &name, - DevExecSpace exec_space, const int kl, const int ku, - const int jl, const int ju, const int il, const int iu, - Args ...args) const { - const int Nk = ku - kl + 1; - const int Nj = ju - jl + 1; - const int Ni = iu - il + 1; - const int NkNjNi = Nk * Nj * Ni; - const int NjNi = Nj * Ni; - kokkos_dispatch( - tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi), - KOKKOS_LAMBDA(const int &idx, FArgs ...fargs) { - int k = idx / NjNi; - int j = (idx - k * NjNi) / Ni; - int i = idx - k * NjNi - j * Ni; - k += kl; - j += jl; - i += il; - function(k, j, i, std::forward(fargs)...); - }, - std::forward(args)...); - } + FlatLoop3D() {}; + template + inline void operator()(Tag tag, const F &function, const std::string &name, + DevExecSpace exec_space, const int kl, const int ku, + const int jl, const int ju, const int il, const int iu, + Args... args) const { + const int Nk = ku - kl + 1; + const int Nj = ju - jl + 1; + const int Ni = iu - il + 1; + const int NkNjNi = Nk * Nj * Ni; + const int NjNi = Nj * Ni; + kokkos_dispatch( + tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi), + KOKKOS_LAMBDA(const int &idx, FArgs... fargs) { + int k = idx / NjNi; + int j = (idx - k * NjNi) / Ni; + int i = idx - k * NjNi - j * Ni; + k += kl; + j += jl; + i += il; + function(k, j, i, std::forward(fargs)...); + }, + std::forward(args)...); + } }; -template -class FlatFunctor -{ - public: - FlatFunctor(){}; - template - inline void operator()(Tag tag, const F &function, Args ...args) const { - - const FlatLoop3D flat3D; - flat3D(tag, function, std::forward(args)...); - } +template +class FlatFunctor { + public: + FlatFunctor() {}; + template + inline void operator()(Tag tag, const F &function, Args... args) const { + + const FlatLoop3D flat3D; + flat3D(tag, function, std::forward(args)...); + } }; -template -class FlatFunctor -{ - public: - FlatFunctor(){}; - template - inline void operator()(Tag tag, const F &function, Args ...args) const { - const FlatLoop3D flat3D; - flat3D(tag, function, std::forward(args)...); - } +template +class FlatFunctor { + public: + FlatFunctor() {}; + template + inline void operator()(Tag tag, const F &function, Args... args) const { + const FlatLoop3D flat3D; + flat3D(tag, function, std::forward(args)...); + } }; // 3D loop using Kokkos 1D Range @@ -326,7 +325,8 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp const int iu, const Function &function, Args &&...args) { Tag tag; const auto func = MakeFlatFunctor(function); - func(tag, function, name, exec_space, kl, ku, jl, ju, il, iu, std::forward(args)...); + func(tag, function, name, exec_space, kl, ku, jl, ju, il, iu, + std::forward(args)...); } // 3D loop using MDRange loops @@ -417,60 +417,59 @@ inline void par_dispatch(LoopPatternSimdFor, const std::string &name, function(k, j, i); } -template +template struct FlatLoop4D { - FlatLoop4D(){}; - template - inline void operator()(Tag tag, const F &function, const std::string &name, - DevExecSpace exec_space, const int nl, const int nu, - const int kl, const int ku, const int jl, const int ju, - const int il, const int iu, Args ...args) const { - const int Nn = nu - nl + 1; - const int Nk = ku - kl + 1; - const int Nj = ju - jl + 1; - const int Ni = iu - il + 1; - const int NnNkNjNi = Nn * Nk * Nj * Ni; - const int NkNjNi = Nk * Nj * Ni; - const int NjNi = Nj * Ni; - kokkos_dispatch( - tag, name, Kokkos::RangePolicy<>(exec_space, 0, NnNkNjNi), - KOKKOS_LAMBDA(const int &idx, FArgs ...fargs) { - int n = idx / NkNjNi; - int k = (idx - n * NkNjNi) / NjNi; - int j = (idx - n * NkNjNi - k * NjNi) / Ni; - int i = idx - n * NkNjNi - k * NjNi - j * Ni; - n += nl; - k += kl; - j += jl; - i += il; - function(n, k, j, i, std::forward(fargs)...); - }, - std::forward(args)...); - } + FlatLoop4D() {}; + template + inline void operator()(Tag tag, const F &function, const std::string &name, + DevExecSpace exec_space, const int nl, const int nu, + const int kl, const int ku, const int jl, const int ju, + const int il, const int iu, Args... args) const { + const int Nn = nu - nl + 1; + const int Nk = ku - kl + 1; + const int Nj = ju - jl + 1; + const int Ni = iu - il + 1; + const int NnNkNjNi = Nn * Nk * Nj * Ni; + const int NkNjNi = Nk * Nj * Ni; + const int NjNi = Nj * Ni; + kokkos_dispatch( + tag, name, Kokkos::RangePolicy<>(exec_space, 0, NnNkNjNi), + KOKKOS_LAMBDA(const int &idx, FArgs... fargs) { + int n = idx / NkNjNi; + int k = (idx - n * NkNjNi) / NjNi; + int j = (idx - n * NkNjNi - k * NjNi) / Ni; + int i = idx - n * NkNjNi - k * NjNi - j * Ni; + n += nl; + k += kl; + j += jl; + i += il; + function(n, k, j, i, std::forward(fargs)...); + }, + std::forward(args)...); + } }; -template -class FlatFunctor -{ - public: - FlatFunctor(){}; - template - inline void operator()(Tag tag, const F &function, Args ...args) const { - const FlatLoop4D flat4D; - flat4D(tag, function, std::forward(args)...); - } +template +class FlatFunctor { + public: + FlatFunctor() {}; + template + inline void operator()(Tag tag, const F &function, Args... args) const { + const FlatLoop4D flat4D; + flat4D(tag, function, std::forward(args)...); + } }; -template -class FlatFunctor -{ - public: - FlatFunctor(){}; - template - inline void operator()(Tag tag, const F &function, Args ...args) const { - const FlatLoop4D flat4D; - flat4D(tag, function, std::forward(args)...); - } +template +class FlatFunctor { + public: + FlatFunctor() {}; + template + inline void operator()(Tag tag, const F &function, Args... args) const { + const FlatLoop4D flat4D; + flat4D(tag, function, std::forward(args)...); + } }; // 4D loop using Kokkos 1D Range @@ -483,7 +482,7 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp Tag tag; const auto func = MakeFlatFunctor(function); func(tag, function, name, exec_space, nl, nu, kl, ku, jl, ju, il, iu, - std::forward(args)...); + std::forward(args)...); } // 4D loop using MDRange loops diff --git a/tst/unit/kokkos_abstraction.cpp b/tst/unit/kokkos_abstraction.cpp index 015f1bcb8242..ae0e3fcb79e8 100644 --- a/tst/unit/kokkos_abstraction.cpp +++ b/tst/unit/kokkos_abstraction.cpp @@ -506,76 +506,79 @@ bool test_wrapper_reduce_3d(T loop_pattern, DevExecSpace exec_space) { parthenon::ParArray3D buffer("Testing buffer", N, N, N); // Initialize data parthenon::par_for( - loop_pattern, "Initialize parallel reduce array", exec_space, 0, N-1, 0, N-1, 0, N-1, - KOKKOS_LAMBDA(const int k, const int j, const int i) { buffer(k,j,i) = i+j+k; }); + loop_pattern, "Initialize parallel reduce array", exec_space, 0, N - 1, 0, N - 1, 0, + N - 1, KOKKOS_LAMBDA(const int k, const int j, const int i) { + buffer(k, j, i) = i + j + k; + }); int tot = 0; for (int k = 0; k < N; ++k) { - for (int j = 0; j < N; ++j) { - for (int i = 0; i < N; ++i) { - tot += i+j+k; - } - } + for (int j = 0; j < N; ++j) { + for (int i = 0; i < N; ++i) { + tot += i + j + k; + } + } } int test_tot = 0; parthenon::par_reduce( - loop_pattern, "Sum via par reduce", exec_space, - 0, N-1, 0, N-1, 0, N-1, - KOKKOS_LAMBDA(const int k, const int j, const int i, int &t) { - t += i+j+k; - }, Kokkos::Sum(test_tot)); + loop_pattern, "Sum via par reduce", exec_space, 0, N - 1, 0, N - 1, 0, N - 1, + KOKKOS_LAMBDA(const int k, const int j, const int i, int &t) { t += i + j + k; }, + Kokkos::Sum(test_tot)); return tot == test_tot; } template bool test_wrapper_reduce_4d(T loop_pattern, DevExecSpace exec_space) { - constexpr int N = 10; - parthenon::ParArray4D buffer("Testing buffer", N, N, N, N); - // Initialize data - parthenon::par_for( - loop_pattern, "Initialize parallel reduce array", exec_space, 0, N-1, 0, N-1, 0, N-1, 0, N-1, - KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) { buffer(n,k,j,i) = i+j+k+n; }); - int tot = 0; - for (int n = 0; n < N; ++n) { - for (int k = 0; k < N; ++k) { - for (int j = 0; j < N; ++j) { - for (int i = 0; i < N; ++i) { - tot += i+j+k+n; - } - } + constexpr int N = 10; + parthenon::ParArray4D buffer("Testing buffer", N, N, N, N); + // Initialize data + parthenon::par_for( + loop_pattern, "Initialize parallel reduce array", exec_space, 0, N - 1, 0, N - 1, 0, + N - 1, 0, N - 1, KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) { + buffer(n, k, j, i) = i + j + k + n; + }); + int tot = 0; + for (int n = 0; n < N; ++n) { + for (int k = 0; k < N; ++k) { + for (int j = 0; j < N; ++j) { + for (int i = 0; i < N; ++i) { + tot += i + j + k + n; + } } - } + } + } int test_tot = 0; parthenon::par_reduce( - loop_pattern, "Sum via par reduce", exec_space, - 0, N-1, 0, N-1, 0, N-1, 0, N-1, + loop_pattern, "Sum via par reduce", exec_space, 0, N - 1, 0, N - 1, 0, N - 1, 0, + N - 1, KOKKOS_LAMBDA(const int n, const int k, const int j, const int i, int &t) { - t += i+j+k+n; - }, Kokkos::Sum(test_tot)); + t += i + j + k + n; + }, + Kokkos::Sum(test_tot)); return tot == test_tot; } TEST_CASE("Parallel reduce", "[par_reduce]") { auto default_exec_space = DevExecSpace(); SECTION("1D loops") { - REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_flatrange_tag, - default_exec_space) == true); - if constexpr (std::is_same::value) { - REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_simdfor_tag, + REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_flatrange_tag, default_exec_space) == true); - } + if constexpr (std::is_same::value) { + REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_simdfor_tag, + default_exec_space) == true); + } } SECTION("3D loops") { - REQUIRE(test_wrapper_reduce_3d(parthenon::loop_pattern_flatrange_tag, - default_exec_space) == true); - REQUIRE(test_wrapper_reduce_3d(parthenon::loop_pattern_mdrange_tag, - default_exec_space) == true); + REQUIRE(test_wrapper_reduce_3d(parthenon::loop_pattern_flatrange_tag, + default_exec_space) == true); + REQUIRE(test_wrapper_reduce_3d(parthenon::loop_pattern_mdrange_tag, + default_exec_space) == true); } SECTION("4D loops") { - REQUIRE(test_wrapper_reduce_4d(parthenon::loop_pattern_flatrange_tag, - default_exec_space) == true); - REQUIRE(test_wrapper_reduce_4d(parthenon::loop_pattern_mdrange_tag, - default_exec_space) == true); + REQUIRE(test_wrapper_reduce_4d(parthenon::loop_pattern_flatrange_tag, + default_exec_space) == true); + REQUIRE(test_wrapper_reduce_4d(parthenon::loop_pattern_mdrange_tag, + default_exec_space) == true); } } From ba335d796c80e1e66a7253d23957d37abab0daad Mon Sep 17 00:00:00 2001 From: adam reyes Date: Sun, 23 Jun 2024 17:29:28 +0200 Subject: [PATCH 08/12] linting --- src/kokkos_abstraction.hpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index 6032d3cc817b..6b0b7cc770e2 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -268,7 +268,7 @@ auto MakeFlatFunctor(F &function) { template struct FlatLoop3D { - FlatLoop3D() {}; + FlatLoop3D() {} template inline void operator()(Tag tag, const F &function, const std::string &name, DevExecSpace exec_space, const int kl, const int ku, @@ -297,10 +297,9 @@ struct FlatLoop3D { template class FlatFunctor { public: - FlatFunctor() {}; + FlatFunctor() {} template inline void operator()(Tag tag, const F &function, Args... args) const { - const FlatLoop3D flat3D; flat3D(tag, function, std::forward(args)...); } @@ -309,7 +308,7 @@ class FlatFunctor class FlatFunctor { public: - FlatFunctor() {}; + FlatFunctor() {} template inline void operator()(Tag tag, const F &function, Args... args) const { const FlatLoop3D flat3D; @@ -419,7 +418,7 @@ inline void par_dispatch(LoopPatternSimdFor, const std::string &name, template struct FlatLoop4D { - FlatLoop4D() {}; + FlatLoop4D() {} template inline void operator()(Tag tag, const F &function, const std::string &name, DevExecSpace exec_space, const int nl, const int nu, @@ -452,7 +451,7 @@ struct FlatLoop4D { template class FlatFunctor { public: - FlatFunctor() {}; + FlatFunctor() {} template inline void operator()(Tag tag, const F &function, Args... args) const { const FlatLoop4D flat4D; @@ -464,7 +463,7 @@ template class FlatFunctor { public: - FlatFunctor() {}; + FlatFunctor() {} template inline void operator()(Tag tag, const F &function, Args... args) const { const FlatLoop4D flat4D; From 2fa8ad150c8b0c4cec94ef47aa33d722aac0d67b Mon Sep 17 00:00:00 2001 From: adam reyes Date: Mon, 24 Jun 2024 00:11:52 +0200 Subject: [PATCH 09/12] templating functor index types --- src/kokkos_abstraction.hpp | 31 ++++--------------------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index 6b0b7cc770e2..58e0db791378 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -294,19 +294,8 @@ struct FlatLoop3D { } }; -template -class FlatFunctor { - public: - FlatFunctor() {} - template - inline void operator()(Tag tag, const F &function, Args... args) const { - const FlatLoop3D flat3D; - flat3D(tag, function, std::forward(args)...); - } -}; - -template -class FlatFunctor { +template +class FlatFunctor { public: FlatFunctor() {} template @@ -448,20 +437,8 @@ struct FlatLoop4D { } }; -template -class FlatFunctor { - public: - FlatFunctor() {} - template - inline void operator()(Tag tag, const F &function, Args... args) const { - const FlatLoop4D flat4D; - flat4D(tag, function, std::forward(args)...); - } -}; - -template -class FlatFunctor { +template +class FlatFunctor { public: FlatFunctor() {} template From b9a95a155f205066f0cffeb01d57029a8865878b Mon Sep 17 00:00:00 2001 From: adam reyes Date: Mon, 24 Jun 2024 11:24:56 +0200 Subject: [PATCH 10/12] moved to a single functor --- src/kokkos_abstraction.hpp | 139 +++++++++++++++---------------------- 1 file changed, 57 insertions(+), 82 deletions(-) diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index 58e0db791378..dae75a1de463 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -258,50 +258,32 @@ par_dispatch(LoopPatternMDRange, const std::string &name, DevExecSpace exec_spac function, std::forward(args)...); } -template +template class FlatFunctor; -template -auto MakeFlatFunctor(F &function) { - return FlatFunctor(); +template +auto MakeFlatFunctor(F &function, Args... args) { + return FlatFunctor(function, std::forward(args)...); } -template -struct FlatLoop3D { - FlatLoop3D() {} - template - inline void operator()(Tag tag, const F &function, const std::string &name, - DevExecSpace exec_space, const int kl, const int ku, - const int jl, const int ju, const int il, const int iu, - Args... args) const { - const int Nk = ku - kl + 1; - const int Nj = ju - jl + 1; - const int Ni = iu - il + 1; - const int NkNjNi = Nk * Nj * Ni; - const int NjNi = Nj * Ni; - kokkos_dispatch( - tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi), - KOKKOS_LAMBDA(const int &idx, FArgs... fargs) { - int k = idx / NjNi; - int j = (idx - k * NjNi) / Ni; - int i = idx - k * NjNi - j * Ni; - k += kl; - j += jl; - i += il; - function(k, j, i, std::forward(fargs)...); - }, - std::forward(args)...); - } -}; +template +class FlatFunctor { + int NjNi, Nj, Ni, kl, jl, il; + Function function; -template -class FlatFunctor { public: - FlatFunctor() {} - template - inline void operator()(Tag tag, const F &function, Args... args) const { - const FlatLoop3D flat3D; - flat3D(tag, function, std::forward(args)...); + FlatFunctor(const Function _function, const int _NjNi, const int _Nj, const int _Ni, + const int _kl, const int _jl, const int _il) + : function(_function), NjNi(_NjNi), Nj(_Nj), Ni(_Ni), kl(_kl), jl(_jl), il(_il) {} + KOKKOS_INLINE_FUNCTION + void operator()(const int &idx, FArgs &&...fargs) const { + int k = idx / NjNi; + int j = (idx - k * NjNi) / Ni; + int i = idx - k * NjNi - j * Ni; + k += kl; + j += jl; + i += il; + function(k, j, i, std::forward(fargs)...); } }; @@ -312,9 +294,14 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp const int kl, const int ku, const int jl, const int ju, const int il, const int iu, const Function &function, Args &&...args) { Tag tag; - const auto func = MakeFlatFunctor(function); - func(tag, function, name, exec_space, kl, ku, jl, ju, il, iu, - std::forward(args)...); + const int Nk = ku - kl + 1; + const int Nj = ju - jl + 1; + const int Ni = iu - il + 1; + const int NkNjNi = Nk * Nj * Ni; + const int NjNi = Nj * Ni; + kokkos_dispatch(tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi), + MakeFlatFunctor(function, NjNi, Nj, Ni, kl, jl, il), + std::forward(args)...); } // 3D loop using MDRange loops @@ -405,46 +392,27 @@ inline void par_dispatch(LoopPatternSimdFor, const std::string &name, function(k, j, i); } -template -struct FlatLoop4D { - FlatLoop4D() {} - template - inline void operator()(Tag tag, const F &function, const std::string &name, - DevExecSpace exec_space, const int nl, const int nu, - const int kl, const int ku, const int jl, const int ju, - const int il, const int iu, Args... args) const { - const int Nn = nu - nl + 1; - const int Nk = ku - kl + 1; - const int Nj = ju - jl + 1; - const int Ni = iu - il + 1; - const int NnNkNjNi = Nn * Nk * Nj * Ni; - const int NkNjNi = Nk * Nj * Ni; - const int NjNi = Nj * Ni; - kokkos_dispatch( - tag, name, Kokkos::RangePolicy<>(exec_space, 0, NnNkNjNi), - KOKKOS_LAMBDA(const int &idx, FArgs... fargs) { - int n = idx / NkNjNi; - int k = (idx - n * NkNjNi) / NjNi; - int j = (idx - n * NkNjNi - k * NjNi) / Ni; - int i = idx - n * NkNjNi - k * NjNi - j * Ni; - n += nl; - k += kl; - j += jl; - i += il; - function(n, k, j, i, std::forward(fargs)...); - }, - std::forward(args)...); - } -}; +template +class FlatFunctor { + int NkNjNi, NjNi, Nj, Ni, nl, kl, jl, il; + Function function; -template -class FlatFunctor { public: - FlatFunctor() {} - template - inline void operator()(Tag tag, const F &function, Args... args) const { - const FlatLoop4D flat4D; - flat4D(tag, function, std::forward(args)...); + FlatFunctor(const Function _function, const int _NkNjNi, const int _NjNi, const int _Nj, + const int _Ni, const int _nl, const int _kl, const int _jl, const int _il) + : function(_function), NkNjNi(_NkNjNi), NjNi(_NjNi), Nj(_Nj), Ni(_Ni), nl(_nl), + kl(_kl), jl(_jl), il(_il) {} + KOKKOS_INLINE_FUNCTION + void operator()(const int &idx, FArgs &&...fargs) const { + int n = idx / NkNjNi; + int k = (idx - n * NkNjNi) / NjNi; + int j = (idx - n * NkNjNi - k * NjNi) / Ni; + int i = idx - n * NkNjNi - k * NjNi - j * Ni; + n += nl; + k += kl; + j += jl; + i += il; + function(n, k, j, i, std::forward(fargs)...); } }; @@ -456,9 +424,16 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp const int ju, const int il, const int iu, const Function &function, Args &&...args) { Tag tag; - const auto func = MakeFlatFunctor(function); - func(tag, function, name, exec_space, nl, nu, kl, ku, jl, ju, il, iu, - std::forward(args)...); + const int Nn = nu - nl + 1; + const int Nk = ku - kl + 1; + const int Nj = ju - jl + 1; + const int Ni = iu - il + 1; + const int NnNkNjNi = Nn * Nk * Nj * Ni; + const int NkNjNi = Nk * Nj * Ni; + const int NjNi = Nj * Ni; + kokkos_dispatch(tag, name, Kokkos::RangePolicy<>(exec_space, 0, NnNkNjNi), + MakeFlatFunctor(function, NkNjNi, NjNi, Nj, Ni, nl, kl, jl, il), + std::forward(args)...); } // 4D loop using MDRange loops From 9033aa3331f0bfaabf6e35dd8cde8877394f176f Mon Sep 17 00:00:00 2001 From: Adam C Reyes Date: Mon, 24 Jun 2024 12:50:27 +0200 Subject: [PATCH 11/12] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f80ef5dd8df..967c985a0b02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Current develop ### Added (new features/APIs/variables/...) +- [[PR 1130]](https://github.com/parthenon-hpc-lab/parthenon/pull/1130) Enable `parthenon::par_reduce` for MD loops with Kokkos 1D Range - [[PR 1099]](https://github.com/parthenon-hpc-lab/parthenon/pull/1099) Functionality for outputting task graphs in GraphViz format. - [[PR 1091]](https://github.com/parthenon-hpc-lab/parthenon/pull/1091) Add vector wave equation example. - [[PR 991]](https://github.com/parthenon-hpc-lab/parthenon/pull/991) Add fine fields. From 56c422d9d2869cb16eec4a678a1e3653c8c7b7f1 Mon Sep 17 00:00:00 2001 From: adam reyes Date: Wed, 3 Jul 2024 09:18:49 -0400 Subject: [PATCH 12/12] remove unused Nj --- src/kokkos_abstraction.hpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index dae75a1de463..ca8c59ffe12e 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -268,13 +268,13 @@ auto MakeFlatFunctor(F &function, Args... args) { template class FlatFunctor { - int NjNi, Nj, Ni, kl, jl, il; + int NjNi, Ni, kl, jl, il; Function function; public: - FlatFunctor(const Function _function, const int _NjNi, const int _Nj, const int _Ni, - const int _kl, const int _jl, const int _il) - : function(_function), NjNi(_NjNi), Nj(_Nj), Ni(_Ni), kl(_kl), jl(_jl), il(_il) {} + FlatFunctor(const Function _function, const int _NjNi, const int _Ni, const int _kl, + const int _jl, const int _il) + : function(_function), NjNi(_NjNi), Ni(_Ni), kl(_kl), jl(_jl), il(_il) {} KOKKOS_INLINE_FUNCTION void operator()(const int &idx, FArgs &&...fargs) const { int k = idx / NjNi; @@ -300,7 +300,7 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp const int NkNjNi = Nk * Nj * Ni; const int NjNi = Nj * Ni; kokkos_dispatch(tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi), - MakeFlatFunctor(function, NjNi, Nj, Ni, kl, jl, il), + MakeFlatFunctor(function, NjNi, Ni, kl, jl, il), std::forward(args)...); } @@ -394,14 +394,14 @@ inline void par_dispatch(LoopPatternSimdFor, const std::string &name, template class FlatFunctor { - int NkNjNi, NjNi, Nj, Ni, nl, kl, jl, il; + int NkNjNi, NjNi, Ni, nl, kl, jl, il; Function function; public: - FlatFunctor(const Function _function, const int _NkNjNi, const int _NjNi, const int _Nj, - const int _Ni, const int _nl, const int _kl, const int _jl, const int _il) - : function(_function), NkNjNi(_NkNjNi), NjNi(_NjNi), Nj(_Nj), Ni(_Ni), nl(_nl), - kl(_kl), jl(_jl), il(_il) {} + FlatFunctor(const Function _function, const int _NkNjNi, const int _NjNi, const int _Ni, + const int _nl, const int _kl, const int _jl, const int _il) + : function(_function), NkNjNi(_NkNjNi), NjNi(_NjNi), Ni(_Ni), nl(_nl), kl(_kl), + jl(_jl), il(_il) {} KOKKOS_INLINE_FUNCTION void operator()(const int &idx, FArgs &&...fargs) const { int n = idx / NkNjNi; @@ -432,7 +432,7 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp const int NkNjNi = Nk * Nj * Ni; const int NjNi = Nj * Ni; kokkos_dispatch(tag, name, Kokkos::RangePolicy<>(exec_space, 0, NnNkNjNi), - MakeFlatFunctor(function, NkNjNi, NjNi, Nj, Ni, nl, kl, jl, il), + MakeFlatFunctor(function, NkNjNi, NjNi, Ni, nl, kl, jl, il), std::forward(args)...); }