Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Testing some par_dispatch stuff #1156

Open
wants to merge 32 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
01e7406
extra stuff
lroberts36 Aug 20, 2024
3620a79
working for unit tests
lroberts36 Aug 20, 2024
6d82680
Add test for object being callable
lroberts36 Aug 21, 2024
a04698d
Switch to IndexRange based bounds
lroberts36 Aug 21, 2024
f0dab9a
revert test to original
lroberts36 Aug 21, 2024
727da4c
remove stuff
lroberts36 Aug 21, 2024
e0c13a6
format and lint
lroberts36 Aug 21, 2024
1e992c8
Add fallbacks
lroberts36 Aug 21, 2024
edef10d
fix constexpr issue on device
lroberts36 Aug 21, 2024
6bb150a
make public
lroberts36 Aug 21, 2024
9b4d10b
cleanup
lroberts36 Aug 21, 2024
df98b2a
fix possible future bug
lroberts36 Aug 21, 2024
aafd266
genralize hierarchical loops
lroberts36 Aug 21, 2024
59f615b
small
lroberts36 Aug 21, 2024
d3bdc34
make meshblock::par* calls just directly call parthenon::par*
lroberts36 Aug 21, 2024
a9c0db2
cleanup some cuda complaints
lroberts36 Aug 21, 2024
e9a8e0e
remove all tuple stuff
lroberts36 Aug 21, 2024
a73ccf4
suppress warning
lroberts36 Aug 22, 2024
064fd4d
switch to Kokkos array
lroberts36 Aug 22, 2024
d92a6d2
fix weird cuda issues
lroberts36 Aug 22, 2024
8d22898
revert to std::array in indexer
lroberts36 Aug 22, 2024
d68d4d1
format and lint
lroberts36 Aug 22, 2024
5e7f6a3
fix?
lroberts36 Aug 22, 2024
a701aea
Remove some duplication
lroberts36 Aug 22, 2024
64f0f13
clarify intention
lroberts36 Aug 22, 2024
524d6dc
format
lroberts36 Aug 22, 2024
a67aa91
cleanup
lroberts36 Aug 22, 2024
46af369
small
lroberts36 Aug 22, 2024
78d81a2
fix bug
lroberts36 Aug 22, 2024
1b76508
Merge branch 'develop' into lroberts36/generalize-par-dispatch
lroberts36 Aug 22, 2024
955fcb7
remove comment
lroberts36 Aug 22, 2024
8e9df23
Merge branch 'develop' into lroberts36/generalize-par-dispatch
lroberts36 Aug 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ cmake_hdf5_test.o
# Python artifacts
*.pyc
*.egg-info
*_venv
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ add_library(parthenon
globals.cpp
globals.hpp
kokkos_abstraction.hpp
kokkos_types.hpp
loop_bound_translator.hpp
parameter_input.cpp
parameter_input.hpp
parthenon_array_generic.hpp
Expand Down
1 change: 0 additions & 1 deletion src/bvals/comms/build_boundary_buffers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
#include "config.hpp"
#include "globals.hpp"
#include "interface/variable.hpp"
#include "kokkos_abstraction.hpp"
#include "mesh/mesh.hpp"
#include "mesh/mesh_refinement.hpp"
#include "mesh/meshblock.hpp"
Expand Down
1,120 changes: 308 additions & 812 deletions src/kokkos_abstraction.hpp

Large diffs are not rendered by default.

146 changes: 146 additions & 0 deletions src/kokkos_types.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
//========================================================================================
// Parthenon performance portable AMR framework
// Copyright(C) 2020-2023 The Parthenon collaboration
// Licensed under the 3-clause BSD License, see LICENSE file for details
//========================================================================================
// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved.
//
// This program was produced under U.S. Government contract 89233218CNA000001
// for Los Alamos National Laboratory (LANL), which is operated by Triad
// National Security, LLC for the U.S. Department of Energy/National Nuclear
// Security Administration. All rights in the program are reserved by Triad
// National Security, LLC, and the U.S. Department of Energy/National Nuclear
// Security Administration. The Government is granted for itself and others
// acting on its behalf a nonexclusive, paid-up, irrevocable worldwide license
// in this material to reproduce, prepare derivative works, distribute copies to
// the public, perform publicly and display publicly, and to permit others to do
// so.
//========================================================================================

#ifndef KOKKOS_TYPES_HPP_
#define KOKKOS_TYPES_HPP_

#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>

#include <Kokkos_Core.hpp>

#include "basic_types.hpp"
#include "config.hpp"
#include "parthenon_array_generic.hpp"
#include "utils/multi_pointer.hpp"
#include "utils/object_pool.hpp"

namespace parthenon {

// Memory and execution space aliases.
// When KOKKOS_ENABLE_CUDA_UVM is defined, both the device and the host memory
// space aliases name Kokkos::CudaUVMSpace and the device execution space is
// Kokkos::Cuda. Otherwise the device aliases follow Kokkos' default execution
// space and the host memory space is Kokkos::HostSpace.
#ifdef KOKKOS_ENABLE_CUDA_UVM
using DevMemSpace = Kokkos::CudaUVMSpace;
using HostMemSpace = Kokkos::CudaUVMSpace;
using DevExecSpace = Kokkos::Cuda;
#else
using DevMemSpace = Kokkos::DefaultExecutionSpace::memory_space;
using HostMemSpace = Kokkos::HostSpace;
using DevExecSpace = Kokkos::DefaultExecutionSpace;
#endif
// Scratch memory space belonging to the device execution space.
using ScratchMemSpace = DevExecSpace::scratch_memory_space;

// Execution space used for host-side work.
using HostExecSpace = Kokkos::DefaultHostExecutionSpace;
// Array layout used by the view aliases declared in this header.
using LayoutWrapper = Kokkos::LayoutRight;
// Memory trait for views that do not own their allocation (see the ScratchPad
// aliases).
using MemUnmanaged = Kokkos::MemoryTraits<Kokkos::Unmanaged>;

// Memory space for communication buffers.
// With PARTHENON_ENABLE_HOST_COMM_BUFFERS defined, buffers are placed in the
// host-pinned memory space of the enabled GPU backend (CUDA or HIP); any other
// backend is rejected at compile time. Without that option, buffers use the memory
// space of Kokkos' default execution space.
#if defined(PARTHENON_ENABLE_HOST_COMM_BUFFERS)
#if defined(KOKKOS_ENABLE_CUDA)
using BufMemSpace = Kokkos::CudaHostPinnedSpace::memory_space;
#elif defined(KOKKOS_ENABLE_HIP)
// NOTE(review): Kokkos appears to spell this type `HIPHostPinnedSpace` (and newer
// releases place it directly in namespace Kokkos) -- confirm this name against the
// Kokkos version the project builds with.
using BufMemSpace = Kokkos::Experimental::HipHostPinnedSpace::memory_space;
#else
#error "Unknown comm buffer space for chosen execution space."
#endif
#else
using BufMemSpace = Kokkos::DefaultExecutionSpace::memory_space;
#endif

// MPI communication buffers: a rank-1 Kokkos view of T allocated in BufMemSpace.
template <typename T>
using BufArray1D = Kokkos::View<T *, LayoutWrapper, BufMemSpace>;

// Structures for reusable memory pools and communication: an ObjectPool whose
// pooled objects are BufArray1D<T> buffers.
template <typename T>
using buf_pool_t = ObjectPool<BufArray1D<T>>;

// Device array aliases of rank 0 through 8.
// Each ParArrayND<T, State> is a ParArrayGeneric wrapping a Kokkos::View of the
// matching rank in DevMemSpace with the LayoutWrapper layout. The State template
// argument is forwarded to ParArrayGeneric and defaults to empty_state_t.
template <typename T, typename State = empty_state_t>
using ParArray0D = ParArrayGeneric<Kokkos::View<T, LayoutWrapper, DevMemSpace>, State>;
template <typename T, typename State = empty_state_t>
using ParArray1D = ParArrayGeneric<Kokkos::View<T *, LayoutWrapper, DevMemSpace>, State>;
template <typename T, typename State = empty_state_t>
using ParArray2D = ParArrayGeneric<Kokkos::View<T **, LayoutWrapper, DevMemSpace>, State>;
template <typename T, typename State = empty_state_t>
using ParArray3D =
ParArrayGeneric<Kokkos::View<T ***, LayoutWrapper, DevMemSpace>, State>;
template <typename T, typename State = empty_state_t>
using ParArray4D =
ParArrayGeneric<Kokkos::View<T ****, LayoutWrapper, DevMemSpace>, State>;
template <typename T, typename State = empty_state_t>
using ParArray5D =
ParArrayGeneric<Kokkos::View<T *****, LayoutWrapper, DevMemSpace>, State>;
template <typename T, typename State = empty_state_t>
using ParArray6D =
ParArrayGeneric<Kokkos::View<T ******, LayoutWrapper, DevMemSpace>, State>;
template <typename T, typename State = empty_state_t>
using ParArray7D =
ParArrayGeneric<Kokkos::View<T *******, LayoutWrapper, DevMemSpace>, State>;
template <typename T, typename State = empty_state_t>
using ParArray8D =
ParArrayGeneric<Kokkos::View<T ********, LayoutWrapper, DevMemSpace>, State>;

// Host mirrors: HostArrayND<T> is the HostMirror member type of ParArrayND<T>
// (with the default State). Aliases are provided for ranks 0 through 7 only; there
// is no rank-8 counterpart to ParArray8D here.
template <typename T>
using HostArray0D = typename ParArray0D<T>::HostMirror;
template <typename T>
using HostArray1D = typename ParArray1D<T>::HostMirror;
template <typename T>
using HostArray2D = typename ParArray2D<T>::HostMirror;
template <typename T>
using HostArray3D = typename ParArray3D<T>::HostMirror;
template <typename T>
using HostArray4D = typename ParArray4D<T>::HostMirror;
template <typename T>
using HostArray5D = typename ParArray5D<T>::HostMirror;
template <typename T>
using HostArray6D = typename ParArray6D<T>::HostMirror;
template <typename T>
using HostArray7D = typename ParArray7D<T>::HostMirror;

// Kokkos team policy with default template arguments, and the member (team
// handle) type that policy provides.
using team_policy = Kokkos::TeamPolicy<>;
using team_mbr_t = Kokkos::TeamPolicy<>::member_type;

// Scratch pad views of rank 1 through 6: unmanaged Kokkos views of T placed in
// ScratchMemSpace with the LayoutWrapper layout.
template <typename T>
using ScratchPad1D = Kokkos::View<T *, LayoutWrapper, ScratchMemSpace, MemUnmanaged>;
template <typename T>
using ScratchPad2D = Kokkos::View<T **, LayoutWrapper, ScratchMemSpace, MemUnmanaged>;
template <typename T>
using ScratchPad3D = Kokkos::View<T ***, LayoutWrapper, ScratchMemSpace, MemUnmanaged>;
template <typename T>
using ScratchPad4D = Kokkos::View<T ****, LayoutWrapper, ScratchMemSpace, MemUnmanaged>;
template <typename T>
using ScratchPad5D = Kokkos::View<T *****, LayoutWrapper, ScratchMemSpace, MemUnmanaged>;
template <typename T>
using ScratchPad6D = Kokkos::View<T ******, LayoutWrapper, ScratchMemSpace, MemUnmanaged>;

// Used for ParArrayND
// TODO(JMM): Should all of parthenon_arrays.hpp
// be moved here? Or should all of the above stuff be moved to
// parthenon_arrays.hpp?
// Pointer depth passed to multi_pointer_t when forming the view data type below.
inline constexpr std::size_t MAX_VARIABLE_DIMENSION = 7;
// Device view whose data type is multi_pointer_t<T, MAX_VARIABLE_DIMENSION>,
// allocated in DevMemSpace; the layout defaults to LayoutWrapper.
template <typename T, typename Layout = LayoutWrapper>
using device_view_t =
Kokkos::View<multi_pointer_t<T, MAX_VARIABLE_DIMENSION>, Layout, DevMemSpace>;
// Host mirror type of device_view_t<T, Layout>.
template <typename T, typename Layout = LayoutWrapper>
using host_view_t = typename device_view_t<T, Layout>::HostMirror;

} // namespace parthenon

#endif // KOKKOS_TYPES_HPP_
121 changes: 121 additions & 0 deletions src/loop_bound_translator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
//========================================================================================
// Parthenon performance portable AMR framework
// Copyright(C) 2020-2024 The Parthenon collaboration
// Licensed under the 3-clause BSD License, see LICENSE file for details
//========================================================================================
// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved.
//
// This program was produced under U.S. Government contract 89233218CNA000001
// for Los Alamos National Laboratory (LANL), which is operated by Triad
// National Security, LLC for the U.S. Department of Energy/National Nuclear
// Security Administration. All rights in the program are reserved by Triad
// National Security, LLC, and the U.S. Department of Energy/National Nuclear
// Security Administration. The Government is granted for itself and others
// acting on its behalf a nonexclusive, paid-up, irrevocable worldwide license
// in this material to reproduce, prepare derivative works, distribute copies to
// the public, perform publicly and display publicly, and to permit others to do
// so.
//========================================================================================

#ifndef LOOP_BOUND_TRANSLATOR_HPP_
#define LOOP_BOUND_TRANSLATOR_HPP_

#include <array>
#include <cstdint>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>

#include <Kokkos_Core.hpp>

#include "basic_types.hpp"
#include "kokkos_types.hpp"
#include "utils/indexer.hpp"
#include "utils/type_list.hpp"

namespace parthenon {
// Struct for translating between loop bounds given in terms of IndexRanges and loop
// bounds given in terms of raw integers.
//
// The bounds are passed to the constructor either as one IndexRange per loop
// dimension, or as a flat list of integers (start_0, end_0, start_1, end_1, ...).
// Either way they are stored as one IndexRange per dimension, and every bound is
// treated as inclusive on both ends by the policy/indexer builders below.
//
// Only the first type of Bound_ts is inspected to decide which form is in use.
template <class... Bound_ts>
struct LoopBoundTranslator {
  using Bound_tl = TypeList<Bound_ts...>;

  // True when the bounds are raw integers (two per dimension) rather than
  // IndexRanges (one per dimension).
  static constexpr bool are_integers =
      std::is_integral_v<std::remove_reference_t<typename Bound_tl::template type<0>>>;

  // Number of loop dimensions described by Bound_ts. Declared as `unsigned int`
  // rather than the non-standard `uint` typedef, which not every platform provides.
  static constexpr unsigned int rank = sizeof...(Bound_ts) / (1 + are_integers);

  // Inclusive [s, e] bounds, one entry per loop dimension.
  std::array<IndexRange, rank> bounds;

  // Mutable access to the bounds of dimension i (no range checking).
  KOKKOS_INLINE_FUNCTION
  IndexRange &operator[](int i) { return bounds[i]; }

  // Read-only access to the bounds of dimension i (no range checking).
  KOKKOS_INLINE_FUNCTION
  const IndexRange &operator[](int i) const { return bounds[i]; }

  // Builds the per-dimension bounds from either integer pairs or IndexRanges.
  KOKKOS_INLINE_FUNCTION
  explicit LoopBoundTranslator(Bound_ts... bounds_in) {
    if constexpr (are_integers) {
      // Widen everything to a common integer type first so that mixed integer
      // argument types can be unpacked into a single array.
      std::array<int64_t, 2 * rank> bounds_arr{static_cast<int64_t>(bounds_in)...};
      for (unsigned int r = 0; r < rank; ++r) {
        bounds[r].s = bounds_arr[2 * r];
        bounds[r].e = bounds_arr[2 * r + 1];
      }
    } else {
      bounds = std::array<IndexRange, rank>{bounds_in...};
    }
  }

  // Returns a 1D Kokkos range policy on exec_space covering [0, npoints), where
  // npoints is the product of the extents of dimensions [RankStart, RankStop).
  template <int RankStart, int RankStop>
  auto GetKokkosFlatRangePolicy(DevExecSpace exec_space) const {
    constexpr int ndim = RankStop - RankStart;
    static_assert(ndim > 0, "Need a valid range of ranks");
    static_assert(RankStart >= 0, "Need a valid range of ranks");
    static_assert(RankStop <= static_cast<int>(rank), "Need a valid range of ranks");
    int64_t npoints = 1;
    for (int d = RankStart; d < RankStop; ++d) {
      npoints *= (bounds[d].e + 1 - bounds[d].s);
    }
    return Kokkos::Experimental::require(
        Kokkos::RangePolicy<>(exec_space, 0, npoints),
        Kokkos::Experimental::WorkItemProperty::HintLightWeight);
  }

  // Returns a multi-dimensional Kokkos range policy on exec_space over dimensions
  // [RankStart, RankStop). Requires at least two dimensions.
  template <int RankStart, int RankStop>
  auto GetKokkosMDRangePolicy(DevExecSpace exec_space) const {
    constexpr int ndim = RankStop - RankStart;
    static_assert(ndim > 1, "Need a valid range of ranks");
    static_assert(RankStart >= 0, "Need a valid range of ranks");
    static_assert(RankStop <= static_cast<int>(rank), "Need a valid range of ranks");
    Kokkos::Array<int64_t, ndim> start;
    Kokkos::Array<int64_t, ndim> end;
    Kokkos::Array<int64_t, ndim> tile;
    for (int d = 0; d < ndim; ++d) {
      start[d] = bounds[d + RankStart].s;
      // Kokkos takes an exclusive upper bound; the stored bound is inclusive.
      end[d] = bounds[d + RankStart].e + 1;
      tile[d] = 1;
    }
    // The tile spans the full extent of the last dimension and a single index in
    // every other dimension.
    tile[ndim - 1] = end[ndim - 1] - start[ndim - 1];
    return Kokkos::Experimental::require(
        Kokkos::MDRangePolicy<Kokkos::Rank<ndim>>(exec_space, start, end, tile),
        Kokkos::Experimental::WorkItemProperty::HintLightWeight);
  }

  // Helper for GetIndexer<RankStart, RankStop>(): passes the (s, e) pair of each
  // dimension RankStart + Is to MakeIndexer.
  template <int RankStart, std::size_t... Is>
  KOKKOS_INLINE_FUNCTION auto GetIndexer(std::index_sequence<Is...>) const {
    return MakeIndexer(
        std::pair<int, int>(bounds[Is + RankStart].s, bounds[Is + RankStart].e)...);
  }

  // Returns the result of MakeIndexer for dimensions [RankStart, RankStop).
  template <int RankStart, int RankStop>
  KOKKOS_INLINE_FUNCTION auto GetIndexer() const {
    constexpr int ndim = RankStop - RankStart;
    static_assert(ndim > 0, "Need a valid range of ranks");
    static_assert(RankStart >= 0, "Need a valid range of ranks");
    static_assert(RankStop <= static_cast<int>(rank), "Need a valid range of ranks");
    return GetIndexer<RankStart>(std::make_index_sequence<ndim>());
  }
};

// Partial specialization for bound types packaged in a TypeList: it derives
// publicly from the translator for the unpacked types, so the base's static
// members and member functions are reachable through
// LoopBoundTranslator<TypeList<Ts...>>. It declares no members or constructors of
// its own.
template <class... Bound_ts>
struct LoopBoundTranslator<TypeList<Bound_ts...>> : LoopBoundTranslator<Bound_ts...> {};

} // namespace parthenon

#endif // LOOP_BOUND_TRANSLATOR_HPP_
Loading
Loading