cuda_exec.hpp
// Copyright (c) 2017-2025, Lawrence Livermore National Security, LLC and
// other Axom Project Developers. See the top-level LICENSE file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)

#ifndef AXOM_CUDA_EXEC_HPP_
#define AXOM_CUDA_EXEC_HPP_

#include "axom/config.hpp"
#include "axom/core/memory_management.hpp"

#include "RAJA/RAJA.hpp"
#include "umpire/Umpire.hpp"

#ifndef RAJA_ENABLE_CUDA
#error CUDA_EXEC requires a CUDA enabled RAJA
#endif

#if !defined(UMPIRE_ENABLE_CUDA) && !defined(UMPIRE_ENABLE_UM)
#error CUDA_EXEC requires a CUDA enabled UMPIRE with UM support
#endif

namespace axom
{
/*!
 * \brief Execution modes for CUDA_EXEC kernel launches.
 */
enum ExecutionMode
{
  SYNCHRONOUS,
  ASYNC
};

// _cuda_exec_start
/*!
 * \brief Indicates parallel execution on the GPU with CUDA.
 *
 * \tparam BLOCK_SIZE the number of CUDA threads in a block.
 * \tparam EXEC_MODE indicates synchronous or asynchronous execution.
 */
template <int BLOCK_SIZE, ExecutionMode EXEC_MODE = SYNCHRONOUS>
struct CUDA_EXEC
{ };
// _cuda_exec_end
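
// Illustrative usage sketch (not part of the original header): CUDA_EXEC is
// passed as the execution-space template argument to Axom's templated kernel
// interfaces such as axom::for_all. The array `data` and length `N` below are
// hypothetical, device-accessible values.
//
//   axom::for_all<axom::CUDA_EXEC<256>>(
//     N,
//     AXOM_LAMBDA(axom::IndexType i) { data[i] *= 2; });
//
// A block size of 256 is a common choice; any positive multiple of the CUDA
// warp size can be used.
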
/*!
 * \brief execution_space traits specialization for synchronous CUDA_EXEC.
 *
 * \tparam BLOCK_SIZE the number of CUDA threads per block.
 */
template <int BLOCK_SIZE>
struct execution_space<CUDA_EXEC<BLOCK_SIZE, SYNCHRONOUS>>
{
  using loop_policy = RAJA::cuda_exec<BLOCK_SIZE>;
  using reduce_policy = RAJA::cuda_reduce;
  using atomic_policy = RAJA::cuda_atomic;
  using sync_policy = RAJA::cuda_synchronize;

  static constexpr MemorySpace memory_space = MemorySpace::Device;

  static constexpr bool async() noexcept { return false; }
  static constexpr bool valid() noexcept { return true; }
  static constexpr bool onDevice() noexcept { return true; }
  static constexpr char* name() noexcept { return (char*)"[CUDA_EXEC]"; }

  static int allocatorID() noexcept
  {
    return axom::getUmpireResourceAllocatorID(umpire::resource::Device);
  }

  static constexpr runtime_policy::Policy runtimePolicy() noexcept
  {
    return runtime_policy::Policy::cuda;
  }

  static bool usesMemorySpace(axom::MemorySpace m) noexcept
  {
    return m == memory_space || m == MemorySpace::Unified;
  }

  static bool usesAllocId(int allocId) noexcept
  {
    return usesMemorySpace(axom::detail::getAllocatorSpace(allocId));
  }
};
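
// Illustrative sketch (an assumption, not taken from this header): the
// allocatorID() trait pairs with axom::allocate/axom::deallocate from
// axom/core/memory_management.hpp to obtain memory in the execution space's
// default memory space (device memory here). `N` is hypothetical.
//
//   using exec = axom::CUDA_EXEC<256>;
//   double* buf =
//     axom::allocate<double>(N, axom::execution_space<exec>::allocatorID());
//   // ... launch kernels that read/write buf ...
//   axom::deallocate(buf);
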
/*!
 * \brief execution_space traits specialization for asynchronous CUDA_EXEC.
 *
 * \tparam BLOCK_SIZE the number of CUDA threads per block.
 */
template <int BLOCK_SIZE>
struct execution_space<CUDA_EXEC<BLOCK_SIZE, ASYNC>>
{
  using loop_policy = RAJA::cuda_exec_async<BLOCK_SIZE>;
  using reduce_policy = RAJA::cuda_reduce;
  using atomic_policy = RAJA::cuda_atomic;
  using sync_policy = RAJA::cuda_synchronize;

  static constexpr MemorySpace memory_space = MemorySpace::Device;

  static constexpr bool async() noexcept { return true; }
  static constexpr bool valid() noexcept { return true; }
  static constexpr bool onDevice() noexcept { return true; }
  static constexpr char* name() noexcept
  {
    return (char*)"[CUDA_EXEC] (async)";
  }

  static int allocatorID() noexcept
  {
    return axom::getUmpireResourceAllocatorID(umpire::resource::Device);
  }

  static constexpr runtime_policy::Policy runtimePolicy() noexcept
  {
    return runtime_policy::Policy::cuda;
  }

  static bool usesMemorySpace(axom::MemorySpace m) noexcept
  {
    return m == memory_space || m == MemorySpace::Unified;
  }

  static bool usesAllocId(int allocId) noexcept
  {
    return allocId == 0 ||
      usesMemorySpace(axom::detail::getAllocatorSpace(allocId));
  }
};
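
// Illustrative sketch (an assumption, not taken from this header): with the
// ASYNC specialization, kernel launches use RAJA's cuda_exec_async policy and
// return control to the host immediately; the sync_policy trait can then be
// used to block until outstanding device work completes. `N` is hypothetical.
//
//   using async_exec = axom::CUDA_EXEC<256, axom::ASYNC>;
//   axom::for_all<async_exec>(N, AXOM_LAMBDA(axom::IndexType i) { /* ... */ });
//   RAJA::synchronize<axom::execution_space<async_exec>::sync_policy>();
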
} // namespace axom
#endif // AXOM_CUDA_EXEC_HPP_