From 69743347fae589cadcfb71b9edd450f92e0dd6c4 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 14 Jun 2024 09:37:04 -0700 Subject: [PATCH] TableData: Add support for row-major order (amrex::Order::C) (#3973) The default is still column-major (i.e., Fortran order, amrex::Order::F). --- Src/Base/AMReX_TableData.H | 254 +++++++++++++++++++++++++------------ 1 file changed, 170 insertions(+), 84 deletions(-) diff --git a/Src/Base/AMReX_TableData.H b/Src/Base/AMReX_TableData.H index 8ff5f608334..9c2a0d06ca1 100644 --- a/Src/Base/AMReX_TableData.H +++ b/Src/Base/AMReX_TableData.H @@ -72,11 +72,11 @@ struct Table1D #endif }; -template +template struct Table2D { T* AMREX_RESTRICT p = nullptr; - Long jstride = 0; + Long stride1 = 0; GpuArray begin{{1,1}}; GpuArray end{{0,0}}; @@ -84,9 +84,9 @@ struct Table2D template ,int> = 0> AMREX_GPU_HOST_DEVICE - constexpr Table2D (Table2D> const& rhs) noexcept + constexpr Table2D (Table2D, ORDER> const& rhs) noexcept : p(rhs.p), - jstride(rhs.jstride), + stride1(rhs.stride1), begin(rhs.begin), end(rhs.end) {} @@ -96,7 +96,7 @@ struct Table2D GpuArray const& a_begin, GpuArray const& a_end) noexcept : p(a_p), - jstride(a_end[0]-a_begin[0]), + stride1(len0(a_begin,a_end)), begin(a_begin), end(a_end) {} @@ -110,7 +110,13 @@ struct Table2D #if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK) index_assert(i,j); #endif - return p[(i-begin[0])+(j-begin[1])*jstride]; + static_assert(std::is_same_v || + std::is_same_v); + if constexpr (std::is_same_v) { + return p[(i-begin[0])+(j-begin[1])*stride1]; + } else { + return p[(i-begin[0])*stride1+(j-begin[1])]; + } } #if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK) @@ -134,14 +140,26 @@ struct Table2D } } #endif + +private: + + static constexpr int len0 (GpuArray const& a_begin, + GpuArray const& a_end) noexcept + { + if constexpr (std::is_same_v) { + return a_end[0] - a_begin[0]; + } else { + return a_end[1] - a_begin[1]; + } + } }; -template +template struct Table3D { T* AMREX_RESTRICT p = nullptr; - Long jstride = 0; - Long kstride = 0; + Long stride1 = 0; + Long stride2 = 0; GpuArray begin{{1,1,1}}; GpuArray end{{0,0,0}}; @@ -149,10 +167,10 @@ struct Table3D template ,int> = 0> AMREX_GPU_HOST_DEVICE - constexpr Table3D (Table3D> const& rhs) noexcept + constexpr Table3D (Table3D,ORDER> const& rhs) noexcept : p(rhs.p), - jstride(rhs.jstride), - kstride(rhs.kstride), + stride1(rhs.stride1), + stride2(rhs.stride2), begin(rhs.begin), end(rhs.end) {} @@ -162,8 +180,8 @@ struct Table3D GpuArray const& a_begin, GpuArray const& a_end) noexcept : p(a_p), - jstride(a_end[0]-a_begin[0]), - kstride(jstride*(a_end[1]-a_begin[1])), + stride1( len0(a_begin,a_end)), + stride2(stride1*len1(a_begin,a_end)), begin(a_begin), end(a_end) {} @@ -177,7 +195,13 @@ struct Table3D #if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK) index_assert(i,j,k); #endif - return p[(i-begin[0])+(j-begin[1])*jstride+(k-begin[2])*kstride]; + static_assert(std::is_same_v || + std::is_same_v); + if constexpr (std::is_same_v) { + return p[(i-begin[0])+(j-begin[1])*stride1+(k-begin[2])*stride2]; + } else { + return p[(i-begin[0])*stride2+(j-begin[1])*stride1+(k-begin[2])]; + } } #if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK) @@ -204,15 +228,33 @@ struct Table3D } } #endif + +private: + + static constexpr int len0 (GpuArray const& a_begin, + GpuArray const& a_end) noexcept + { + if constexpr (std::is_same_v) { + return a_end[0] - a_begin[0]; + } else { + return a_end[2] - a_begin[2]; + } + } + + static constexpr int len1 (GpuArray const& a_begin, + GpuArray const& a_end) noexcept + { + return a_end[1] - a_begin[1]; + } }; -template +template struct Table4D { T* AMREX_RESTRICT p = nullptr; - Long jstride = 0; - Long kstride = 0; - Long nstride = 0; + Long stride1 = 0; + Long stride2 = 0; + Long stride3 = 0; GpuArray begin{{1,1,1,1}}; GpuArray end{{0,0,0,0}}; @@ -220,11 +262,11 @@ struct Table4D template ,int> = 0> AMREX_GPU_HOST_DEVICE - constexpr Table4D (Table4D> const& rhs) noexcept + constexpr Table4D (Table4D,ORDER> const& rhs) noexcept : p(rhs.p), - jstride(rhs.jstride), - kstride(rhs.kstride), - nstride(rhs.nstride), + stride1(rhs.stride1), + stride2(rhs.stride2), + stride3(rhs.stride3), begin(rhs.begin), end(rhs.end) {} @@ -234,9 +276,9 @@ struct Table4D GpuArray const& a_begin, GpuArray const& a_end) noexcept : p(a_p), - jstride(a_end[0]-a_begin[0]), - kstride(jstride*(a_end[1]-a_begin[1])), - nstride(kstride*(a_end[2]-a_begin[2])), + stride1( len0(a_begin,a_end)), + stride2(stride1*len1(a_begin,a_end)), + stride3(stride2*len2(a_begin,a_end)), begin(a_begin), end(a_end) {} @@ -250,7 +292,13 @@ struct Table4D #if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK) index_assert(i,j,k,n); #endif - return p[(i-begin[0])+(j-begin[1])*jstride+(k-begin[2])*kstride+(n-begin[3])*nstride]; + static_assert(std::is_same_v || + std::is_same_v); + if constexpr (std::is_same_v) { + return p[(i-begin[0])+(j-begin[1])*stride1+(k-begin[2])*stride2+(n-begin[3])*stride3]; + } else { + return p[(i-begin[0])*stride3+(j-begin[1])*stride2+(k-begin[2])*stride1+(n-begin[3])]; + } } #if defined(AMREX_DEBUG) || defined(AMREX_BOUND_CHECK) @@ -279,6 +327,38 @@ struct Table4D } } #endif + +private: + + static constexpr int len0 (GpuArray const& a_begin, + GpuArray const& a_end) noexcept + { + if constexpr (std::is_same_v) { + return a_end[0] - a_begin[0]; + } else { + return a_end[3] - a_begin[3]; + } + } + + static constexpr int len1 (GpuArray const& a_begin, + GpuArray const& a_end) noexcept + { + if constexpr (std::is_same_v) { + return a_end[1] - a_begin[1]; + } else { + return a_end[2] - a_begin[2]; + } + } + + static constexpr int len2 (GpuArray const& a_begin, + GpuArray const& a_end) noexcept + { + if constexpr (std::is_same_v) { + return a_end[2] - a_begin[2]; + } else { + return a_end[1] - a_begin[1]; + } + } }; /** @@ -287,9 +367,12 @@ struct Table4D * This class is somewhat similar to FArrayBox/BaseFab. The main difference * is the dimension of the array in this class can be 1, 2, 3, or 4, whereas * the dimension of FArrayBox/BaseFab is the spatial dimension - * (AMREX_SPACEDIM) plus a component dimension. Below is an example of - * using it to store a 3D table of data that is initialized on CPU and is - * read-only by all GPU threads on the device. + * (AMREX_SPACEDIM) plus a component dimension. Another difference is that + * this class supports both column-major order (i.e., Fortran order) and + * row-major order (i.e., C order), whereas FArrayBox/BaseFab is always + * column-major. Below is an example of using it to store a 3D table of data + * that is initialized on CPU and is read-only by all GPU threads on the + * device. * * \code * Array tlo{0,0,0}; // lower bounds @@ -316,22 +399,22 @@ struct Table4D * // We can now use table in device lambda. * \endcode */ -template +template class TableData : public DataAllocator { public: - template friend class TableData; + template friend class TableData; using value_type = T; using table_type = std::conditional_t, - std::conditional_t, - std::conditional_t, - Table4D > > >; + std::conditional_t, + std::conditional_t, + Table4D > > >; using const_table_type = std::conditional_t, - std::conditional_t, - std::conditional_t, - Table4D > > >; + std::conditional_t, + std::conditional_t, + Table4D > > >; TableData () noexcept = default; @@ -339,11 +422,11 @@ public: TableData (Array const& lo, Array const& hi, Arena* ar = nullptr); - TableData (TableData const&) = delete; - TableData& operator= (TableData const&) = delete; + TableData (TableData const&) = delete; + TableData& operator= (TableData const&) = delete; - TableData (TableData&& rhs) noexcept; - TableData& operator= (TableData && rhs) noexcept; + TableData (TableData&& rhs) noexcept; + TableData& operator= (TableData && rhs) noexcept; ~TableData () noexcept; @@ -359,7 +442,7 @@ public: void clear () noexcept; - void copy (TableData const& rhs) noexcept; + void copy (TableData const& rhs) noexcept; table_type table () noexcept; const_table_type table () const noexcept; @@ -376,16 +459,16 @@ private: bool m_ptr_owner = false; }; -template -TableData::TableData (Array const& lo, Array const& hi, Arena* ar) +template +TableData::TableData (Array const& lo, Array const& hi, Arena* ar) : DataAllocator{ar}, m_lo(lo), m_hi(hi) { define(); } -template -TableData::TableData (TableData&& rhs) noexcept +template +TableData::TableData (TableData&& rhs) noexcept : DataAllocator{rhs.arena()}, m_dptr(rhs.m_dptr), m_lo(rhs.m_lo), @@ -397,9 +480,9 @@ TableData::TableData (TableData&& rhs) noexcept rhs.m_ptr_owner = false; } -template -TableData& -TableData::operator= (TableData && rhs) noexcept +template +TableData& +TableData::operator= (TableData && rhs) noexcept { if (this != &rhs) { clear(); @@ -415,19 +498,22 @@ TableData::operator= (TableData && rhs) noexcept return *this; } -template -TableData::~TableData () noexcept +template +TableData::~TableData () noexcept { static_assert(std::is_trivially_copyable() && std::is_trivially_destructible(), - "TableData: T must be trivially copyable and trivially destructible"); - static_assert(N>=1 && N <=4, "TableData: N must be in the range of [1,4]"); + "TableData: T must be trivially copyable and trivially destructible"); + static_assert(N>=1 && N <=4, "TableData: N must be in the range of [1,4]"); + static_assert(std::is_same_v || + std::is_same_v, + "TableDat: ORDER must be either Order::F or Order::C"); clear(); } -template +template void -TableData::resize (Array const& lo, Array const& hi, Arena* ar) +TableData::resize (Array const& lo, Array const& hi, Arena* ar) { m_lo = lo; m_hi = hi; @@ -449,9 +535,9 @@ TableData::resize (Array const& lo, Array const& hi, Arena* a } } -template +template Long -TableData::size () const noexcept +TableData::size () const noexcept { Long r = 1; for (int i = 0; i < N; ++i) { @@ -460,9 +546,9 @@ TableData::size () const noexcept return r; } -template +template void -TableData::clear () noexcept +TableData::clear () noexcept { if (m_dptr) { if (m_ptr_owner) { @@ -473,9 +559,9 @@ TableData::clear () noexcept } } -template +template void -TableData::define () +TableData::define () { m_truesize = size(); AMREX_ASSERT(m_truesize >= 0); @@ -488,48 +574,48 @@ TableData::define () } namespace detail { - template + template Table1D make_table (T* p, Array const& lo, Array const& hi) { return Table1D(p, lo[0], hi[0]+1); } - template - Table2D make_table (T* p, Array const& lo, Array const& hi) { - return Table2D(p, {lo[0],lo[1]}, {hi[0]+1,hi[1]+1}); + template + Table2D make_table (T* p, Array const& lo, Array const& hi) { + return Table2D(p, {lo[0],lo[1]}, {hi[0]+1,hi[1]+1}); } - template + template Table3D make_table (T* p, Array const& lo, Array const& hi) { - return Table3D(p, {lo[0],lo[1],lo[2]}, {hi[0]+1,hi[1]+1,hi[2]+1}); + return Table3D(p, {lo[0],lo[1],lo[2]}, {hi[0]+1,hi[1]+1,hi[2]+1}); } - template + template Table4D make_table (T* p, Array const& lo, Array const& hi) { - return Table4D(p, {lo[0],lo[1],lo[2],lo[3]}, {hi[0]+1,hi[1]+1,hi[2]+1,hi[3]+1}); + return Table4D(p, {lo[0],lo[1],lo[2],lo[3]}, {hi[0]+1,hi[1]+1,hi[2]+1,hi[3]+1}); } } -template -typename TableData::table_type -TableData::table () noexcept +template +typename TableData::table_type +TableData::table () noexcept { - return detail::make_table(m_dptr, m_lo, m_hi); + return detail::make_table(m_dptr, m_lo, m_hi); } -template -typename TableData::const_table_type -TableData::table () const noexcept +template +typename TableData::const_table_type +TableData::table () const noexcept { - return detail::make_table(m_dptr, m_lo, m_hi); + return detail::make_table(m_dptr, m_lo, m_hi); } -template -typename TableData::const_table_type -TableData::const_table () const noexcept +template +typename TableData::const_table_type +TableData::const_table () const noexcept { - return detail::make_table(m_dptr, m_lo, m_hi); + return detail::make_table(m_dptr, m_lo, m_hi); } -template +template void -TableData::copy (TableData const& rhs) noexcept +TableData::copy (TableData const& rhs) noexcept { std::size_t count = sizeof(T)*size(); #ifdef AMREX_USE_GPU