Skip to content

Commit

Permalink
Make MFParallelFor safer from int overflow (AMReX-Codes#3768)
Browse files Browse the repository at this point in the history
This is a continuation of the changes in AMReX-Codes#3742, making AMReX ready for big
kernels.

We also store the number of points in BoxIndexer now because we always
need that number in GPU kernels.
  • Loading branch information
WeiqunZhang authored Feb 20, 2024
1 parent 398b20b commit 4ebd5e0
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 136 deletions.
8 changes: 4 additions & 4 deletions Src/Base/AMReX_BaseFabUtility.H
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,14 @@ void fill (BaseFab<STRUCT>& aos_fab, F && f)
std::uint64_t const threadIdxx = handler.threadIdx();
std::uint64_t const blockIdxx = handler.blockIdx();
auto const shared = (T*)handler.sharedMemory();
if (icell < ntotcells) {
if (icell < indexer.numPts()) {
auto ga = new(shared+threadIdxx*STRUCTSIZE) STRUCT;
auto [i, j, k] = indexer(icell);
f(*ga, i, j, k);
}
handler.sharedBarrier();
for (std::uint64_t m = threadIdxx,
mend = amrex::min<std::uint64_t>(blockDimx, ntotcells-blockDimx*blockIdxx) * STRUCTSIZE;
mend = amrex::min<std::uint64_t>(blockDimx, indexer.numPts()-blockDimx*blockIdxx) * STRUCTSIZE;
m < mend; m += blockDimx) {
p[blockDimx*blockIdxx*STRUCTSIZE+m] = shared[m];
}
Expand All @@ -72,14 +72,14 @@ void fill (BaseFab<STRUCT>& aos_fab, F && f)
std::uint64_t const icell = std::uint64_t(blockDim.x)*blockIdx.x+threadIdx.x;
Gpu::SharedMemory<T> gsm;
T* const shared = gsm.dataPtr();
if (icell < ntotcells) {
if (icell < indexer.numPts()) {
auto ga = new(shared+std::uint64_t(threadIdx.x)*STRUCTSIZE) STRUCT;
auto [i, j, k] = indexer(icell);
f(*ga, i, j, k);
}
__syncthreads();
for (std::uint64_t m = threadIdx.x,
mend = amrex::min<std::uint64_t>(blockDim.x, ntotcells-std::uint64_t(blockDim.x)*blockIdx.x) * STRUCTSIZE;
mend = amrex::min<std::uint64_t>(blockDim.x, indexer.numPts()-std::uint64_t(blockDim.x)*blockIdx.x) * STRUCTSIZE;
m < mend; m += blockDim.x) {
p[std::uint64_t(blockDim.x)*blockIdx.x*STRUCTSIZE+m] = shared[m];
}
Expand Down
14 changes: 11 additions & 3 deletions Src/Base/AMReX_Box.H
Original file line number Diff line number Diff line change
Expand Up @@ -1842,13 +1842,16 @@ Box makeSingleCellBox (int i, int j, int k, IndexType typ = IndexType::TheCellTy

struct BoxIndexer
{
std::uint64_t npts;

#if (AMREX_SPACEDIM == 3)
Math::FastDivmodU64 fdxy;
Math::FastDivmodU64 fdx;
IntVect lo;

BoxIndexer (Box const& box)
: fdxy(std::uint64_t(box.length(0))*std::uint64_t(box.length(1))),
: npts(box.numPts()),
fdxy(std::uint64_t(box.length(0))*std::uint64_t(box.length(1))),
fdx (std::uint64_t(box.length(0))),
lo (box.smallEnd())
{}
Expand Down Expand Up @@ -1877,7 +1880,8 @@ struct BoxIndexer
IntVect lo;

BoxIndexer (Box const& box)
: fdx (std::uint64_t(box.length(0))),
: npts(box.numPts()),
fdx (std::uint64_t(box.length(0))),
lo (box.smallEnd())
{}

Expand All @@ -1902,7 +1906,8 @@ struct BoxIndexer
int lo;

BoxIndexer (Box const& box)
: lo(box.smallEnd(0))
: npts(box.numPts()),
lo(box.smallEnd(0))
{}

[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
Expand All @@ -1918,6 +1923,9 @@ struct BoxIndexer
}

#endif

[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
std::uint64_t numPts () const { return npts; }
};

}
Expand Down
4 changes: 2 additions & 2 deletions Src/Base/AMReX_FabArrayBase.H
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,7 @@ public:
~ParForInfo ();

std::pair<int*,int*> const& getBlocks () const { return m_nblocks_x; }
Box const* getBoxes () const { return m_boxes; }
BoxIndexer const* getBoxes () const { return m_boxes; }

ParForInfo () = delete;
ParForInfo (ParForInfo const&) = delete;
Expand All @@ -663,7 +663,7 @@ public:
IntVect m_ng;
int m_nthreads;
std::pair<int*,int*> m_nblocks_x;
Box* m_boxes = nullptr;
BoxIndexer* m_boxes = nullptr;
char* m_hp = nullptr;
char* m_dp = nullptr;
};
Expand Down
Loading

0 comments on commit 4ebd5e0

Please sign in to comment.