Skip to content

Commit

Permalink
Template average down (#2980)
Browse files Browse the repository at this point in the history
Adding templating for the average_down function
  • Loading branch information
bsrunnels authored Nov 23, 2022
1 parent bd0cef1 commit 4d6413c
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 168 deletions.
143 changes: 141 additions & 2 deletions Src/Base/AMReX_MultiFabUtil.H
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,11 @@ namespace amrex
//! Average MultiFab onto crse MultiFab without volume weighting. This
//! routine DOES NOT assume that the crse BoxArray is a coarsened version of
//! the fine BoxArray. Work for both cell-centered and nodal MultiFabs.
void average_down (const MultiFab& S_fine, MultiFab& S_crse,
template<typename FAB>
void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
int scomp, int ncomp, const IntVect& ratio);
void average_down (const MultiFab& S_fine, MultiFab& S_crse,
template<typename FAB>
void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
int scomp, int ncomp, int ratio);

//! Add a coarsened version of the data contained in the S_fine MultiFab to
Expand Down Expand Up @@ -365,6 +367,143 @@ void average_down_nodal (const FabArray<FAB>& fine, FabArray<FAB>& crse,
}
}

// *************************************************************************************************************

// Average fine cell-based MultiFab onto crse cell-centered MultiFab.
// We do NOT assume that the coarse layout is a coarsened version of the fine layout.
// This version does NOT use volume-weighting
template<typename FAB>
void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse, int scomp, int ncomp, int rr)
{
average_down(S_fine,S_crse,scomp,ncomp,rr*IntVect::TheUnitVector());
}

template<typename FAB>
void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
int scomp, int ncomp, const IntVect& ratio)
{
BL_PROFILE("amrex::average_down");
AMREX_ASSERT(S_crse.nComp() == S_fine.nComp());
AMREX_ASSERT((S_crse.is_cell_centered() && S_fine.is_cell_centered()) ||
(S_crse.is_nodal() && S_fine.is_nodal()));

using value_type = typename FAB::value_type;

bool is_cell_centered = S_crse.is_cell_centered();

//
// Coarsen() the fine stuff on processors owning the fine data.
//
BoxArray crse_S_fine_BA = S_fine.boxArray(); crse_S_fine_BA.coarsen(ratio);

if (crse_S_fine_BA == S_crse.boxArray() && S_fine.DistributionMap() == S_crse.DistributionMap())
{
#ifdef AMREX_USE_GPU
if (Gpu::inLaunchRegion() && S_crse.isFusingCandidate()) {
auto const& crsema = S_crse.arrays();
auto const& finema = S_fine.const_arrays();
if (is_cell_centered) {
ParallelFor(S_crse, IntVect(0), ncomp,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
{
amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
});
} else {
ParallelFor(S_crse, IntVect(0), ncomp,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
{
amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
});
}
Gpu::streamSynchronize();
} else
#endif
{
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
for (MFIter mfi(S_crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
{
// NOTE: The tilebox is defined at the coarse level.
const Box& bx = mfi.tilebox();
Array4<value_type> const& crsearr = S_crse.array(mfi);
Array4<value_type const> const& finearr = S_fine.const_array(mfi);

if (is_cell_centered) {
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
{
amrex_avgdown(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
});
} else {
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
{
amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
});
}
}
}
}
else
{
FabArray<FAB> crse_S_fine(crse_S_fine_BA, S_fine.DistributionMap(), ncomp, 0, MFInfo(),DefaultFabFactory<FAB>());

#ifdef AMREX_USE_GPU
if (Gpu::inLaunchRegion() && crse_S_fine.isFusingCandidate()) {
auto const& crsema = crse_S_fine.arrays();
auto const& finema = S_fine.const_arrays();
if (is_cell_centered) {
ParallelFor(crse_S_fine, IntVect(0), ncomp,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
{
amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
});
} else {
ParallelFor(crse_S_fine, IntVect(0), ncomp,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
{
amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
});
}
Gpu::streamSynchronize();
} else
#endif
{
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
for (MFIter mfi(crse_S_fine,TilingIfNotGPU()); mfi.isValid(); ++mfi)
{
// NOTE: The tilebox is defined at the coarse level.
const Box& bx = mfi.tilebox();
Array4<value_type> const& crsearr = crse_S_fine.array(mfi);
Array4<value_type const> const& finearr = S_fine.const_array(mfi);

// NOTE: We copy from component scomp of the fine fab into component 0 of the crse fab
// because the crse fab is a temporary which was made starting at comp 0, it is
// not part of the actual crse multifab which came in.

if (is_cell_centered) {
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
{
amrex_avgdown(i,j,k,n,crsearr,finearr,0,scomp,ratio);
});
} else {
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
{
amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,0,scomp,ratio);
});
}
}
}

S_crse.ParallelCopy(crse_S_fine,0,scomp,ncomp);
}
}





/**
* \brief Returns part of a norm based on two MultiFabs
* The MultiFabs MUST have the same underlying BoxArray.
Expand Down
130 changes: 0 additions & 130 deletions Src/Base/AMReX_MultiFabUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,17 +439,6 @@ namespace amrex
#endif
}

// *************************************************************************************************************

// Average fine cell-based MultiFab onto crse cell-centered MultiFab.
// We do NOT assume that the coarse layout is a coarsened version of the fine layout.
// This version does NOT use volume-weighting
void average_down (const MultiFab& S_fine, MultiFab& S_crse, int scomp, int ncomp, int rr)
{
average_down(S_fine,S_crse,scomp,ncomp,rr*IntVect::TheUnitVector());
}


void sum_fine_to_coarse(const MultiFab& S_fine, MultiFab& S_crse,
int scomp, int ncomp, const IntVect& ratio,
const Geometry& cgeom, const Geometry& /*fgeom*/)
Expand Down Expand Up @@ -501,125 +490,6 @@ namespace amrex
cgeom.periodicity(), FabArrayBase::ADD);
}

void average_down (const MultiFab& S_fine, MultiFab& S_crse,
int scomp, int ncomp, const IntVect& ratio)
{
BL_PROFILE("amrex::average_down");
AMREX_ASSERT(S_crse.nComp() == S_fine.nComp());
AMREX_ASSERT((S_crse.is_cell_centered() && S_fine.is_cell_centered()) ||
(S_crse.is_nodal() && S_fine.is_nodal()));

bool is_cell_centered = S_crse.is_cell_centered();

//
// Coarsen() the fine stuff on processors owning the fine data.
//
BoxArray crse_S_fine_BA = S_fine.boxArray(); crse_S_fine_BA.coarsen(ratio);

if (crse_S_fine_BA == S_crse.boxArray() && S_fine.DistributionMap() == S_crse.DistributionMap())
{
#ifdef AMREX_USE_GPU
if (Gpu::inLaunchRegion() && S_crse.isFusingCandidate()) {
auto const& crsema = S_crse.arrays();
auto const& finema = S_fine.const_arrays();
if (is_cell_centered) {
ParallelFor(S_crse, IntVect(0), ncomp,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
{
amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
});
} else {
ParallelFor(S_crse, IntVect(0), ncomp,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
{
amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
});
}
Gpu::streamSynchronize();
} else
#endif
{
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
for (MFIter mfi(S_crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
{
// NOTE: The tilebox is defined at the coarse level.
const Box& bx = mfi.tilebox();
Array4<Real> const& crsearr = S_crse.array(mfi);
Array4<Real const> const& finearr = S_fine.const_array(mfi);

if (is_cell_centered) {
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
{
amrex_avgdown(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
});
} else {
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
{
amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
});
}
}
}
}
else
{
MultiFab crse_S_fine(crse_S_fine_BA, S_fine.DistributionMap(), ncomp, 0, MFInfo(), FArrayBoxFactory());

#ifdef AMREX_USE_GPU
if (Gpu::inLaunchRegion() && crse_S_fine.isFusingCandidate()) {
auto const& crsema = crse_S_fine.arrays();
auto const& finema = S_fine.const_arrays();
if (is_cell_centered) {
ParallelFor(crse_S_fine, IntVect(0), ncomp,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
{
amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
});
} else {
ParallelFor(crse_S_fine, IntVect(0), ncomp,
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
{
amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
});
}
Gpu::streamSynchronize();
} else
#endif
{
#ifdef AMREX_USE_OMP
#pragma omp parallel if (Gpu::notInLaunchRegion())
#endif
for (MFIter mfi(crse_S_fine,TilingIfNotGPU()); mfi.isValid(); ++mfi)
{
// NOTE: The tilebox is defined at the coarse level.
const Box& bx = mfi.tilebox();
Array4<Real> const& crsearr = crse_S_fine.array(mfi);
Array4<Real const> const& finearr = S_fine.const_array(mfi);

// NOTE: We copy from component scomp of the fine fab into component 0 of the crse fab
// because the crse fab is a temporary which was made starting at comp 0, it is
// not part of the actual crse multifab which came in.

if (is_cell_centered) {
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
{
amrex_avgdown(i,j,k,n,crsearr,finearr,0,scomp,ratio);
});
} else {
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
{
amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,0,scomp,ratio);
});
}
}
}

S_crse.ParallelCopy(crse_S_fine,0,scomp,ncomp);
}
}

//! Average fine edge-based MultiFab onto crse edge-based MultiFab.
//! This routine assumes that the crse BoxArray is a coarsened version of the fine BoxArray.
void average_down_edges (const Vector<const MultiFab*>& fine, const Vector<MultiFab*>& crse,
Expand Down
32 changes: 18 additions & 14 deletions Src/Base/AMReX_MultiFabUtil_1D_C.H
Original file line number Diff line number Diff line change
Expand Up @@ -185,22 +185,23 @@ void amrex_avgdown_edges (int i, int, int, int n, Array4<Real> const& crse,
crse(i,0,0,n+ccomp) = c * facInv;
}

template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void amrex_avgdown (Box const& bx, Array4<Real> const& crse,
Array4<Real const> const& fine,
void amrex_avgdown (Box const& bx, Array4<T> const& crse,
Array4<T const> const& fine,
int ccomp, int fcomp, int ncomp,
IntVect const& ratio) noexcept
{
const auto clo = lbound(bx);
const auto chi = ubound(bx);

const int facx = ratio[0];
const Real volfrac = Real(1.0)/static_cast<Real>(facx);
const T volfrac = T(1.0)/T(facx);

for (int n = 0; n < ncomp; ++n) {
for (int i = clo.x; i <= chi.x; ++i) {
int ii = i*facx;
Real c = 0.;
T c = 0;
for (int iref = 0; iref < facx; ++iref) {
c += fine(ii+iref,0,0,n+fcomp);
}
Expand All @@ -209,30 +210,32 @@ void amrex_avgdown (Box const& bx, Array4<Real> const& crse,
}
}

template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void amrex_avgdown (int i, int, int, int n, Array4<Real> const& crse,
Array4<Real const> const& fine,
void amrex_avgdown (int i, int, int, int n, Array4<T> const& crse,
Array4<T const> const& fine,
int ccomp, int fcomp, IntVect const& ratio) noexcept
{
const int facx = ratio[0];
const Real volfrac = Real(1.0)/static_cast<Real>(facx);
const T volfrac = T(1.0)/T(facx);
const int ii = i*facx;
Real c = Real(0.);
T c = 0;
for (int iref = 0; iref < facx; ++iref) {
c += fine(ii+iref,0,0,n+fcomp);
}
crse(i,0,0,n+ccomp) = volfrac * c;
}

template <typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void amrex_avgdown_with_vol (int i, int, int, int n, Array4<Real> const& crse,
Array4<Real const> const& fine,
Array4<Real const> const& fv,
void amrex_avgdown_with_vol (int i, int, int, int n, Array4<T> const& crse,
Array4<T const> const& fine,
Array4<T const> const& fv,
int ccomp, int fcomp, IntVect const& ratio) noexcept
{
const int facx = ratio[0];
const int ii = i*facx;
Real cd = 0., cv = 0.;
T cd = 0, cv = 0;
for (int iref = 0; iref < facx; ++iref) {
cv += fv(ii+iref,0,0);
cd += fine(ii+iref,0,0,fcomp+n)*fv(ii+iref,0,0);
Expand Down Expand Up @@ -260,9 +263,10 @@ void amrex_avgdown_nodes (Box const& bx, Array4<T> const& crse,
}
}

template<typename T>
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void amrex_avgdown_nodes (int i, int, int, int n, Array4<Real> const& crse,
Array4<Real const> const& fine,
void amrex_avgdown_nodes (int i, int, int, int n, Array4<T> const& crse,
Array4<T const> const& fine,
int ccomp, int fcomp, IntVect const& ratio) noexcept
{
crse(i,0,0,n+ccomp) = fine(i*ratio[0],0,0,n+fcomp);
Expand Down
Loading

0 comments on commit 4d6413c

Please sign in to comment.