Skip to content

Commit 4d6413c

Browse files
authored
Template average down (#2980)
Adding templating for the average_down function
1 parent bd0cef1 commit 4d6413c

File tree

5 files changed

+187
-168
lines changed

5 files changed

+187
-168
lines changed

Src/Base/AMReX_MultiFabUtil.H

Lines changed: 141 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,11 @@ namespace amrex
126126
//! Average MultiFab onto crse MultiFab without volume weighting. This
127127
//! routine DOES NOT assume that the crse BoxArray is a coarsened version of
128128
//! the fine BoxArray. Works for both cell-centered and nodal MultiFabs.
129-
void average_down (const MultiFab& S_fine, MultiFab& S_crse,
129+
template<typename FAB>
130+
void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
130131
int scomp, int ncomp, const IntVect& ratio);
131-
void average_down (const MultiFab& S_fine, MultiFab& S_crse,
132+
template<typename FAB>
133+
void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
132134
int scomp, int ncomp, int ratio);
133135

134136
//! Add a coarsened version of the data contained in the S_fine MultiFab to
@@ -365,6 +367,143 @@ void average_down_nodal (const FabArray<FAB>& fine, FabArray<FAB>& crse,
365367
}
366368
}
367369

370+
// *************************************************************************************************************
371+
372+
// Average a fine FabArray onto a crse FabArray; both must be cell-centered or both nodal.
373+
// We do NOT assume that the coarse layout is a coarsened version of the fine layout.
374+
// This version does NOT use volume-weighting
375+
template<typename FAB>
376+
void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse, int scomp, int ncomp, int rr)
377+
{
378+
average_down(S_fine,S_crse,scomp,ncomp,rr*IntVect::TheUnitVector());
379+
}
380+
381+
template<typename FAB>
382+
void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
383+
int scomp, int ncomp, const IntVect& ratio)
384+
{
385+
BL_PROFILE("amrex::average_down");
386+
AMREX_ASSERT(S_crse.nComp() == S_fine.nComp());
387+
AMREX_ASSERT((S_crse.is_cell_centered() && S_fine.is_cell_centered()) ||
388+
(S_crse.is_nodal() && S_fine.is_nodal()));
389+
390+
using value_type = typename FAB::value_type;
391+
392+
bool is_cell_centered = S_crse.is_cell_centered();
393+
394+
//
395+
// Coarsen() the fine BoxArray; the averaging itself runs on the processors owning the fine data.
396+
//
397+
BoxArray crse_S_fine_BA = S_fine.boxArray(); crse_S_fine_BA.coarsen(ratio);
398+
399+
if (crse_S_fine_BA == S_crse.boxArray() && S_fine.DistributionMap() == S_crse.DistributionMap())
400+
{
401+
#ifdef AMREX_USE_GPU
402+
if (Gpu::inLaunchRegion() && S_crse.isFusingCandidate()) {
403+
auto const& crsema = S_crse.arrays();
404+
auto const& finema = S_fine.const_arrays();
405+
if (is_cell_centered) {
406+
ParallelFor(S_crse, IntVect(0), ncomp,
407+
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
408+
{
409+
amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
410+
});
411+
} else {
412+
ParallelFor(S_crse, IntVect(0), ncomp,
413+
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
414+
{
415+
amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
416+
});
417+
}
418+
Gpu::streamSynchronize();
419+
} else
420+
#endif
421+
{
422+
#ifdef AMREX_USE_OMP
423+
#pragma omp parallel if (Gpu::notInLaunchRegion())
424+
#endif
425+
for (MFIter mfi(S_crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
426+
{
427+
// NOTE: The tilebox is defined at the coarse level.
428+
const Box& bx = mfi.tilebox();
429+
Array4<value_type> const& crsearr = S_crse.array(mfi);
430+
Array4<value_type const> const& finearr = S_fine.const_array(mfi);
431+
432+
if (is_cell_centered) {
433+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
434+
{
435+
amrex_avgdown(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
436+
});
437+
} else {
438+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
439+
{
440+
amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
441+
});
442+
}
443+
}
444+
}
445+
}
446+
else
447+
{
448+
FabArray<FAB> crse_S_fine(crse_S_fine_BA, S_fine.DistributionMap(), ncomp, 0, MFInfo(),DefaultFabFactory<FAB>());
449+
450+
#ifdef AMREX_USE_GPU
451+
if (Gpu::inLaunchRegion() && crse_S_fine.isFusingCandidate()) {
452+
auto const& crsema = crse_S_fine.arrays();
453+
auto const& finema = S_fine.const_arrays();
454+
if (is_cell_centered) {
455+
ParallelFor(crse_S_fine, IntVect(0), ncomp,
456+
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
457+
{
458+
amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
459+
});
460+
} else {
461+
ParallelFor(crse_S_fine, IntVect(0), ncomp,
462+
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
463+
{
464+
amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
465+
});
466+
}
467+
Gpu::streamSynchronize();
468+
} else
469+
#endif
470+
{
471+
#ifdef AMREX_USE_OMP
472+
#pragma omp parallel if (Gpu::notInLaunchRegion())
473+
#endif
474+
for (MFIter mfi(crse_S_fine,TilingIfNotGPU()); mfi.isValid(); ++mfi)
475+
{
476+
// NOTE: The tilebox is defined at the coarse level.
477+
const Box& bx = mfi.tilebox();
478+
Array4<value_type> const& crsearr = crse_S_fine.array(mfi);
479+
Array4<value_type const> const& finearr = S_fine.const_array(mfi);
480+
481+
// NOTE: We copy from component scomp of the fine fab into component 0 of the crse fab
482+
// because the crse fab is a temporary which was made starting at comp 0; it is
483+
// not part of the actual crse multifab which came in.
484+
485+
if (is_cell_centered) {
486+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
487+
{
488+
amrex_avgdown(i,j,k,n,crsearr,finearr,0,scomp,ratio);
489+
});
490+
} else {
491+
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
492+
{
493+
amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,0,scomp,ratio);
494+
});
495+
}
496+
}
497+
}
498+
499+
S_crse.ParallelCopy(crse_S_fine,0,scomp,ncomp);
500+
}
501+
}
502+
503+
504+
505+
506+
368507
/**
369508
* \brief Returns part of a norm based on two MultiFabs
370509
* The MultiFabs MUST have the same underlying BoxArray.

Src/Base/AMReX_MultiFabUtil.cpp

Lines changed: 0 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -439,17 +439,6 @@ namespace amrex
439439
#endif
440440
}
441441

442-
// *************************************************************************************************************
443-
444-
// Average fine cell-based MultiFab onto crse cell-centered MultiFab.
445-
// We do NOT assume that the coarse layout is a coarsened version of the fine layout.
446-
// This version does NOT use volume-weighting
447-
void average_down (const MultiFab& S_fine, MultiFab& S_crse, int scomp, int ncomp, int rr)
448-
{
449-
average_down(S_fine,S_crse,scomp,ncomp,rr*IntVect::TheUnitVector());
450-
}
451-
452-
453442
void sum_fine_to_coarse(const MultiFab& S_fine, MultiFab& S_crse,
454443
int scomp, int ncomp, const IntVect& ratio,
455444
const Geometry& cgeom, const Geometry& /*fgeom*/)
@@ -501,125 +490,6 @@ namespace amrex
501490
cgeom.periodicity(), FabArrayBase::ADD);
502491
}
503492

504-
void average_down (const MultiFab& S_fine, MultiFab& S_crse,
505-
int scomp, int ncomp, const IntVect& ratio)
506-
{
507-
BL_PROFILE("amrex::average_down");
508-
AMREX_ASSERT(S_crse.nComp() == S_fine.nComp());
509-
AMREX_ASSERT((S_crse.is_cell_centered() && S_fine.is_cell_centered()) ||
510-
(S_crse.is_nodal() && S_fine.is_nodal()));
511-
512-
bool is_cell_centered = S_crse.is_cell_centered();
513-
514-
//
515-
// Coarsen() the fine stuff on processors owning the fine data.
516-
//
517-
BoxArray crse_S_fine_BA = S_fine.boxArray(); crse_S_fine_BA.coarsen(ratio);
518-
519-
if (crse_S_fine_BA == S_crse.boxArray() && S_fine.DistributionMap() == S_crse.DistributionMap())
520-
{
521-
#ifdef AMREX_USE_GPU
522-
if (Gpu::inLaunchRegion() && S_crse.isFusingCandidate()) {
523-
auto const& crsema = S_crse.arrays();
524-
auto const& finema = S_fine.const_arrays();
525-
if (is_cell_centered) {
526-
ParallelFor(S_crse, IntVect(0), ncomp,
527-
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
528-
{
529-
amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
530-
});
531-
} else {
532-
ParallelFor(S_crse, IntVect(0), ncomp,
533-
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
534-
{
535-
amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
536-
});
537-
}
538-
Gpu::streamSynchronize();
539-
} else
540-
#endif
541-
{
542-
#ifdef AMREX_USE_OMP
543-
#pragma omp parallel if (Gpu::notInLaunchRegion())
544-
#endif
545-
for (MFIter mfi(S_crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
546-
{
547-
// NOTE: The tilebox is defined at the coarse level.
548-
const Box& bx = mfi.tilebox();
549-
Array4<Real> const& crsearr = S_crse.array(mfi);
550-
Array4<Real const> const& finearr = S_fine.const_array(mfi);
551-
552-
if (is_cell_centered) {
553-
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
554-
{
555-
amrex_avgdown(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
556-
});
557-
} else {
558-
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
559-
{
560-
amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
561-
});
562-
}
563-
}
564-
}
565-
}
566-
else
567-
{
568-
MultiFab crse_S_fine(crse_S_fine_BA, S_fine.DistributionMap(), ncomp, 0, MFInfo(), FArrayBoxFactory());
569-
570-
#ifdef AMREX_USE_GPU
571-
if (Gpu::inLaunchRegion() && crse_S_fine.isFusingCandidate()) {
572-
auto const& crsema = crse_S_fine.arrays();
573-
auto const& finema = S_fine.const_arrays();
574-
if (is_cell_centered) {
575-
ParallelFor(crse_S_fine, IntVect(0), ncomp,
576-
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
577-
{
578-
amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
579-
});
580-
} else {
581-
ParallelFor(crse_S_fine, IntVect(0), ncomp,
582-
[=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
583-
{
584-
amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
585-
});
586-
}
587-
Gpu::streamSynchronize();
588-
} else
589-
#endif
590-
{
591-
#ifdef AMREX_USE_OMP
592-
#pragma omp parallel if (Gpu::notInLaunchRegion())
593-
#endif
594-
for (MFIter mfi(crse_S_fine,TilingIfNotGPU()); mfi.isValid(); ++mfi)
595-
{
596-
// NOTE: The tilebox is defined at the coarse level.
597-
const Box& bx = mfi.tilebox();
598-
Array4<Real> const& crsearr = crse_S_fine.array(mfi);
599-
Array4<Real const> const& finearr = S_fine.const_array(mfi);
600-
601-
// NOTE: We copy from component scomp of the fine fab into component 0 of the crse fab
602-
// because the crse fab is a temporary which was made starting at comp 0, it is
603-
// not part of the actual crse multifab which came in.
604-
605-
if (is_cell_centered) {
606-
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
607-
{
608-
amrex_avgdown(i,j,k,n,crsearr,finearr,0,scomp,ratio);
609-
});
610-
} else {
611-
AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
612-
{
613-
amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,0,scomp,ratio);
614-
});
615-
}
616-
}
617-
}
618-
619-
S_crse.ParallelCopy(crse_S_fine,0,scomp,ncomp);
620-
}
621-
}
622-
623493
//! Average fine edge-based MultiFab onto crse edge-based MultiFab.
624494
//! This routine assumes that the crse BoxArray is a coarsened version of the fine BoxArray.
625495
void average_down_edges (const Vector<const MultiFab*>& fine, const Vector<MultiFab*>& crse,

Src/Base/AMReX_MultiFabUtil_1D_C.H

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -185,22 +185,23 @@ void amrex_avgdown_edges (int i, int, int, int n, Array4<Real> const& crse,
185185
crse(i,0,0,n+ccomp) = c * facInv;
186186
}
187187

188+
template <typename T>
188189
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
189-
void amrex_avgdown (Box const& bx, Array4<Real> const& crse,
190-
Array4<Real const> const& fine,
190+
void amrex_avgdown (Box const& bx, Array4<T> const& crse,
191+
Array4<T const> const& fine,
191192
int ccomp, int fcomp, int ncomp,
192193
IntVect const& ratio) noexcept
193194
{
194195
const auto clo = lbound(bx);
195196
const auto chi = ubound(bx);
196197

197198
const int facx = ratio[0];
198-
const Real volfrac = Real(1.0)/static_cast<Real>(facx);
199+
const T volfrac = T(1.0)/T(facx);
199200

200201
for (int n = 0; n < ncomp; ++n) {
201202
for (int i = clo.x; i <= chi.x; ++i) {
202203
int ii = i*facx;
203-
Real c = 0.;
204+
T c = 0;
204205
for (int iref = 0; iref < facx; ++iref) {
205206
c += fine(ii+iref,0,0,n+fcomp);
206207
}
@@ -209,30 +210,32 @@ void amrex_avgdown (Box const& bx, Array4<Real> const& crse,
209210
}
210211
}
211212

213+
template <typename T>
212214
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
213-
void amrex_avgdown (int i, int, int, int n, Array4<Real> const& crse,
214-
Array4<Real const> const& fine,
215+
void amrex_avgdown (int i, int, int, int n, Array4<T> const& crse,
216+
Array4<T const> const& fine,
215217
int ccomp, int fcomp, IntVect const& ratio) noexcept
216218
{
217219
const int facx = ratio[0];
218-
const Real volfrac = Real(1.0)/static_cast<Real>(facx);
220+
const T volfrac = T(1.0)/T(facx);
219221
const int ii = i*facx;
220-
Real c = Real(0.);
222+
T c = 0;
221223
for (int iref = 0; iref < facx; ++iref) {
222224
c += fine(ii+iref,0,0,n+fcomp);
223225
}
224226
crse(i,0,0,n+ccomp) = volfrac * c;
225227
}
226228

229+
template <typename T>
227230
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
228-
void amrex_avgdown_with_vol (int i, int, int, int n, Array4<Real> const& crse,
229-
Array4<Real const> const& fine,
230-
Array4<Real const> const& fv,
231+
void amrex_avgdown_with_vol (int i, int, int, int n, Array4<T> const& crse,
232+
Array4<T const> const& fine,
233+
Array4<T const> const& fv,
231234
int ccomp, int fcomp, IntVect const& ratio) noexcept
232235
{
233236
const int facx = ratio[0];
234237
const int ii = i*facx;
235-
Real cd = 0., cv = 0.;
238+
T cd = 0, cv = 0;
236239
for (int iref = 0; iref < facx; ++iref) {
237240
cv += fv(ii+iref,0,0);
238241
cd += fine(ii+iref,0,0,fcomp+n)*fv(ii+iref,0,0);
@@ -260,9 +263,10 @@ void amrex_avgdown_nodes (Box const& bx, Array4<T> const& crse,
260263
}
261264
}
262265

266+
template<typename T>
263267
AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
264-
void amrex_avgdown_nodes (int i, int, int, int n, Array4<Real> const& crse,
265-
Array4<Real const> const& fine,
268+
void amrex_avgdown_nodes (int i, int, int, int n, Array4<T> const& crse,
269+
Array4<T const> const& fine,
266270
int ccomp, int fcomp, IntVect const& ratio) noexcept
267271
{
268272
crse(i,0,0,n+ccomp) = fine(i*ratio[0],0,0,n+fcomp);

0 commit comments

Comments
 (0)