AMReX-Codes
diff --git a/‎Src/Base/AMReX_MultiFabUtil.H
Lines changed: 141 additions & 2 deletions b/‎Src/Base/AMReX_MultiFabUtil.H
Lines changed: 141 additions & 2 deletions
diff --git a/‎Src/Base/AMReX_MultiFabUtil.cpp
Lines changed: 0 additions & 130 deletions b/‎Src/Base/AMReX_MultiFabUtil.cpp
Lines changed: 0 additions & 130 deletions
diff --git a/‎Src/Base/AMReX_MultiFabUtil_1D_C.H
Lines changed: 18 additions & 14 deletions b/‎Src/Base/AMReX_MultiFabUtil_1D_C.H
Lines changed: 18 additions & 14 deletions
@@ -126,9 +126,11 @@ namespace amrex
     //! Average MultiFab onto crse MultiFab without volume weighting. This
     //! routine DOES NOT assume that the crse BoxArray is a coarsened version of
     //! the fine BoxArray. Work for both cell-centered and nodal MultiFabs.
-    void average_down (const MultiFab& S_fine, MultiFab& S_crse,
+    template<typename FAB>
+    void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
                        int scomp, int ncomp, const IntVect& ratio);
-    void average_down (const MultiFab& S_fine, MultiFab& S_crse,
+    template<typename FAB>
+    void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
                        int scomp, int ncomp, int ratio);
 
     //! Add a coarsened version of the data contained in the S_fine MultiFab to
@@ -365,6 +367,143 @@ void average_down_nodal (const FabArray<FAB>& fine, FabArray<FAB>& crse,
     }
 }
 
+// *************************************************************************************************************
+
+// Average fine cell-based MultiFab onto crse cell-centered MultiFab.
+// We do NOT assume that the coarse layout is a coarsened version of the fine layout.
+// This version does NOT use volume-weighting
+template<typename FAB>
+void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse, int scomp, int ncomp, int rr)
+{
+    average_down(S_fine,S_crse,scomp,ncomp,rr*IntVect::TheUnitVector());
+}
+
+template<typename FAB>
+void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
+                   int scomp, int ncomp, const IntVect& ratio)
+{
+    BL_PROFILE("amrex::average_down");
+    AMREX_ASSERT(S_crse.nComp() == S_fine.nComp());
+    AMREX_ASSERT((S_crse.is_cell_centered() && S_fine.is_cell_centered()) ||
+                 (S_crse.is_nodal()         && S_fine.is_nodal()));
+
+    using value_type = typename FAB::value_type;
+
+    bool is_cell_centered = S_crse.is_cell_centered();
+
+    //
+    // Coarsen() the fine stuff on processors owning the fine data.
+    //
+    BoxArray crse_S_fine_BA = S_fine.boxArray(); crse_S_fine_BA.coarsen(ratio);
+
+    if (crse_S_fine_BA == S_crse.boxArray() && S_fine.DistributionMap() == S_crse.DistributionMap())
+    {
+#ifdef AMREX_USE_GPU
+        if (Gpu::inLaunchRegion() && S_crse.isFusingCandidate()) {
+            auto const& crsema = S_crse.arrays();
+            auto const& finema = S_fine.const_arrays();
+            if (is_cell_centered) {
+                ParallelFor(S_crse, IntVect(0), ncomp,
+                            [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
+                            {
+                                amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
+                            });
+            } else {
+                ParallelFor(S_crse, IntVect(0), ncomp,
+                            [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
+                            {
+                                amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
+                            });
+            }
+            Gpu::streamSynchronize();
+        } else
+#endif
+        {
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (Gpu::notInLaunchRegion())
+#endif
+            for (MFIter mfi(S_crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
+            {
+                //  NOTE: The tilebox is defined at the coarse level.
+                const Box& bx = mfi.tilebox();
+                Array4<value_type> const& crsearr = S_crse.array(mfi);
+                Array4<value_type const> const& finearr = S_fine.const_array(mfi);
+
+                if (is_cell_centered) {
+                    AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
+                                                      {
+                                                          amrex_avgdown(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
+                                                      });
+                } else {
+                    AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
+                                                      {
+                                                          amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
+                                                      });
+                }
+            }
+        }
+    }
+    else
+    {
+        FabArray<FAB> crse_S_fine(crse_S_fine_BA, S_fine.DistributionMap(), ncomp, 0, MFInfo(),DefaultFabFactory<FAB>());
+
+#ifdef AMREX_USE_GPU
+        if (Gpu::inLaunchRegion() && crse_S_fine.isFusingCandidate()) {
+            auto const& crsema = crse_S_fine.arrays();
+            auto const& finema = S_fine.const_arrays();
+            if (is_cell_centered) {
+                ParallelFor(crse_S_fine, IntVect(0), ncomp,
+                            [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
+                            {
+                                amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
+                            });
+            } else {
+                ParallelFor(crse_S_fine, IntVect(0), ncomp,
+                            [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
+                            {
+                                amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
+                            });
+            }
+            Gpu::streamSynchronize();
+        } else
+#endif
+        {
+#ifdef AMREX_USE_OMP
+#pragma omp parallel if (Gpu::notInLaunchRegion())
+#endif
+            for (MFIter mfi(crse_S_fine,TilingIfNotGPU()); mfi.isValid(); ++mfi)
+            {
+                //  NOTE: The tilebox is defined at the coarse level.
+                const Box& bx = mfi.tilebox();
+                Array4<value_type> const& crsearr = crse_S_fine.array(mfi);
+                Array4<value_type const> const& finearr = S_fine.const_array(mfi);
+
+                //  NOTE: We copy from component scomp of the fine fab into component 0 of the crse fab
+                //        because the crse fab is a temporary which was made starting at comp 0, it is
+                //        not part of the actual crse multifab which came in.
+
+                if (is_cell_centered) {
+                    AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
+                                                      {
+                                                          amrex_avgdown(i,j,k,n,crsearr,finearr,0,scomp,ratio);
+                                                      });
+                } else {
+                    AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
+                                                      {
+                                                          amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,0,scomp,ratio);
+                                                      });
+                }
+            }
+        }
+
+        S_crse.ParallelCopy(crse_S_fine,0,scomp,ncomp);
+    }
+}
+
+
+
+
+
    /**
     * \brief Returns part of a norm based on two MultiFabs
     * The MultiFabs MUST have the same underlying BoxArray.
 
@@ -439,17 +439,6 @@ namespace amrex
 #endif
    }
 
-// *************************************************************************************************************
-
-    // Average fine cell-based MultiFab onto crse cell-centered MultiFab.
-    // We do NOT assume that the coarse layout is a coarsened version of the fine layout.
-    // This version does NOT use volume-weighting
-    void average_down (const MultiFab& S_fine, MultiFab& S_crse, int scomp, int ncomp, int rr)
-    {
-         average_down(S_fine,S_crse,scomp,ncomp,rr*IntVect::TheUnitVector());
-    }
-
-
     void sum_fine_to_coarse(const MultiFab& S_fine, MultiFab& S_crse,
                             int scomp, int ncomp, const IntVect& ratio,
                             const Geometry& cgeom, const Geometry& /*fgeom*/)
@@ -501,125 +490,6 @@ namespace amrex
                             cgeom.periodicity(), FabArrayBase::ADD);
     }
 
-    void average_down (const MultiFab& S_fine, MultiFab& S_crse,
-                       int scomp, int ncomp, const IntVect& ratio)
-    {
-        BL_PROFILE("amrex::average_down");
-        AMREX_ASSERT(S_crse.nComp() == S_fine.nComp());
-        AMREX_ASSERT((S_crse.is_cell_centered() && S_fine.is_cell_centered()) ||
-                     (S_crse.is_nodal()         && S_fine.is_nodal()));
-
-        bool is_cell_centered = S_crse.is_cell_centered();
-
-        //
-        // Coarsen() the fine stuff on processors owning the fine data.
-        //
-        BoxArray crse_S_fine_BA = S_fine.boxArray(); crse_S_fine_BA.coarsen(ratio);
-
-        if (crse_S_fine_BA == S_crse.boxArray() && S_fine.DistributionMap() == S_crse.DistributionMap())
-        {
-#ifdef AMREX_USE_GPU
-            if (Gpu::inLaunchRegion() && S_crse.isFusingCandidate()) {
-                auto const& crsema = S_crse.arrays();
-                auto const& finema = S_fine.const_arrays();
-                if (is_cell_centered) {
-                    ParallelFor(S_crse, IntVect(0), ncomp,
-                    [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
-                    {
-                        amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
-                    });
-                } else {
-                    ParallelFor(S_crse, IntVect(0), ncomp,
-                    [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
-                    {
-                        amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
-                    });
-                }
-                Gpu::streamSynchronize();
-            } else
-#endif
-            {
-#ifdef AMREX_USE_OMP
-#pragma omp parallel if (Gpu::notInLaunchRegion())
-#endif
-                for (MFIter mfi(S_crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
-                {
-                    //  NOTE: The tilebox is defined at the coarse level.
-                    const Box& bx = mfi.tilebox();
-                    Array4<Real> const& crsearr = S_crse.array(mfi);
-                    Array4<Real const> const& finearr = S_fine.const_array(mfi);
-
-                    if (is_cell_centered) {
-                        AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
-                        {
-                            amrex_avgdown(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
-                        });
-                    } else {
-                        AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
-                        {
-                            amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
-                        });
-                    }
-                }
-            }
-        }
-        else
-        {
-            MultiFab crse_S_fine(crse_S_fine_BA, S_fine.DistributionMap(), ncomp, 0, MFInfo(), FArrayBoxFactory());
-
-#ifdef AMREX_USE_GPU
-            if (Gpu::inLaunchRegion() && crse_S_fine.isFusingCandidate()) {
-                auto const& crsema = crse_S_fine.arrays();
-                auto const& finema = S_fine.const_arrays();
-                if (is_cell_centered) {
-                    ParallelFor(crse_S_fine, IntVect(0), ncomp,
-                    [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
-                    {
-                        amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
-                    });
-                } else {
-                    ParallelFor(crse_S_fine, IntVect(0), ncomp,
-                    [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
-                    {
-                        amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
-                    });
-                }
-                Gpu::streamSynchronize();
-            } else
-#endif
-            {
-#ifdef AMREX_USE_OMP
-#pragma omp parallel if (Gpu::notInLaunchRegion())
-#endif
-                for (MFIter mfi(crse_S_fine,TilingIfNotGPU()); mfi.isValid(); ++mfi)
-                {
-                    //  NOTE: The tilebox is defined at the coarse level.
-                    const Box& bx = mfi.tilebox();
-                    Array4<Real> const& crsearr = crse_S_fine.array(mfi);
-                    Array4<Real const> const& finearr = S_fine.const_array(mfi);
-
-                    //  NOTE: We copy from component scomp of the fine fab into component 0 of the crse fab
-                    //        because the crse fab is a temporary which was made starting at comp 0, it is
-                    //        not part of the actual crse multifab which came in.
-
-                    if (is_cell_centered) {
-                        AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
-                        {
-                            amrex_avgdown(i,j,k,n,crsearr,finearr,0,scomp,ratio);
-                        });
-                    } else {
-                        AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
-                        {
-                            amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,0,scomp,ratio);
-                        });
-                    }
-                }
-            }
-
-            S_crse.ParallelCopy(crse_S_fine,0,scomp,ncomp);
-        }
-   }
-
     //! Average fine edge-based MultiFab onto crse edge-based MultiFab.
     //! This routine assumes that the crse BoxArray is a coarsened version of the fine BoxArray.
     void average_down_edges (const Vector<const MultiFab*>& fine, const Vector<MultiFab*>& crse,
 
@@ -185,22 +185,23 @@ void amrex_avgdown_edges (int i, int, int, int n, Array4<Real> const& crse,
     crse(i,0,0,n+ccomp) = c * facInv;
 }
 
+template <typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void amrex_avgdown (Box const& bx, Array4<Real> const& crse,
-                    Array4<Real const> const& fine,
+void amrex_avgdown (Box const& bx, Array4<T> const& crse,
+                    Array4<T const> const& fine,
                     int ccomp, int fcomp, int ncomp,
                     IntVect const& ratio) noexcept
 {
     const auto clo = lbound(bx);
     const auto chi = ubound(bx);
 
     const int facx = ratio[0];
-    const Real volfrac = Real(1.0)/static_cast<Real>(facx);
+    const T volfrac = T(1.0)/T(facx);
 
     for (int n = 0; n < ncomp; ++n) {
         for (int i = clo.x; i <= chi.x; ++i) {
             int ii = i*facx;
-            Real c = 0.;
+            T c = 0;
             for (int iref = 0; iref < facx; ++iref) {
                 c += fine(ii+iref,0,0,n+fcomp);
             }
@@ -209,30 +210,32 @@ void amrex_avgdown (Box const& bx, Array4<Real> const& crse,
     }
 }
 
+template <typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void amrex_avgdown (int i, int, int, int n, Array4<Real> const& crse,
-                    Array4<Real const> const& fine,
+void amrex_avgdown (int i, int, int, int n, Array4<T> const& crse,
+                    Array4<T const> const& fine,
                     int ccomp, int fcomp, IntVect const& ratio) noexcept
 {
     const int facx = ratio[0];
-    const Real volfrac = Real(1.0)/static_cast<Real>(facx);
+    const T volfrac = T(1.0)/T(facx);
     const int ii = i*facx;
-    Real c = Real(0.);
+    T c = 0;
     for (int iref = 0; iref < facx; ++iref) {
         c += fine(ii+iref,0,0,n+fcomp);
     }
     crse(i,0,0,n+ccomp) = volfrac * c;
 }
 
+template <typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void amrex_avgdown_with_vol (int i, int, int, int n, Array4<Real> const& crse,
-                             Array4<Real const> const& fine,
-                             Array4<Real const> const& fv,
+void amrex_avgdown_with_vol (int i, int, int, int n, Array4<T> const& crse,
+                             Array4<T const> const& fine,
+                             Array4<T const> const& fv,
                              int ccomp, int fcomp, IntVect const& ratio) noexcept
 {
     const int facx = ratio[0];
     const int ii = i*facx;
-    Real cd = 0., cv = 0.;
+    T cd = 0, cv = 0;
     for (int iref = 0; iref < facx; ++iref) {
         cv +=                           fv(ii+iref,0,0);
         cd += fine(ii+iref,0,0,fcomp+n)*fv(ii+iref,0,0);
@@ -260,9 +263,10 @@ void amrex_avgdown_nodes (Box const& bx, Array4<T> const& crse,
     }
 }
 
+template<typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void amrex_avgdown_nodes (int i, int, int, int n, Array4<Real> const& crse,
-                          Array4<Real const> const& fine,
+void amrex_avgdown_nodes (int i, int, int, int n, Array4<T> const& crse,
+                          Array4<T const> const& fine,
                           int ccomp, int fcomp, IntVect const& ratio) noexcept
 {
     crse(i,0,0,n+ccomp) = fine(i*ratio[0],0,0,n+fcomp);