@@ -126,9 +126,11 @@ namespace amrex
    //! Average MultiFab onto crse MultiFab without volume weighting. This
    //! routine DOES NOT assume that the crse BoxArray is a coarsened version of
    //! the fine BoxArray. Works for both cell-centered and nodal MultiFabs.
-   void average_down (const MultiFab& S_fine, MultiFab& S_crse,
+   template <typename FAB>
+   void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
                       int scomp, int ncomp, const IntVect& ratio);
-   void average_down (const MultiFab& S_fine, MultiFab& S_crse,
+   template <typename FAB>
+   void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
                       int scomp, int ncomp, int ratio);

    //! Add a coarsened version of the data contained in the S_fine MultiFab to
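With the FAB template parameter, one entry point now covers every FabArray specialization instead of MultiFab alone. A minimal usage sketch of the two overloads (fine_grids, crse_grids, dm, crse_dm, and ncomp are placeholder setup, not part of this diff):

    // Average a fine MultiFab onto a coarse one; the coarse BoxArray need
    // not be a coarsening of the fine BoxArray.
    MultiFab fine_mf(fine_grids, dm, ncomp, 0);
    MultiFab crse_mf(crse_grids, crse_dm, ncomp, 0);
    amrex::average_down(fine_mf, crse_mf, 0, ncomp, 2);             // scalar-ratio overload

    // The templated form should also accept other FabArrays, e.g. iMultiFab.
    iMultiFab fine_imf(fine_grids, dm, ncomp, 0);
    iMultiFab crse_imf(crse_grids, crse_dm, ncomp, 0);
    amrex::average_down(fine_imf, crse_imf, 0, ncomp, IntVect(2));  // IntVect-ratio overload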
@@ -365,6 +367,143 @@ void average_down_nodal (const FabArray<FAB>& fine, FabArray<FAB>& crse,
}
}

+ // *************************************************************************************************************
+
+ // Average fine cell-based MultiFab onto crse cell-centered MultiFab.
+ // We do NOT assume that the coarse layout is a coarsened version of the fine layout.
+ // This version does NOT use volume-weighting.
+ template <typename FAB>
+ void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse, int scomp, int ncomp, int rr)
+ {
+     average_down(S_fine,S_crse,scomp,ncomp,rr*IntVect::TheUnitVector());
+ }
+
+ template <typename FAB>
+ void average_down (const FabArray<FAB>& S_fine, FabArray<FAB>& S_crse,
+                    int scomp, int ncomp, const IntVect& ratio)
+ {
+     BL_PROFILE("amrex::average_down");
+     AMREX_ASSERT(S_crse.nComp() == S_fine.nComp());
+     AMREX_ASSERT((S_crse.is_cell_centered() && S_fine.is_cell_centered()) ||
+                  (S_crse.is_nodal()         && S_fine.is_nodal()));
+
+     using value_type = typename FAB::value_type;
+
+     bool is_cell_centered = S_crse.is_cell_centered();
+
+     //
+     // Coarsen() the fine stuff on processors owning the fine data.
+     //
+     BoxArray crse_S_fine_BA = S_fine.boxArray(); crse_S_fine_BA.coarsen(ratio);
+
+     if (crse_S_fine_BA == S_crse.boxArray() && S_fine.DistributionMap() == S_crse.DistributionMap())
+     {
+ #ifdef AMREX_USE_GPU
+         if (Gpu::inLaunchRegion() && S_crse.isFusingCandidate()) {
+             auto const& crsema = S_crse.arrays();
+             auto const& finema = S_fine.const_arrays();
+             if (is_cell_centered) {
+                 ParallelFor(S_crse, IntVect(0), ncomp,
+                 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
+                 {
+                     amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
+                 });
+             } else {
+                 ParallelFor(S_crse, IntVect(0), ncomp,
+                 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
+                 {
+                     amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],scomp,scomp,ratio);
+                 });
+             }
+             Gpu::streamSynchronize();
+         } else
+ #endif
+         {
+ #ifdef AMREX_USE_OMP
+ #pragma omp parallel if (Gpu::notInLaunchRegion())
+ #endif
+             for (MFIter mfi(S_crse,TilingIfNotGPU()); mfi.isValid(); ++mfi)
+             {
+                 // NOTE: The tilebox is defined at the coarse level.
+                 const Box& bx = mfi.tilebox();
+                 Array4<value_type> const& crsearr = S_crse.array(mfi);
+                 Array4<value_type const> const& finearr = S_fine.const_array(mfi);
+
+                 if (is_cell_centered) {
+                     AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
+                     {
+                         amrex_avgdown(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
+                     });
+                 } else {
+                     AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
+                     {
+                         amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,scomp,scomp,ratio);
+                     });
+                 }
+             }
+         }
+     }
+     else
+     {
+         FabArray<FAB> crse_S_fine(crse_S_fine_BA, S_fine.DistributionMap(), ncomp, 0, MFInfo(), DefaultFabFactory<FAB>());
+
+ #ifdef AMREX_USE_GPU
+         if (Gpu::inLaunchRegion() && crse_S_fine.isFusingCandidate()) {
+             auto const& crsema = crse_S_fine.arrays();
+             auto const& finema = S_fine.const_arrays();
+             if (is_cell_centered) {
+                 ParallelFor(crse_S_fine, IntVect(0), ncomp,
+                 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
+                 {
+                     amrex_avgdown(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
+                 });
+             } else {
+                 ParallelFor(crse_S_fine, IntVect(0), ncomp,
+                 [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, int n) noexcept
+                 {
+                     amrex_avgdown_nodes(i,j,k,n,crsema[box_no],finema[box_no],0,scomp,ratio);
+                 });
+             }
+             Gpu::streamSynchronize();
+         } else
+ #endif
+         {
+ #ifdef AMREX_USE_OMP
+ #pragma omp parallel if (Gpu::notInLaunchRegion())
+ #endif
+             for (MFIter mfi(crse_S_fine,TilingIfNotGPU()); mfi.isValid(); ++mfi)
+             {
+                 // NOTE: The tilebox is defined at the coarse level.
+                 const Box& bx = mfi.tilebox();
+                 Array4<value_type> const& crsearr = crse_S_fine.array(mfi);
+                 Array4<value_type const> const& finearr = S_fine.const_array(mfi);
+
+                 // NOTE: We copy from component scomp of the fine fab into component 0 of the crse fab
+                 //       because the crse fab is a temporary which was made starting at comp 0; it is
+                 //       not part of the actual crse multifab which came in.
+
+                 if (is_cell_centered) {
+                     AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
+                     {
+                         amrex_avgdown(i,j,k,n,crsearr,finearr,0,scomp,ratio);
+                     });
+                 } else {
+                     AMREX_HOST_DEVICE_PARALLEL_FOR_4D(bx, ncomp, i, j, k, n,
+                     {
+                         amrex_avgdown_nodes(i,j,k,n,crsearr,finearr,0,scomp,ratio);
+                     });
+                 }
+             }
+         }
+
+         S_crse.ParallelCopy(crse_S_fine,0,scomp,ncomp);
+     }
+ }
+
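Taken together, the new definition has two paths: when the coarsened fine BoxArray and the fine DistributionMapping already match the destination, it averages directly into S_crse; otherwise it averages into a temporary that lives on the fine data's distribution and then redistributes. A sketch of that second path, unrolled with the same names used above:

    // Coarsen the fine grids in index space only; data stays on the fine ranks.
    BoxArray crse_S_fine_BA = S_fine.boxArray();
    crse_S_fine_BA.coarsen(ratio);
    // Temporary holding the averaged data, laid out like the fine data.
    FabArray<FAB> crse_S_fine(crse_S_fine_BA, S_fine.DistributionMap(), ncomp, 0);
    // ... local averaging of S_fine components [scomp, scomp+ncomp) into
    //     components [0, ncomp) of crse_S_fine (the kernels above) ...
    // Parallel communication then moves the result onto S_crse's own layout.
    S_crse.ParallelCopy(crse_S_fine, 0, scomp, ncomp);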
/**
 * \brief Returns part of a norm based on two MultiFabs
 * The MultiFabs MUST have the same underlying BoxArray.