diff --git a/CHANGELOG.md b/CHANGELOG.md index befb895a8a5a..978bccbf19c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Current develop ### Added (new features/APIs/variables/...) +- [[PR 907]](https://github.com/parthenon-hpc-lab/parthenon/pull/907) PEP1: Allow subclassing StateDescriptor - [[PR 932]](https://github.com/parthenon-hpc-lab/parthenon/pull/932) Add GetOrAddFlag to metadata - [[PR 931]](https://github.com/parthenon-hpc-lab/parthenon/pull/931) Allow SparsePacks with subsets of blocks - [[PR 921]](https://github.com/parthenon-hpc-lab/parthenon/pull/921) Add more flexible ways of adding and using MeshData/MeshBlockData objects to DataCollections @@ -25,17 +26,25 @@ - [[PR 885]](https://github.com/parthenon-hpc-lab/parthenon/pull/885) Expose PackDescriptor and use uids in SparsePacks ### Fixed (not changing behavior/API/variables/...) +- [[PR 955]](https://github.com/parthenon-hpc-lab/parthenon/pull/955) Only permit rank0 to mkdir when -d flag specified +- [[PR 952]](https://github.com/parthenon-hpc-lab/parthenon/pull/954) Fix format string in sparse advection example +- [[PR 947]](https://github.com/parthenon-hpc-lab/parthenon/pull/947) Add missing ForceRemeshComm dependencies +- [[PR 928]](https://github.com/parthenon-hpc-lab/parthenon/pull/928) Fix boundary comms during refinement next to refined blocks +- [[PR 937]](https://github.com/parthenon-hpc-lab/parthenon/pull/937) Fix multiple line continuations - [[PR 933]](https://github.com/parthenon-hpc-lab/parthenon/pull/933) Remove extraneous debug check - [[PR 917]](https://github.com/parthenon-hpc-lab/parthenon/pull/917) Update Iterative Tasking Infrastructure - [[PR 890]](https://github.com/parthenon-hpc-lab/parthenon/pull/890) Fix bugs in sparse communication and prolongation ### Infrastructure (changes irrelevant to downstream codes) +- [[PR 967]](https://github.com/parthenon-hpc-lab/parthenon/pull/967) Change INLINE to FORCEINLINE on par_for_inner overloads +- [[PR 
938]](https://github.com/parthenon-hpc-lab/parthenon/pull/938) Restructure buffer packing/unpacking kernel hierarchical parallelism +- [[PR 944]](https://github.com/parthenon-hpc-lab/parthenon/pull/944) Move sparse pack identifier creation to descriptor - [[PR 904]](https://github.com/parthenon-hpc-lab/parthenon/pull/904) Move to prolongation/restriction in one for AMR and communicate non-cell centered fields -- [[PR 918]](https://github.com/parthenon-hpc-lab/parthenon/pull/918) Refactor RegionSize +- [[PR 918]](https://github.com/parthenon-hpc-lab/parthenon/pull/918) Refactor RegionSize - [[PR 901]](https://github.com/parthenon-hpc-lab/parthenon/pull/901) Implement shared element ownership model ### Removed (removing behavior/API/varaibles/...) - +- [[PR 930]](https://github.com/parthenon-hpc-lab/parthenon/pull/930) Remove ParthenonManager::ParthenonInit as it is error-prone and the split functions are the recommended usage. ## Release 0.8.0 Date: 2023-05-26 diff --git a/benchmarks/burgers/main.cpp b/benchmarks/burgers/main.cpp index 2ecc94490325..070a5c8bad8d 100644 --- a/benchmarks/burgers/main.cpp +++ b/benchmarks/burgers/main.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2022-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -25,7 +25,7 @@ int main(int argc, char *argv[]) { pman.app_input->ProblemGenerator = burgers_benchmark::ProblemGenerator; // call ParthenonInit to initialize MPI and Kokkos, parse the input deck, and set up - auto manager_status = pman.ParthenonInit(argc, argv); + auto manager_status = pman.ParthenonInitEnv(argc, argv); if (manager_status == ParthenonStatus::complete) { pman.ParthenonFinalize(); return 0; @@ -38,6 +38,7 @@ int main(int argc, char *argv[]) { // make use of MPI and Kokkos // This needs to be scoped so that the driver object is destructed before Finalize + pman.ParthenonInitPackagesAndMesh(); { // Initialize the driver burgers_benchmark::BurgersDriver driver(pman.pinput.get(), pman.app_input.get(), diff --git a/doc/sphinx/src/parthenon_manager.rst b/doc/sphinx/src/parthenon_manager.rst new file mode 100644 index 000000000000..15211e8bccdb --- /dev/null +++ b/doc/sphinx/src/parthenon_manager.rst @@ -0,0 +1,67 @@ +.. _parthenonmanager: + +Parthenon Manager +================= + +The ``ParthenonManager`` class helps set up a parthenon-based +application. An instance of ``ParthenonManager`` owns pointers to a +number of sub-objects: + +* The ``ApplicationInput`` struct, which lets users set things like + the ``ProcessPackages`` and ``ProblemGenerator`` function pointers. +* The ``ParameterInput`` class, which populates input parameters from + the input file and command line +* The ``Mesh`` object + +The ``ParthenonManager`` has two important methods that usually must +be called in the ``main`` function of a parthenon-based app. The +function + +.. code:: cpp + + ParthenonStatus ParthenonManager::ParthenonInitEnv(int argc, char *argv[]); + +reads the input deck and populates the ``ParameterInput`` object +pointer ``pman.pinput``, and sets up the ``MPI`` and ``Kokkos`` +runtimes. The function + +.. 
code:: cpp + + void ParthenonManager::ParthenonInitPackagesAndMesh(); + +calls the ``Initialize(ParameterInput *pin)`` function of all packages +to be utilized and creates the grid hierarchy, including the ``Mesh`` +and ``MeshBlock`` objects, and calls the ``ProblemGenerator`` +initialization routines. + +The reason these functions are split out is to enable decisions to be +made by the application between reading the input deck and setting up +the grid. For example, a common use-case is: + +.. code:: cpp + + using parthenon::ParthenonManager; + using parthenon::ParthenonStatus; + ParthenonManager pman; + + // call ParthenonInitEnv to initialize MPI and Kokkos and parse the input deck + auto manager_status = pman.ParthenonInitEnv(argc, argv); + if (manager_status == ParthenonStatus::complete) { + pman.ParthenonFinalize(); + return 0; + } + if (manager_status == ParthenonStatus::error) { + pman.ParthenonFinalize(); + return 1; + } + + // Redefine parthenon defaults + pman.app_input->ProcessPackages = MyProcessPackages; + std::string prob = pman.pinput->GetString("app", "problem"); + if (prob == "problem1") { + pman.app_input->ProblemGenerator = Problem1Generator; + } else { + pman.app_input->ProblemGenerator = Problem2Generator; + } + + pman.ParthenonInitPackagesAndMesh(); diff --git a/example/advection/main.cpp b/example/advection/main.cpp index d7a823993341..a4c3cc6cf7e9 100644 --- a/example/advection/main.cpp +++ b/example/advection/main.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -26,7 +26,7 @@ int main(int argc, char *argv[]) { pman.app_input->UserWorkAfterLoop = advection_example::UserWorkAfterLoop; // call ParthenonInit to initialize MPI and Kokkos, parse the input deck, and set up - auto manager_status = pman.ParthenonInit(argc, argv); + auto manager_status = pman.ParthenonInitEnv(argc, argv); if (manager_status == ParthenonStatus::complete) { pman.ParthenonFinalize(); return 0; @@ -35,9 +35,11 @@ int main(int argc, char *argv[]) { pman.ParthenonFinalize(); return 1; } + // Now that ParthenonInit has been called and setup succeeded, the code can now // make use of MPI and Kokkos. // This needs to be scoped so that the driver object is destructed before Finalize + pman.ParthenonInitPackagesAndMesh(); { // Initialize the driver advection_example::AdvectionDriver driver(pman.pinput.get(), pman.app_input.get(), diff --git a/example/calculate_pi/pi_driver.cpp b/example/calculate_pi/pi_driver.cpp index 05017f6fc2e1..c322c17c0d3f 100644 --- a/example/calculate_pi/pi_driver.cpp +++ b/example/calculate_pi/pi_driver.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -39,7 +39,7 @@ int main(int argc, char *argv[]) { // This is called on each mesh block whenever the mesh changes. 
pman.app_input->InitMeshBlockUserData = &calculate_pi::SetInOrOutBlock; - auto manager_status = pman.ParthenonInit(argc, argv); + auto manager_status = pman.ParthenonInitEnv(argc, argv); if (manager_status == ParthenonStatus::complete) { pman.ParthenonFinalize(); return 0; @@ -50,6 +50,7 @@ int main(int argc, char *argv[]) { } // This needs to be scoped so that the driver object is destructed before Finalize + pman.ParthenonInitPackagesAndMesh(); { PiDriver driver(pman.pinput.get(), pman.app_input.get(), pman.pmesh.get()); diff --git a/example/particle_leapfrog/main.cpp b/example/particle_leapfrog/main.cpp index 565fd857c166..450d2b1230fc 100644 --- a/example/particle_leapfrog/main.cpp +++ b/example/particle_leapfrog/main.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -25,7 +25,7 @@ int main(int argc, char *argv[]) { pman.app_input->ProblemGenerator = particles_leapfrog::ProblemGenerator; // call ParthenonInit to initialize MPI and Kokkos, parse the input deck, and set up - auto manager_status = pman.ParthenonInit(argc, argv); + auto manager_status = pman.ParthenonInitEnv(argc, argv); if (manager_status == ParthenonStatus::complete) { pman.ParthenonFinalize(); return 0; @@ -37,6 +37,7 @@ int main(int argc, char *argv[]) { // Now that ParthenonInit has been called and setup succeeded, the code can now // make use of MPI and Kokkos + pman.ParthenonInitPackagesAndMesh(); // This needs to be scoped so that the driver object is destructed before Finalize { // Initialize the driver diff --git a/example/particle_tracers/main.cpp b/example/particle_tracers/main.cpp index d95bf999e758..707a94e1bb40 100644 --- a/example/particle_tracers/main.cpp +++ b/example/particle_tracers/main.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2021-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -25,7 +25,7 @@ int main(int argc, char *argv[]) { pman.app_input->ProblemGenerator = tracers_example::ProblemGenerator; // call ParthenonInit to initialize MPI and Kokkos, parse the input deck, and set up - auto manager_status = pman.ParthenonInit(argc, argv); + auto manager_status = pman.ParthenonInitEnv(argc, argv); if (manager_status == ParthenonStatus::complete) { pman.ParthenonFinalize(); return 0; @@ -38,6 +38,7 @@ int main(int argc, char *argv[]) { // make use of MPI and Kokkos // This needs to be scoped so that the driver object is destructed before Finalize + pman.ParthenonInitPackagesAndMesh(); { // Initialize the driver tracers_example::ParticleDriver driver(pman.pinput.get(), pman.app_input.get(), diff --git a/example/poisson/main.cpp b/example/poisson/main.cpp index 066214ae6b1c..02e7888b73df 100644 --- a/example/poisson/main.cpp +++ b/example/poisson/main.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2021-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -25,7 +25,7 @@ int main(int argc, char *argv[]) { pman.app_input->MeshProblemGenerator = poisson_example::ProblemGenerator; // call ParthenonInit to initialize MPI and Kokkos, parse the input deck, and set up - auto manager_status = pman.ParthenonInit(argc, argv); + auto manager_status = pman.ParthenonInitEnv(argc, argv); if (manager_status == ParthenonStatus::complete) { pman.ParthenonFinalize(); return 0; @@ -38,6 +38,7 @@ int main(int argc, char *argv[]) { // make use of MPI and Kokkos // This needs to be scoped so that the driver object is destructed before Finalize + pman.ParthenonInitPackagesAndMesh(); { // Initialize the driver poisson_example::PoissonDriver driver(pman.pinput.get(), pman.app_input.get(), diff --git a/example/sparse_advection/main.cpp b/example/sparse_advection/main.cpp index e09954965072..3f513670596b 100644 --- a/example/sparse_advection/main.cpp +++ b/example/sparse_advection/main.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2021-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -29,7 +29,7 @@ int main(int argc, char *argv[]) { sparse_advection_example::PostStepDiagnosticsInLoop; // call ParthenonInit to initialize MPI and Kokkos, parse the input deck, and set up - auto manager_status = pman.ParthenonInit(argc, argv); + auto manager_status = pman.ParthenonInitEnv(argc, argv); if (manager_status == ParthenonStatus::complete) { pman.ParthenonFinalize(); return 0; @@ -43,6 +43,7 @@ int main(int argc, char *argv[]) { DriverStatus driver_status; // This needs to be scoped so that the driver object is destructed before Finalize + pman.ParthenonInitPackagesAndMesh(); { // Initialize the driver sparse_advection_example::SparseAdvectionDriver driver( diff --git a/example/sparse_advection/parthenon_app_inputs.cpp b/example/sparse_advection/parthenon_app_inputs.cpp index 8203ade9b532..1cd806accad6 100644 --- a/example/sparse_advection/parthenon_app_inputs.cpp +++ b/example/sparse_advection/parthenon_app_inputs.cpp @@ -1,4 +1,4 @@ -// (C) (or copyright) 2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -176,7 +176,7 @@ void PostStepDiagnosticsInLoop(Mesh *mesh, ParameterInput *pin, const SimTime &t } std::printf("\n"); Real mem_avg = static_cast(mem_tot) / static_cast(blocks_tot); - std::printf("\tMem used/block in bytes [min, max, avg] = [%ld, %ld, %.14e]\n", + std::printf("\tMem used/block in bytes [min, max, avg] = [%lu, %lu, %.14e]\n", mem_min, mem_max, mem_avg); } } diff --git a/example/stochastic_subgrid/main.cpp b/example/stochastic_subgrid/main.cpp index 613c7663a830..2ca46f086f93 100644 --- a/example/stochastic_subgrid/main.cpp +++ b/example/stochastic_subgrid/main.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -26,7 +26,7 @@ int main(int argc, char *argv[]) { pman.app_input->UserWorkAfterLoop = stochastic_subgrid_example::UserWorkAfterLoop; // call ParthenonInit to initialize MPI and Kokkos, parse the input deck, and set up - auto manager_status = pman.ParthenonInit(argc, argv); + auto manager_status = pman.ParthenonInitEnv(argc, argv); if (manager_status == ParthenonStatus::complete) { pman.ParthenonFinalize(); return 0; @@ -39,6 +39,7 @@ int main(int argc, char *argv[]) { // make use of MPI and Kokkos // This needs to be scoped so that the driver object is destructed before Finalize + pman.ParthenonInitPackagesAndMesh(); { // Initialize the driver stochastic_subgrid_example::StochasticSubgridDriver driver( diff --git a/src/bvals/bvals.hpp b/src/bvals/bvals.hpp index 827abac792ca..718499a9f804 100644 --- a/src/bvals/bvals.hpp +++ b/src/bvals/bvals.hpp @@ -21,6 +21,7 @@ #include #include +#include #include #include "basic_types.hpp" @@ -78,7 +79,9 @@ class BoundaryBase { static int BufferID(int dim, bool multilevel); static int FindBufferID(int ox1, int ox2, int ox3, int fi1, int fi2); - void SearchAndSetNeighbors(MeshBlockTree &tree, int *ranklist, int *nslist); + void + SearchAndSetNeighbors(MeshBlockTree &tree, int *ranklist, int *nslist, + const std::unordered_set &newly_refined = {}); protected: // 1D refined or unrefined=2 @@ -90,7 +93,8 @@ class BoundaryBase { RegionSize block_size_; ParArrayND sarea_[2]; - void SetNeighborOwnership(); + void + SetNeighborOwnership(const std::unordered_set &newly_refined = {}); private: // calculate 3x shared static data members when constructing only the 1st class instance diff --git a/src/bvals/bvals_base.cpp b/src/bvals/bvals_base.cpp index d9ad5317ac93..057493f186e8 100644 --- a/src/bvals/bvals_base.cpp +++ b/src/bvals/bvals_base.cpp @@ -29,6 +29,7 @@ #include // c_str() #include 
"globals.hpp" +#include "mesh/logical_location.hpp" #include "mesh/mesh.hpp" #include "utils/buffer_utils.hpp" #include "utils/error_checking.hpp" @@ -300,8 +301,9 @@ int BoundaryBase::CreateBvalsMPITag(int lid, int bufid) { // TODO(felker): break-up this long function -void BoundaryBase::SearchAndSetNeighbors(MeshBlockTree &tree, int *ranklist, - int *nslist) { +void BoundaryBase::SearchAndSetNeighbors( + MeshBlockTree &tree, int *ranklist, int *nslist, + const std::unordered_set &newly_refined) { Kokkos::Profiling::pushRegion("SearchAndSetNeighbors"); MeshBlockTree *neibt; int myox1, myox2 = 0, myox3 = 0, myfx1, myfx2, myfx3; @@ -368,7 +370,7 @@ void BoundaryBase::SearchAndSetNeighbors(MeshBlockTree &tree, int *ranklist, } } if (block_size_.nx(X2DIR) == 1) { - SetNeighborOwnership(); + SetNeighborOwnership(newly_refined); Kokkos::Profiling::popRegion(); // SearchAndSetNeighbors return; } @@ -503,7 +505,7 @@ void BoundaryBase::SearchAndSetNeighbors(MeshBlockTree &tree, int *ranklist, } if (block_size_.nx(X3DIR) == 1) { - SetNeighborOwnership(); + SetNeighborOwnership(newly_refined); Kokkos::Profiling::popRegion(); // SearchAndSetNeighbors return; } @@ -626,11 +628,12 @@ void BoundaryBase::SearchAndSetNeighbors(MeshBlockTree &tree, int *ranklist, } } - SetNeighborOwnership(); + SetNeighborOwnership(newly_refined); Kokkos::Profiling::popRegion(); // SearchAndSetNeighbors } -void BoundaryBase::SetNeighborOwnership() { +void BoundaryBase::SetNeighborOwnership( + const std::unordered_set &newly_refined) { // Set neighbor block ownership std::set allowed_neighbors; allowed_neighbors.insert(loc); // Insert the location of this block @@ -642,7 +645,7 @@ void BoundaryBase::SetNeighborOwnership() { RootGridInfo rg_info = pmy_mesh_->GetRootGridInfo(); for (int n = 0; n < nneighbor; ++n) { neighbor[n].ownership = - DetermineOwnership(neighbor[n].loc, allowed_neighbors, rg_info); + DetermineOwnership(neighbor[n].loc, allowed_neighbors, rg_info, newly_refined); 
neighbor[n].ownership.initialized = true; } } diff --git a/src/bvals/comms/boundary_communication.cpp b/src/bvals/comms/boundary_communication.cpp index 9f7b80f3e0a1..2496c6382da8 100644 --- a/src/bvals/comms/boundary_communication.cpp +++ b/src/bvals/comms/boundary_communication.cpp @@ -84,6 +84,7 @@ TaskStatus SendBoundBufs(std::shared_ptr> &md) { PARTHENON_DEBUG_REQUIRE(bnd_info.size() == nbound, "Need same size for boundary info"); auto &sending_nonzero_flags = cache.sending_non_zero_flags; auto &sending_nonzero_flags_h = cache.sending_non_zero_flags_h; + Kokkos::parallel_for( "SendBoundBufs", Kokkos::TeamPolicy<>(parthenon::DevExecSpace(), nbound, Kokkos::AUTO), @@ -103,13 +104,26 @@ TaskStatus SendBoundBufs(std::shared_ptr> &md) { int idx_offset = 0; for (int iel = 0; iel < bnd_info(b).ntopological_elements; ++iel) { auto &idxer = bnd_info(b).idxer[iel]; + const int Ni = idxer.template EndIdx<5>() - idxer.template StartIdx<5>() + 1; Kokkos::parallel_reduce( - Kokkos::TeamThreadRange<>(team_member, idxer.size()), + Kokkos::TeamThreadRange<>(team_member, idxer.size() / Ni), [&](const int idx, bool &lnon_zero) { - const auto [t, u, v, k, j, i] = idxer(idx); - const Real &val = bnd_info(b).var(iel, t, u, v, k, j, i); - bnd_info(b).buf(idx + idx_offset) = val; - lnon_zero = lnon_zero || (std::abs(val) >= threshold); + const auto [t, u, v, k, j, i] = idxer(idx * Ni); + Real *var = &bnd_info(b).var(iel, t, u, v, k, j, i); + Real *buf = &bnd_info(b).buf(idx * Ni + idx_offset); + + Kokkos::parallel_for(Kokkos::ThreadVectorRange<>(team_member, Ni), + [&](int m) { buf[m] = var[m]; }); + + bool mnon_zero = false; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange<>(team_member, Ni), + [&](int m, bool &llnon_zero) { + llnon_zero = llnon_zero || (std::abs(buf[m]) >= threshold); + }, + Kokkos::LOr(mnon_zero)); + + lnon_zero = lnon_zero || mnon_zero; }, Kokkos::LOr(non_zero[iel])); idx_offset += idxer.size(); @@ -148,7 +162,7 @@ template TaskStatus 
StartReceiveBoundBufs(std::shared_ptr> &md) { Kokkos::Profiling::pushRegion("Task_StartReceiveBoundBufs"); Mesh *pmesh = md->GetMeshPointer(); - auto &cache = md->GetBvarsCache().GetSubCache(BoundaryType::flxcor_send, false); + auto &cache = md->GetBvarsCache().GetSubCache(bound_type, false); if (cache.buf_vec.size() == 0) InitializeBufferCache(md, &(pmesh->boundary_comm_map), &cache, ReceiveKey, false); @@ -238,21 +252,41 @@ TaskStatus SetBounds(std::shared_ptr> &md) { int idx_offset = 0; for (int iel = 0; iel < bnd_info(b).ntopological_elements; ++iel) { auto &idxer = bnd_info(b).idxer[iel]; + const int Ni = idxer.template EndIdx<5>() - idxer.template StartIdx<5>() + 1; if (bnd_info(b).buf_allocated && bnd_info(b).allocated) { - Kokkos::parallel_for(Kokkos::TeamThreadRange<>(team_member, idxer.size()), - [&](const int idx) { - const auto [t, u, v, k, j, i] = idxer(idx); - if (idxer.IsActive(k, j, i)) - bnd_info(b).var(iel, t, u, v, k, j, i) = - bnd_info(b).buf(idx + idx_offset); - }); + Kokkos::parallel_for( + Kokkos::TeamThreadRange<>(team_member, idxer.size() / Ni), + [&](const int idx) { + const auto [t, u, v, k, j, i] = idxer(idx * Ni); + Real *var = &bnd_info(b).var(iel, t, u, v, k, j, i); + Real *buf = &bnd_info(b).buf(idx * Ni + idx_offset); + // Have to do this because of some weird issue about structure bindings + // being captured + const int kk = k; + const int jj = j; + const int ii = i; + Kokkos::parallel_for(Kokkos::ThreadVectorRange<>(team_member, Ni), + [&](int m) { + if (idxer.IsActive(kk, jj, ii + m)) + var[m] = buf[m]; + }); + }); } else if (bnd_info(b).allocated) { const Real default_val = bnd_info(b).var.sparse_default_val; - Kokkos::parallel_for(Kokkos::TeamThreadRange<>(team_member, idxer.size()), - [&](const int idx) { - const auto [t, u, v, k, j, i] = idxer(idx); - bnd_info(b).var(iel, t, u, v, k, j, i) = default_val; - }); + Kokkos::parallel_for( + Kokkos::TeamThreadRange<>(team_member, idxer.size() / Ni), + [&](const int idx) { + const 
auto [t, u, v, k, j, i] = idxer(idx * Ni); + Real *var = &bnd_info(b).var(iel, t, u, v, k, j, i); + const int kk = k; + const int jj = j; + const int ii = i; + Kokkos::parallel_for(Kokkos::ThreadVectorRange<>(team_member, Ni), + [&](int m) { + if (idxer.IsActive(kk, jj, ii + m)) + var[m] = default_val; + }); + }); } idx_offset += idxer.size(); } diff --git a/src/driver/driver.cpp b/src/driver/driver.cpp index 60f6940f5cf4..84371eeaeb36 100644 --- a/src/driver/driver.cpp +++ b/src/driver/driver.cpp @@ -39,7 +39,9 @@ Kokkos::Timer Driver::timer_LBandAMR; void Driver::PreExecute() { if (Globals::my_rank == 0) { - std::cout << std::endl << "Setup complete, executing driver...\n" << std::endl; + std::cout << "# Variables in use:\n" << *(pmesh->resolved_packages) << std::endl; + std::cout << std::endl; + std::cout << "Setup complete, executing driver...\n" << std::endl; } timer_main.reset(); diff --git a/src/interface/packages.hpp b/src/interface/packages.hpp index 285a815bf9ea..1ef6db59f58f 100644 --- a/src/interface/packages.hpp +++ b/src/interface/packages.hpp @@ -16,6 +16,7 @@ #include #include +#include #include "basic_types.hpp" @@ -26,15 +27,47 @@ class Packages_t { Packages_t() = default; void Add(const std::shared_ptr &package); - std::shared_ptr const &Get(const std::string &name) { + std::shared_ptr const &Get(const std::string &name) const { return packages_.at(name); } + // Retrieve a package pointer, cast to a given type T + template + T *Get(const std::string &name) const { + return static_cast(packages_.at(name).get()); + } + const Dictionary> &AllPackages() const { return packages_; } Dictionary> &AllPackages() { return packages_; } + // Returns a sub-Dictionary containing just pointers to packages of type T. + // Dictionary is a *new copy*, and members are bare pointers, not shared_ptr. 
+ template + const Dictionary AllPackagesOfType() const { + Dictionary sub_dict; + for (const auto &package : packages_) { + if (T *cast_package = dynamic_cast(package.second.get())) { + sub_dict[package.first] = cast_package; + } + } + return sub_dict; + } + + // Returns a list of pointers to packages of type T (others are skipped). + // List contains bare pointers, not shared_ptr objects + template + const std::vector ListPackagesOfType() const { + std::vector sub_list; + for (const auto &package : packages_) { + if (T *cast_package = dynamic_cast(package.second.get())) { + sub_list.push_back(cast_package); + } + } + return sub_list; + } + private: Dictionary> packages_; }; diff --git a/src/interface/sparse_pack_base.cpp b/src/interface/sparse_pack_base.cpp index 5711f6662b58..abbe3a854300 100644 --- a/src/interface/sparse_pack_base.cpp +++ b/src/interface/sparse_pack_base.cpp @@ -289,19 +289,26 @@ template SparsePackBase SparsePackBase::Build>(MeshData *, template SparsePackBase &SparsePackCache::Get(T *pmd, const PackDescriptor &desc, const std::vector &include_block) { - //std::string ident = GetIdentifier(desc, include_block); - if (auto pack_pair = pack_map.find(desc.identifier); pack_pair != pack_map.end()) { - auto &pack = pack_pair->second; + if (pack_map.count(desc.identifier) > 0) { + auto &cache_tuple = pack_map[desc.identifier]; + auto &pack = std::get<0>(cache_tuple); auto alloc_status_in = SparsePackBase::GetAllocStatus(pmd, desc, include_block); - auto &alloc_status = pack.second; + auto &alloc_status = std::get<1>(cache_tuple); if (alloc_status.size() != alloc_status_in.size()) return BuildAndAdd(pmd, desc, include_block); for (int i = 0; i < alloc_status_in.size(); ++i) { if (alloc_status[i] != alloc_status_in[i]) return BuildAndAdd(pmd, desc, include_block); } + auto &include_status = std::get<2>(cache_tuple); + if (include_status.size() != include_block.size()) + return BuildAndAdd(pmd, desc, include_block); + for (int i = 0; i < include_block.size(); ++i) { + if (include_status[i] 
!= include_block[i]) + return BuildAndAdd(pmd, desc, include_block); + } // Cached version is not stale, so just return a reference to it - return pack.first; + return std::get<0>(cache_tuple); } return BuildAndAdd(pmd, desc, include_block); } @@ -317,8 +324,9 @@ SparsePackBase &SparsePackCache::BuildAndAdd(T *pmd, const PackDescriptor &desc, const std::vector &include_block) { if (pack_map.count(desc.identifier) > 0) pack_map.erase(desc.identifier); pack_map[desc.identifier] = {SparsePackBase::Build(pmd, desc, include_block), - SparsePackBase::GetAllocStatus(pmd, desc, include_block)}; - return pack_map[desc.identifier].first; + SparsePackBase::GetAllocStatus(pmd, desc, include_block), + include_block}; + return std::get<0>(pack_map[desc.identifier]); } template SparsePackBase & SparsePackCache::BuildAndAdd>(MeshData *, const PackDescriptor &, @@ -326,23 +334,4 @@ SparsePackCache::BuildAndAdd>(MeshData *, const PackDescrip template SparsePackBase &SparsePackCache::BuildAndAdd>( MeshBlockData *, const PackDescriptor &, const std::vector &); -std::string SparsePackCache::GetIdentifier(const PackDescriptor &desc, - const std::vector &include_block) const { - std::string identifier(""); - for (const auto &vgroup : desc.var_groups) { - for (const auto &[vid, uid] : vgroup) { - identifier += std::to_string(uid) + "_"; - } - identifier += "|"; - } - identifier += std::to_string(desc.with_fluxes); - identifier += std::to_string(desc.coarse); - identifier += std::to_string(desc.flat); - identifier += "|"; - for (const auto b : include_block) { - identifier += std::to_string(b); - } - return identifier; -} - } // namespace parthenon diff --git a/src/interface/sparse_pack_base.hpp b/src/interface/sparse_pack_base.hpp index af13b22b108c..34a08590dd2b 100644 --- a/src/interface/sparse_pack_base.hpp +++ b/src/interface/sparse_pack_base.hpp @@ -55,6 +55,7 @@ class SparsePackBase { friend class SparsePackCache; using alloc_t = std::vector; + using include_t = std::vector; using 
pack_t = ParArray3D>; using bounds_t = ParArray3D; using bounds_h_t = typename ParArray3D::HostMirror; @@ -123,9 +124,8 @@ class SparsePackCache { SparsePackBase &BuildAndAdd(T *pmd, const impl::PackDescriptor &desc, const std::vector &include_block); - std::string GetIdentifier(const impl::PackDescriptor &desc, - const std::vector &include_block) const; - std::unordered_map> + std::unordered_map> pack_map; friend class SparsePackBase; @@ -164,7 +164,6 @@ struct PackDescriptor { const bool flat; const std::string identifier; - private: std::string GetIdentifier() { std::string ident(""); @@ -179,7 +178,6 @@ struct PackDescriptor { ident += std::to_string(flat); return ident; } - template std::vector BuildUids(int nvgs, const StateDescriptor *const psd, const FUNC_t &selector) { diff --git a/src/interface/state_descriptor.hpp b/src/interface/state_descriptor.hpp index ff21e628147c..5bb37ea83b60 100644 --- a/src/interface/state_descriptor.hpp +++ b/src/interface/state_descriptor.hpp @@ -108,6 +108,9 @@ class StateDescriptor { } } + // Virtual destructor for subclassing + virtual ~StateDescriptor() = default; + static std::shared_ptr CreateResolvedStateDescriptor(Packages_t &packages); @@ -429,7 +432,7 @@ class StateDescriptor { friend std::ostream &operator<<(std::ostream &os, const StateDescriptor &sd); - private: + protected: void InvertControllerMap(); Params params_; diff --git a/src/interface/variable.cpp b/src/interface/variable.cpp index fd8b534c95e5..ce62354f0631 100644 --- a/src/interface/variable.cpp +++ b/src/interface/variable.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2022. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -81,7 +81,8 @@ void Variable::CopyFluxesAndBdryVar(const Variable *src) { } } - if (IsSet(Metadata::FillGhost) || IsSet(Metadata::Independent)) { + if (IsSet(Metadata::FillGhost) || IsSet(Metadata::Independent) || + IsSet(Metadata::ForceRemeshComm)) { // no need to check mesh->multilevel, if false, we're just making a shallow copy of // an empty ParArrayND coarse_s = src->coarse_s; @@ -172,7 +173,8 @@ void Variable::AllocateFluxesAndCoarse(std::weak_ptr wpmb) { } // Create the boundary object - if (IsSet(Metadata::FillGhost) || IsSet(Metadata::Independent)) { + if (IsSet(Metadata::FillGhost) || IsSet(Metadata::Independent) || + IsSet(Metadata::ForceRemeshComm)) { if (wpmb.expired()) return; std::shared_ptr pmb = wpmb.lock(); @@ -205,7 +207,8 @@ std::int64_t Variable::Deallocate() { } } - if (IsSet(Metadata::FillGhost) || IsSet(Metadata::Independent)) { + if (IsSet(Metadata::FillGhost) || IsSet(Metadata::Independent) || + IsSet(Metadata::ForceRemeshComm)) { mem_size += coarse_s.size() * sizeof(T); coarse_s.Reset(); } diff --git a/src/kokkos_abstraction.hpp b/src/kokkos_abstraction.hpp index bc6331fea796..05e33603a1d3 100644 --- a/src/kokkos_abstraction.hpp +++ b/src/kokkos_abstraction.hpp @@ -1,9 +1,9 @@ //======================================================================================== // Parthenon performance portable AMR framework -// Copyright(C) 2020-2022 The Parthenon collaboration +// Copyright(C) 2020-2023 The Parthenon collaboration // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020-2022. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 // for Los Alamos National Laboratory (LANL), which is operated by Triad @@ -712,7 +712,7 @@ inline void par_for_outer(OuterLoopPatternTeams, const std::string &name, // Inner parallel loop using TeamThreadRange template -KOKKOS_INLINE_FUNCTION void +KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int ll, const int lu, const int ml, const int mu, const int nl, const int nu, const int kl, const int ku, const int jl, const int ju, const int il, const int iu, @@ -746,7 +746,7 @@ par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int ll, const i }); } template -KOKKOS_INLINE_FUNCTION void +KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int ml, const int mu, const int nl, const int nu, const int kl, const int ku, const int jl, const int ju, const int il, const int iu, const Function &function) { @@ -775,7 +775,7 @@ par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int ml, const i }); } template -KOKKOS_INLINE_FUNCTION void +KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int nl, const int nu, const int kl, const int ku, const int jl, const int ju, const int il, const int iu, const Function &function) { @@ -800,10 +800,10 @@ par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int nl, const i }); } template -KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, - const int kl, const int ku, const int jl, - const int ju, const int il, const int iu, - const Function &function) { +KOKKOS_FORCEINLINE_FUNCTION void +par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int kl, const int ku, + const int jl, const int ju, const int il, const int iu, + const Function &function) { const int Nk = ku - kl + 1; const int Nj = ju - jl + 1; const int Ni = iu - il + 1; @@ -820,9 +820,9 @@ KOKKOS_INLINE_FUNCTION void 
par_for_inner(InnerLoopPatternTTR, team_mbr_t team_m }); } template -KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, - const int jl, const int ju, const int il, - const int iu, const Function &function) { +KOKKOS_FORCEINLINE_FUNCTION void +par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, const int jl, const int ju, + const int il, const int iu, const Function &function) { const int Nj = ju - jl + 1; const int Ni = iu - il + 1; const int NjNi = Nj * Ni; @@ -833,22 +833,22 @@ KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, team_mbr_t team_m }); } template -KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, team_mbr_t team_member, - const int il, const int iu, - const Function &function) { +KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(InnerLoopPatternTTR, + team_mbr_t team_member, const int il, + const int iu, const Function &function) { Kokkos::parallel_for(Kokkos::TeamThreadRange(team_member, il, iu + 1), function); } // Inner parallel loop using TeamVectorRange template -KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternTVR, team_mbr_t team_member, - const int il, const int iu, - const Function &function) { +KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(InnerLoopPatternTVR, + team_mbr_t team_member, const int il, + const int iu, const Function &function) { Kokkos::parallel_for(Kokkos::TeamVectorRange(team_member, il, iu + 1), function); } // Inner parallel loop using FOR SIMD template -KOKKOS_INLINE_FUNCTION void +KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member, const int nl, const int nu, const int kl, const int ku, const int jl, const int ju, const int il, const int iu, const Function &function) { @@ -864,10 +864,10 @@ par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member, const int nl, con } } template -KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member, - const int kl, const int ku, 
const int jl, - const int ju, const int il, const int iu, - const Function &function) { +KOKKOS_FORCEINLINE_FUNCTION void +par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member, const int kl, const int ku, + const int jl, const int ju, const int il, const int iu, + const Function &function) { for (int k = kl; k <= ku; ++k) { for (int j = jl; j <= ju; ++j) { #pragma omp simd @@ -878,9 +878,9 @@ KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, team_mbr_t te } } template -KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member, - const int jl, const int ju, const int il, - const int iu, const Function &function) { +KOKKOS_FORCEINLINE_FUNCTION void +par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member, const int jl, const int ju, + const int il, const int iu, const Function &function) { for (int j = jl; j <= ju; ++j) { #pragma omp simd for (int i = il; i <= iu; i++) { @@ -889,9 +889,9 @@ KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, team_mbr_t te } } template -KOKKOS_INLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, team_mbr_t team_member, - const int il, const int iu, - const Function &function) { +KOKKOS_FORCEINLINE_FUNCTION void par_for_inner(InnerLoopPatternSimdFor, + team_mbr_t team_member, const int il, + const int iu, const Function &function) { #pragma omp simd for (int i = il; i <= iu; i++) { function(i); diff --git a/src/mesh/amr_loadbalance.cpp b/src/mesh/amr_loadbalance.cpp index 44a96bbdaf6b..4db6ed6b2b53 100644 --- a/src/mesh/amr_loadbalance.cpp +++ b/src/mesh/amr_loadbalance.cpp @@ -739,6 +739,7 @@ bool Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, ApplicationInput oldtonew[mb_idx] = ntot - 1; current_level = 0; + std::unordered_set newly_refined; for (int n = 0; n < ntot; n++) { // "on" = "old n" = "old gid" = "old global MeshBlock ID" int on = newtoold[n]; @@ -746,6 +747,10 @@ bool Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, 
ApplicationInput current_level = newloc[n].level(); if (newloc[n].level() >= loclist[on].level()) { // same or refined newcost[n] = costlist[on]; + // Keep a list of all blocks refined for below + if (newloc[n].level() > loclist[on].level()) { + newly_refined.insert(newloc[n]); + } } else { double acost = 0.0; for (int l = 0; l < nleaf; l++) @@ -951,22 +956,49 @@ bool Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, ApplicationInput } } prolongation_cache.CopyToDevice(); + refinement::ProlongateShared(resolved_packages.get(), prolongation_cache, cellbounds, c_cellbounds); - refinement::ProlongateInternal(resolved_packages.get(), prolongation_cache, - cellbounds, c_cellbounds); + // update the lists + loclist = std::move(newloc); + ranklist = std::move(newrank); + costlist = std::move(newcost); + + // A block newly refined and prolongated may have neighbors which were + // already refined to the new level. + // If so, the prolongated versions of shared elements will not reflect + // the true, finer versions present in the neighbor block. + // We must create any new fine buffers and fill them from these neighbors + // in order to maintain a consistent global state. + // Thus we rebuild and synchronize the mesh now, but using a unique + // neighbor precedence favoring the "old" fine blocks over "new" ones + for (auto &pmb : block_list) { + pmb->pbval->SearchAndSetNeighbors(tree, ranklist.data(), nslist.data(), + newly_refined); + } + // Make sure all old sends/receives are done before we reconfigure the mesh #ifdef MPI_PARALLEL if (send_reqs.size() != 0) PARTHENON_MPI_CHECK( MPI_Waitall(send_reqs.size(), send_reqs.data(), MPI_STATUSES_IGNORE)); #endif - Kokkos::Profiling::popRegion(); // AMR: Recv data and unpack + // Re-initialize the mesh with our temporary ownership/neighbor configurations. + // No buffers are different when we switch to the final precedence order. 
+ Initialize(false, pin, app_in); - // update the lists - loclist = std::move(newloc); - ranklist = std::move(newrank); - costlist = std::move(newcost); + // Internal refinement relies on the fine shared values, which are only consistent after + // being updated with any previously fine versions + refinement::ProlongateInternal(resolved_packages.get(), prolongation_cache, + cellbounds, fcellc_cellbounds); + + // Rebuild just the ownership model, this time weighting the "new" fine blocks just like + // any other blocks at their level. + for (auto &pmb : block_list) { + pmb->pbval->SearchAndSetNeighbors(tree, ranklist.data(), nslist.data()); + } + + Kokkos::Profiling::popRegion(); // AMR: Recv data and unpack #ifdef ENABLE_LB_TIMERS block_cost.Realloc(nbe - nbs + 1); @@ -974,12 +1006,6 @@ bool Mesh::RedistributeAndRefineMeshBlocks(ParameterInput *pin, ApplicationInput block_cost.resize(nbe - nbs + 1); #endif - // re-initialize the MeshBlocks - for (auto &pmb : block_list) { - pmb->pbval->SearchAndSetNeighbors(tree, ranklist.data(), nslist.data()); - } - Initialize(false, pin, app_in); - ResetLoadBalanceVariables(); Kokkos::Profiling::popRegion(); // RedistributeAndRefineMeshBlocks diff --git a/src/mesh/logical_location.hpp b/src/mesh/logical_location.hpp index 21dd7a52f513..33abdebb96d8 100644 --- a/src/mesh/logical_location.hpp +++ b/src/mesh/logical_location.hpp @@ -23,12 +23,25 @@ #include #include #include +#include #include #include +#include "logical_location.hpp" #include "utils/error_checking.hpp" #include "utils/morton_number.hpp" +namespace parthenon { +class LogicalLocation; +} + +// This must be declared before an unordered_set of LogicalLocation is used +// below, but must be *implemented* after the class definition +template <> +struct std::hash { + std::size_t operator()(const parthenon::LogicalLocation &key) const noexcept; +}; + namespace parthenon { struct RootGridInfo { @@ -251,15 +264,25 @@ struct block_ownership_t { inline block_ownership_t 
DetermineOwnership(const LogicalLocation &main_block, const std::set &allowed_neighbors, - const RootGridInfo &rg_info = RootGridInfo()) { + const RootGridInfo &rg_info = RootGridInfo(), + const std::unordered_set &newly_refined = {}) { block_ownership_t main_owns; - auto ownership_less_than = [](const LogicalLocation &a, const LogicalLocation &b) { + auto ownership_level = [&](const LogicalLocation &a) { + // Newly-refined blocks are treated as higher-level than blocks at their + // parent level, but lower-level than previously-refined blocks at their + // current level. + if (newly_refined.count(a)) return 2 * a.level() - 1; + return 2 * a.level(); + }; + + auto ownership_less_than = [ownership_level](const LogicalLocation &a, + const LogicalLocation &b) { // Ownership is first determined by block with the highest level, then by maximum // Morton number this is reversed in precedence from the normal comparators where // Morton number takes precedence - if (a.level() == b.level()) return a.morton() < b.morton(); - return a.level() < b.level(); + if (ownership_level(a) == ownership_level(b)) return a.morton() < b.morton(); + return ownership_level(a) < ownership_level(b); }; for (int ox1 : {-1, 0, 1}) { @@ -346,14 +369,12 @@ inline auto GetIndexRangeMaskFromOwnership(TopologicalElement el, } // namespace parthenon -template <> -struct std::hash { - std::size_t operator()(const parthenon::LogicalLocation &key) const noexcept { - // TODO(LFR): Think more carefully about what the best choice for this key is, - // probably the least significant sizeof(size_t) * 8 bits of the morton number - // with 3 * (level - 21) trailing bits removed. 
- return key.morton().bits[0]; - } -}; +inline std::size_t std::hash::operator()( + const parthenon::LogicalLocation &key) const noexcept { + // TODO(LFR): Think more carefully about what the best choice for this key is, + // probably the least significant sizeof(size_t) * 8 bits of the morton number + // with 3 * (level - 21) trailing bits removed. + return key.morton().bits[0]; +} #endif // MESH_LOGICAL_LOCATION_HPP_ diff --git a/src/mesh/mesh.cpp b/src/mesh/mesh.cpp index 43adc9d04632..1afcee636deb 100644 --- a/src/mesh/mesh.cpp +++ b/src/mesh/mesh.cpp @@ -463,11 +463,6 @@ Mesh::Mesh(ParameterInput *pin, ApplicationInput *app_in, Packages_t &packages, #endif ResetLoadBalanceVariables(); - - // Output variables in use in this run - if (Globals::my_rank == 0) { - std::cout << "#Variables in use:\n" << *(resolved_packages) << std::endl; - } } //---------------------------------------------------------------------------------------- @@ -730,11 +725,6 @@ Mesh::Mesh(ParameterInput *pin, ApplicationInput *app_in, RestartReader &rr, block_cost.resize(block_list.size()); #endif ResetLoadBalanceVariables(); - - // Output variables in use in this run - if (Globals::my_rank == 0) { - std::cout << "#Variables in use:\n" << *(resolved_packages) << std::endl; - } } //---------------------------------------------------------------------------------------- @@ -1276,7 +1266,8 @@ void Mesh::SetupMPIComms() { auto &metadata = pair.second; // Create both boundary and flux communicators for everything with either FillGhost // or WithFluxes just to be safe - if (metadata.IsSet(Metadata::FillGhost) || metadata.IsSet(Metadata::WithFluxes)) { + if (metadata.IsSet(Metadata::FillGhost) || metadata.IsSet(Metadata::WithFluxes) || + metadata.IsSet(Metadata::ForceRemeshComm)) { MPI_Comm mpi_comm; PARTHENON_MPI_CHECK(MPI_Comm_dup(MPI_COMM_WORLD, &mpi_comm)); const auto ret = mpi_comm_map_.insert({pair.first.label(), mpi_comm}); diff --git a/src/parameter_input.cpp b/src/parameter_input.cpp 
index 464940eeba2c..b9fd9546f2b6 100644 --- a/src/parameter_input.cpp +++ b/src/parameter_input.cpp @@ -3,7 +3,7 @@ // Copyright(C) 2014 James M. Stone and other code contributors // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020-2022. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -117,7 +117,9 @@ void ParameterInput::LoadFromStream(std::istream &is) { InputBlock *pib{}; int line_num{-1}, blocks_found{0}; + // Buffer multiple lines if a continuation character is present std::string multiline_name, multiline_value, multiline_comment; + // Status in/out of continuation bool continuing = false; while (is.good()) { @@ -175,19 +177,26 @@ void ParameterInput::LoadFromStream(std::istream &is) { PARTHENON_FAIL(msg); } // parse line and add name/value/comment strings (if found) to current block name - bool continuation = ParseLine(pib, line, param_name, param_value, param_comment); - if (continuing || continuation) { + bool has_cont_char = ParseLine(pib, line, param_name, param_value, param_comment); + if (continuing || has_cont_char) { + // Append line data multiline_name += param_name; multiline_value += param_value; multiline_comment += param_comment; + // Set new state continuing = true; } - if (continuing && !continuation) { - continuing = false; + if (continuing && !has_cont_char) { + // Flush line data param_name = multiline_name; param_value = multiline_value; param_comment = multiline_comment; + multiline_name = ""; + multiline_value = ""; + multiline_comment = ""; + // Set new state + continuing = false; } if (!continuing) { diff --git 
a/src/parthenon_manager.cpp b/src/parthenon_manager.cpp index 7bb08a587658..2329553bfe76 100644 --- a/src/parthenon_manager.cpp +++ b/src/parthenon_manager.cpp @@ -1,9 +1,9 @@ //======================================================================================== // Parthenon performance portable AMR framework -// Copyright(C) 2020-2022 The Parthenon collaboration +// Copyright(C) 2020-2023 The Parthenon collaboration // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020-2022. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -40,16 +40,12 @@ namespace parthenon { -ParthenonStatus ParthenonManager::ParthenonInit(int argc, char *argv[]) { - auto manager_status = ParthenonInitEnv(argc, argv); - if (manager_status != ParthenonStatus::ok) { - return manager_status; +ParthenonStatus ParthenonManager::ParthenonInitEnv(int argc, char *argv[]) { + if (called_init_env_) { + PARTHENON_THROW("ParthenonInitEnv called twice!"); } - ParthenonInitPackagesAndMesh(); - return ParthenonStatus::ok; -} + called_init_env_ = true; -ParthenonStatus ParthenonManager::ParthenonInitEnv(int argc, char *argv[]) { // initialize MPI #ifdef MPI_PARALLEL if (MPI_SUCCESS != MPI_Init(&argc, &argv)) { @@ -146,6 +142,11 @@ ParthenonStatus ParthenonManager::ParthenonInitEnv(int argc, char *argv[]) { } void ParthenonManager::ParthenonInitPackagesAndMesh() { + if (called_init_packages_and_mesh_) { + PARTHENON_THROW("Called ParthenonInitPackagesAndMesh twice!"); + } + called_init_packages_and_mesh_ = true; + // Allow for user overrides to default Parthenon functions if (app_input->ProcessPackages != 
nullptr) { ProcessPackages = app_input->ProcessPackages; diff --git a/src/parthenon_manager.hpp b/src/parthenon_manager.hpp index 3ff5184e8f95..4dc31f169b1f 100644 --- a/src/parthenon_manager.hpp +++ b/src/parthenon_manager.hpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -37,7 +37,6 @@ enum class ParthenonStatus { ok, complete, error }; class ParthenonManager { public: ParthenonManager() { app_input.reset(new ApplicationInput()); } - ParthenonStatus ParthenonInit(int argc, char *argv[]); ParthenonStatus ParthenonInitEnv(int argc, char *argv[]); void ParthenonInitPackagesAndMesh(); ParthenonStatus ParthenonFinalize(); @@ -57,6 +56,8 @@ class ParthenonManager { private: ArgParse arg; + bool called_init_env_ = false; + bool called_init_packages_and_mesh_ = false; template void ReadSwarmVars_(const SP_Swarm &pswarm, const BlockList_t &block_list, diff --git a/src/utils/change_rundir.cpp b/src/utils/change_rundir.cpp index 761873d058c6..4c616d61f68c 100644 --- a/src/utils/change_rundir.cpp +++ b/src/utils/change_rundir.cpp @@ -3,7 +3,7 @@ // Copyright(C) 2014 James M. Stone and other code contributors // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2023. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. 
Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -27,6 +27,8 @@ #include #include "defs.hpp" +#include "globals.hpp" +#include "parthenon_mpi.hpp" #include "utils/error_checking.hpp" namespace fs = FS_NAMESPACE; @@ -42,19 +44,25 @@ void ChangeRunDir(const char *pdir) { if (pdir == nullptr || *pdir == '\0') return; - if (!fs::exists(pdir)) { - if (!fs::create_directories(pdir)) { - msg << "### FATAL ERROR in function [ChangeToRunDir]" << std::endl - << "Cannot create directory '" << pdir << "'"; - PARTHENON_THROW(msg); - } + if (parthenon::Globals::my_rank == 0) { + if (!fs::exists(pdir)) { + if (!fs::create_directories(pdir)) { + msg << "### FATAL ERROR in function [ChangeToRunDir]" << std::endl + << "Cannot create directory '" << pdir << "'"; + PARTHENON_THROW(msg); + } - // in POSIX, this is 0755 permission, rwxr-xr-x - auto perms = fs::perms::owner_all | fs::perms::group_read | fs::perms::group_exec | - fs::perms::others_read | fs::perms::others_exec; - fs::permissions(pdir, perms); + // in POSIX, this is 0755 permission, rwxr-xr-x + auto perms = fs::perms::owner_all | fs::perms::group_read | fs::perms::group_exec | + fs::perms::others_read | fs::perms::others_exec; + fs::permissions(pdir, perms); + } } +#ifdef MPI_PARALLEL + MPI_Barrier(MPI_COMM_WORLD); +#endif + if (chdir(pdir)) { msg << "### FATAL ERROR in function [ChangeToRunDir]" << std::endl << "Cannot cd to directory '" << pdir << "'"; diff --git a/tst/unit/test_sparse_pack.cpp b/tst/unit/test_sparse_pack.cpp index 2e9f656222b1..48172d5723f3 100644 --- a/tst/unit/test_sparse_pack.cpp +++ b/tst/unit/test_sparse_pack.cpp @@ -197,12 +197,28 @@ TEST_CASE("Test behavior of sparse packs", "[SparsePack]") { auto desc = parthenon::MakePackDescriptor( pkg.get(), {}, {PDOpt::WithFluxes, PDOpt::Flatten}); auto pack = desc.GetPack(&mesh_data); + int lo = pack.GetLowerBoundHost(2); int hi = pack.GetUpperBoundHost(2); REQUIRE(lo == 
4 - 1 + 4 + 1); // lo = index in flat pack where block 2 starts. // v3 and v5 = 4 total var components REQUIRE(hi == lo); // hi = index in flat pack where block 2 ends. Only v3 // present, so only 1 var + AND_THEN("The flattened sparse pack can access vars correctly") { + const int nblocks_and_vars = pack.GetMaxNumberOfVars(); + int nwrong = 0; + par_reduce( + loop_pattern_mdrange_tag, "test flat", DevExecSpace(), 0, + nblocks_and_vars - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(int v, int k, int j, int i, int <ot) { + int n = i + 1e1 * j + 1e2 * k; + if (n != (static_cast(pack(v, k, j, i)) % 1000)) { + ltot += 1; + } + }, + nwrong); + REQUIRE(nwrong == 0); + } } }