Skip to content

Commit

Permalink
Merge branch 'develop' into lroberts36/add-multi-grid
Browse files Browse the repository at this point in the history
  • Loading branch information
lroberts36 authored Oct 13, 2023
2 parents fc0467b + 8b8ab7f commit e1a88fd
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 19 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
- [[PR 885]](https://github.com/parthenon-hpc-lab/parthenon/pull/885) Expose PackDescriptor and use uids in SparsePacks

### Fixed (not changing behavior/API/variables/...)
- [[PR 952]](https://github.com/parthenon-hpc-lab/parthenon/pull/954) Fix format string in sparse advection example
- [[PR 947]](https://github.com/parthenon-hpc-lab/parthenon/pull/947) Add missing ForceRemeshComm dependencies
- [[PR 928]](https://github.com/parthenon-hpc-lab/parthenon/pull/928) Fix boundary comms during refinement next to refined blocks
- [[PR 937]](https://github.com/parthenon-hpc-lab/parthenon/pull/937) Fix multiple line continuations
Expand All @@ -34,6 +35,7 @@
- [[PR 890]](https://github.com/parthenon-hpc-lab/parthenon/pull/890) Fix bugs in sparse communication and prolongation

### Infrastructure (changes irrelevant to downstream codes)
- [[PR 938]](https://github.com/parthenon-hpc-lab/parthenon/pull/938) Restructure buffer packing/unpacking kernel hierarchical parallelism
- [[PR 944]](https://github.com/parthenon-hpc-lab/parthenon/pull/944) Move sparse pack identifier creation to descriptor
- [[PR 904]](https://github.com/parthenon-hpc-lab/parthenon/pull/904) Move to prolongation/restriction in one for AMR and communicate non-cell centered fields
- [[PR 918]](https://github.com/parthenon-hpc-lab/parthenon/pull/918) Refactor RegionSize
Expand Down
4 changes: 2 additions & 2 deletions example/sparse_advection/parthenon_app_inputs.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// (C) (or copyright) 2021. Triad National Security, LLC. All rights reserved.
// (C) (or copyright) 2023. Triad National Security, LLC. All rights reserved.
//
// This program was produced under U.S. Government contract 89233218CNA000001 for Los
// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
Expand Down Expand Up @@ -176,7 +176,7 @@ void PostStepDiagnosticsInLoop(Mesh *mesh, ParameterInput *pin, const SimTime &t
}
std::printf("\n");
Real mem_avg = static_cast<Real>(mem_tot) / static_cast<Real>(blocks_tot);
std::printf("\tMem used/block in bytes [min, max, avg] = [%ld, %ld, %.14e]\n",
std::printf("\tMem used/block in bytes [min, max, avg] = [%lu, %lu, %.14e]\n",
mem_min, mem_max, mem_avg);
}
}
Expand Down
68 changes: 51 additions & 17 deletions src/bvals/comms/boundary_communication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ TaskStatus SendBoundBufs(std::shared_ptr<MeshData<Real>> &md) {
PARTHENON_DEBUG_REQUIRE(bnd_info.size() == nbound, "Need same size for boundary info");
auto &sending_nonzero_flags = cache.sending_non_zero_flags;
auto &sending_nonzero_flags_h = cache.sending_non_zero_flags_h;

Kokkos::parallel_for(
"SendBoundBufs",
Kokkos::TeamPolicy<>(parthenon::DevExecSpace(), nbound, Kokkos::AUTO),
Expand All @@ -106,13 +107,26 @@ TaskStatus SendBoundBufs(std::shared_ptr<MeshData<Real>> &md) {
int idx_offset = 0;
for (int iel = 0; iel < bnd_info(b).ntopological_elements; ++iel) {
auto &idxer = bnd_info(b).idxer[iel];
const int Ni = idxer.template EndIdx<5>() - idxer.template StartIdx<5>() + 1;
Kokkos::parallel_reduce(
Kokkos::TeamThreadRange<>(team_member, idxer.size()),
Kokkos::TeamThreadRange<>(team_member, idxer.size() / Ni),
[&](const int idx, bool &lnon_zero) {
const auto [t, u, v, k, j, i] = idxer(idx);
const Real &val = bnd_info(b).var(iel, t, u, v, k, j, i);
bnd_info(b).buf(idx + idx_offset) = val;
lnon_zero = lnon_zero || (std::abs(val) >= threshold);
const auto [t, u, v, k, j, i] = idxer(idx * Ni);
Real *var = &bnd_info(b).var(iel, t, u, v, k, j, i);
Real *buf = &bnd_info(b).buf(idx * Ni + idx_offset);

Kokkos::parallel_for(Kokkos::ThreadVectorRange<>(team_member, Ni),
[&](int m) { buf[m] = var[m]; });

bool mnon_zero = false;
Kokkos::parallel_reduce(
Kokkos::ThreadVectorRange<>(team_member, Ni),
[&](int m, bool &llnon_zero) {
llnon_zero = llnon_zero || (std::abs(buf[m]) >= threshold);
},
Kokkos::LOr<bool, parthenon::DevMemSpace>(mnon_zero));

lnon_zero = lnon_zero || mnon_zero;
},
Kokkos::LOr<bool, parthenon::DevMemSpace>(non_zero[iel]));
idx_offset += idxer.size();
Expand Down Expand Up @@ -258,21 +272,41 @@ TaskStatus SetBounds(std::shared_ptr<MeshData<Real>> &md) {
int idx_offset = 0;
for (int iel = 0; iel < bnd_info(b).ntopological_elements; ++iel) {
auto &idxer = bnd_info(b).idxer[iel];
const int Ni = idxer.template EndIdx<5>() - idxer.template StartIdx<5>() + 1;
if (bnd_info(b).buf_allocated && bnd_info(b).allocated) {
Kokkos::parallel_for(Kokkos::TeamThreadRange<>(team_member, idxer.size()),
[&](const int idx) {
const auto [t, u, v, k, j, i] = idxer(idx);
if (idxer.IsActive(k, j, i))
bnd_info(b).var(iel, t, u, v, k, j, i) =
bnd_info(b).buf(idx + idx_offset);
});
Kokkos::parallel_for(
Kokkos::TeamThreadRange<>(team_member, idxer.size() / Ni),
[&](const int idx) {
const auto [t, u, v, k, j, i] = idxer(idx * Ni);
Real *var = &bnd_info(b).var(iel, t, u, v, k, j, i);
Real *buf = &bnd_info(b).buf(idx * Ni + idx_offset);
// Copy the structured bindings into plain locals: C++17 does not allow
// structured bindings to be captured by the nested lambda below
const int kk = k;
const int jj = j;
const int ii = i;
Kokkos::parallel_for(Kokkos::ThreadVectorRange<>(team_member, Ni),
[&](int m) {
if (idxer.IsActive(kk, jj, ii + m))
var[m] = buf[m];
});
});
} else if (bnd_info(b).allocated) {
const Real default_val = bnd_info(b).var.sparse_default_val;
Kokkos::parallel_for(Kokkos::TeamThreadRange<>(team_member, idxer.size()),
[&](const int idx) {
const auto [t, u, v, k, j, i] = idxer(idx);
bnd_info(b).var(iel, t, u, v, k, j, i) = default_val;
});
Kokkos::parallel_for(
Kokkos::TeamThreadRange<>(team_member, idxer.size() / Ni),
[&](const int idx) {
const auto [t, u, v, k, j, i] = idxer(idx * Ni);
Real *var = &bnd_info(b).var(iel, t, u, v, k, j, i);
const int kk = k;
const int jj = j;
const int ii = i;
Kokkos::parallel_for(Kokkos::ThreadVectorRange<>(team_member, Ni),
[&](int m) {
if (idxer.IsActive(kk, jj, ii + m))
var[m] = default_val;
});
});
}
idx_offset += idxer.size();
}
Expand Down

0 comments on commit e1a88fd

Please sign in to comment.