From c3bd370cec7ad86bc00482f629a0c18cc990c6d6 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 08:52:20 -0700 Subject: [PATCH 01/13] Missing send size init --- src/bvals/bvals.cpp | 11 +++++++++++ src/interface/swarm_comms.cpp | 15 +++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/bvals/bvals.cpp b/src/bvals/bvals.cpp index 37a4d4689916..c6d8a571be30 100644 --- a/src/bvals/bvals.cpp +++ b/src/bvals/bvals.cpp @@ -100,9 +100,20 @@ void BoundarySwarm::Send(BoundaryCommSubset phase) { #ifdef MPI_PARALLEL PARTHENON_REQUIRE(bd_var_.req_send[nb.bufid] == MPI_REQUEST_NULL, "Trying to create a new send before previous send completes!"); + // printf("[%i] %s:%i\n", Globals::my_rank, __FILE__, __LINE__); + printf("[%i] send buf ptr: %p size: %i (extent: %i)\n", Globals::my_rank, + bd_var_.send[nb.bufid].data(), send_size[nb.bufid], + bd_var_.send[nb.bufid].span()); + if (send_size[nb.bufid] > 1000) { + printf("[%i] send buf ptr: %p size: %i (extent: %i)\n", Globals::my_rank, + bd_var_.send[nb.bufid].data(), send_size[nb.bufid], + bd_var_.send[nb.bufid].span()); + PARTHENON_FAIL("help!"); + } PARTHENON_MPI_CHECK(MPI_Isend(bd_var_.send[nb.bufid].data(), send_size[nb.bufid], MPI_PARTHENON_REAL, nb.rank, send_tag[nb.bufid], swarm_comm, &(bd_var_.req_send[nb.bufid]))); + // printf("[%i] %s:%i\n", Globals::my_rank, __FILE__, __LINE__); #endif // MPI_PARALLEL } else { MeshBlock &target_block = *pmy_mesh_->FindMeshBlock(nb.gid); diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 7b236b6e4e1f..3ab3a367689e 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -146,10 +146,13 @@ void Swarm::SetupPersistentMPI() { // Build device array mapping neighbor index to neighbor bufid if (pmb->neighbors.size() > 0) { + // printf("SANITIZING!\n"); ParArrayND neighbor_buffer_index("Neighbor buffer index", pmb->neighbors.size()); auto neighbor_buffer_index_h = neighbor_buffer_index.GetHostMirror(); for (int n = 0; n < pmb->neighbors.size(); n++) { neighbor_buffer_index_h(n) = pmb->neighbors[n].bufid; + // const int bufid = pmb->neighbors[n].bufid; + // vbswarm->bd_var_.send[bufid] = BufArray1D("Buffer", GetParticleDataSize()); } neighbor_buffer_index.DeepCopy(neighbor_buffer_index_h); neighbor_buffer_index_ = neighbor_buffer_index; @@ -229,6 +232,8 @@ void Swarm::LoadBuffers_() { auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy(); auto buffer_start_h = buffer_start.GetHostMirrorAndCopy(); + Kokkos::fence(); + // Resize send buffers if too small for (int n = 0; n < pmb->neighbors.size(); n++) { num_particles_to_send_h(n) -= buffer_start_h(n); @@ -237,8 +242,13 @@ void Swarm::LoadBuffers_() { if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) { sendbuf = BufArray1D("Buffer", num_particles_to_send_h(n) * particle_size); vbswarm->bd_var_.send[bufid] = sendbuf; + printf("[%i] new buf %i! size: %i span: %i\n", Globals::my_rank, bufid, + num_particles_to_send_h(n) * particle_size, + vbswarm->bd_var_.send[bufid].span()); } vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size; + printf("send size: %i (%i %i)\n", vbswarm->send_size[bufid], + num_particles_to_send_h(n), particle_size); } auto &bdvar = vbswarm->bd_var_; @@ -269,6 +279,11 @@ void Swarm::LoadBuffers_() { // Remove particles that were loaded to send to another block from this block RemoveMarkedParticles(); + } else { + for (int n = 0; n < pmb->neighbors.size(); n++) { + const int bufid = pmb->neighbors[n].bufid; + vbswarm->send_size[bufid] = 0; + } } } From 50069a25b237a95de229f63d5ee6fa98d3dcd77c Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 09:14:11 -0700 Subject: [PATCH 02/13] cleanup, CHANGELOG --- CHANGELOG.md | 1 + src/bvals/bvals.cpp | 11 ----------- src/interface/swarm_comms.cpp | 10 ---------- 3 files changed, 1 insertion(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17f8a009242d..c29941247e43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small perfomance upgrades ### Changed (changing behavior/API/variables/...) +- [[PR 1206]](https://github.com/parthenon-hpc-lab/parthenon/pull/1206) Leapfrog fix - [[PR1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image - [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag - [[PR 1187]](https://github.com/parthenon-hpc-lab/parthenon/pull/1187) Make DataCollection::Add safer and generalize MeshBlockData::Initialize diff --git a/src/bvals/bvals.cpp b/src/bvals/bvals.cpp index c6d8a571be30..37a4d4689916 100644 --- a/src/bvals/bvals.cpp +++ b/src/bvals/bvals.cpp @@ -100,20 +100,9 @@ void BoundarySwarm::Send(BoundaryCommSubset phase) { #ifdef MPI_PARALLEL PARTHENON_REQUIRE(bd_var_.req_send[nb.bufid] == MPI_REQUEST_NULL, "Trying to create a new send before previous send completes!"); - // printf("[%i] %s:%i\n", Globals::my_rank, __FILE__, __LINE__); - printf("[%i] send buf ptr: %p size: %i (extent: %i)\n", Globals::my_rank, - bd_var_.send[nb.bufid].data(), send_size[nb.bufid], - bd_var_.send[nb.bufid].span()); - if (send_size[nb.bufid] > 1000) { - printf("[%i] send buf ptr: %p size: %i (extent: %i)\n", Globals::my_rank, - bd_var_.send[nb.bufid].data(), send_size[nb.bufid], - bd_var_.send[nb.bufid].span()); - PARTHENON_FAIL("help!"); - } PARTHENON_MPI_CHECK(MPI_Isend(bd_var_.send[nb.bufid].data(), send_size[nb.bufid], MPI_PARTHENON_REAL, nb.rank, send_tag[nb.bufid], swarm_comm, &(bd_var_.req_send[nb.bufid]))); - // printf("[%i] %s:%i\n", Globals::my_rank, __FILE__, __LINE__); #endif // MPI_PARALLEL } else { MeshBlock &target_block = *pmy_mesh_->FindMeshBlock(nb.gid); diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 3ab3a367689e..054ded34fabb 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -146,13 +146,10 @@ void Swarm::SetupPersistentMPI() { // Build device array mapping neighbor index to neighbor bufid if (pmb->neighbors.size() > 0) { - // printf("SANITIZING!\n"); ParArrayND neighbor_buffer_index("Neighbor buffer index", pmb->neighbors.size()); auto neighbor_buffer_index_h = neighbor_buffer_index.GetHostMirror(); for (int n = 0; n < pmb->neighbors.size(); n++) { neighbor_buffer_index_h(n) = pmb->neighbors[n].bufid; - // const int bufid = pmb->neighbors[n].bufid; - // vbswarm->bd_var_.send[bufid] = BufArray1D("Buffer", GetParticleDataSize()); } neighbor_buffer_index.DeepCopy(neighbor_buffer_index_h); neighbor_buffer_index_ = neighbor_buffer_index; @@ -232,8 +229,6 @@ void Swarm::LoadBuffers_() { auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy(); auto buffer_start_h = buffer_start.GetHostMirrorAndCopy(); - Kokkos::fence(); - // Resize send buffers if too small for (int n = 0; n < pmb->neighbors.size(); n++) { num_particles_to_send_h(n) -= buffer_start_h(n); @@ -242,13 +237,8 @@ void Swarm::LoadBuffers_() { if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) { sendbuf = BufArray1D("Buffer", num_particles_to_send_h(n) * particle_size); vbswarm->bd_var_.send[bufid] = sendbuf; - printf("[%i] new buf %i! size: %i span: %i\n", Globals::my_rank, bufid, - num_particles_to_send_h(n) * particle_size, - vbswarm->bd_var_.send[bufid].span()); } vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size; - printf("send size: %i (%i %i)\n", vbswarm->send_size[bufid], - num_particles_to_send_h(n), particle_size); } auto &bdvar = vbswarm->bd_var_; From af087fdf98d822597fa9556f455e4b2e1dcdb788 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 09:37:09 -0700 Subject: [PATCH 03/13] verbose CI --- .github/workflows/ci-short.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-short.yml b/.github/workflows/ci-short.yml index ecb4052411ee..45617edcfde7 100644 --- a/.github/workflows/ci-short.yml +++ b/.github/workflows/ci-short.yml @@ -164,7 +164,7 @@ jobs: run: | cmake --build build -t particle-leapfrog cd build - ctest -R regression_mpi_test:particle_leapfrog + ctest -R regression_mpi_test:particle_leapfrog -V - uses: actions/upload-artifact@v3 with: From d6145e1055c136d126ecb554035118ba08580708 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 09:47:12 -0700 Subject: [PATCH 04/13] further CI debugging --- .github/workflows/ci-short.yml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-short.yml b/.github/workflows/ci-short.yml index 45617edcfde7..ec56441a564f 100644 --- a/.github/workflows/ci-short.yml +++ b/.github/workflows/ci-short.yml @@ -154,16 +154,18 @@ jobs: -DMACHINE_VARIANT=hip-mpi \ -DCMAKE_CXX_COMPILER=hipcc # Test example with "variables" and output - - name: advection - run: | - cmake --build build -t advection-example - cd build - ctest -R regression_mpi_test:output_hdf5 + #- name: advection + # run: | + # cmake --build build -t advection-example + # cd build + # ctest -R regression_mpi_test:output_hdf5 # Test example with swarms - name: particle-leapfrog run: | cmake --build build -t particle-leapfrog cd build + /usr/bin/mpiexec -n 2 --allow-run-as-root /__w/parthenon/parthenon/build/example/particle_leapfrog/particle-leapfrog -i /__w/parthenon/parthenon/tst/regression/test_suites/particle_leapfrog/parthinput.particle_leapfrog parthenon/job/problem_id=gold --kokkos-map-device-id-by=mpi_rank + ls ctest -R regression_mpi_test:particle_leapfrog -V - uses: actions/upload-artifact@v3 From 8c6f4d00b7458fe0cba56f1cc5543bdea606a4e2 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 13:35:43 -0700 Subject: [PATCH 05/13] This should be working... --- src/utils/sort.hpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/utils/sort.hpp b/src/utils/sort.hpp index 97e9c77a88e4..a4ab8139dab3 100644 --- a/src/utils/sort.hpp +++ b/src/utils/sort.hpp @@ -61,7 +61,7 @@ void sort(ParArray1D data, KeyComparator comparator, size_t min_idx, size_t max_idx) { PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!"); PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!"); -#ifdef KOKKOS_ENABLE_CUDA +#if defined(KOKKOS_ENABLE_CUDA) #ifdef __clang__ PARTHENON_FAIL("sort is using thrust and there exists an incompatibility with clang, " "see https://github.com/lanl/parthenon/issues/647 for more details. We " @@ -74,6 +74,13 @@ void sort(ParArray1D data, KeyComparator comparator, size_t min_idx, thrust::device_ptr last_d = thrust::device_pointer_cast(data.data()) + max_idx + 1; thrust::sort(first_d, last_d, comparator); #endif +#elif defined(KOKKOS_ENABLE_HIP) + auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data); + std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1, comparator); + Kokkos::deep_copy(data, data_h); + // TODO(BRR) With Kokkos 4.4, switch to Kokkos::sort + // auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1)); + // Kokkos::sort(sub_data, comparator); #else if (std::is_same::value) { std::sort(data.data() + min_idx, data.data() + max_idx + 1, comparator); @@ -89,7 +96,7 @@ template void sort(ParArray1D data, size_t min_idx, size_t max_idx) { PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!"); PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!"); -#ifdef KOKKOS_ENABLE_CUDA +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) #ifdef __clang__ PARTHENON_FAIL("sort is using thrust and there exists an incompatibility with clang, " "see https://github.com/lanl/parthenon/issues/647 for more details. We " @@ -102,6 +109,12 @@ void sort(ParArray1D data, size_t min_idx, size_t max_idx) { thrust::device_ptr last_d = thrust::device_pointer_cast(data.data()) + max_idx + 1; thrust::sort(first_d, last_d); #endif + auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data); + std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1); + Kokkos::deep_copy(data, data_h); + // TODO(BRR) With Kokkos 4.4, switch to Kokkos::sort + // auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1)); + // Kokkos::sort(sub_data); #else if (std::is_same::value) { std::sort(data.data() + min_idx, data.data() + max_idx + 1); From 42a9356b4916d085d5a269cdb4cc439c8a87a7d5 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 13:57:21 -0700 Subject: [PATCH 06/13] This should be fixed... but I get a segfault on GPU --- src/utils/sort.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/utils/sort.hpp b/src/utils/sort.hpp index a4ab8139dab3..3c91b1ec85ca 100644 --- a/src/utils/sort.hpp +++ b/src/utils/sort.hpp @@ -59,6 +59,8 @@ KOKKOS_INLINE_FUNCTION int upper_bound(const T &arr, Real val) { template void sort(ParArray1D data, KeyComparator comparator, size_t min_idx, size_t max_idx) { + Kokkos::fence(); + printf("%s:%i\n", __FILE__, __LINE__); PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!"); PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!"); #if defined(KOKKOS_ENABLE_CUDA) @@ -90,6 +92,8 @@ void sort(ParArray1D data, KeyComparator comparator, size_t min_idx, "touch by opening an issue on the Parthenon GitHub."); } #endif // KOKKOS_ENABLE_CUDA + Kokkos::fence(); + printf("%s:%i\n", __FILE__, __LINE__); } template From 82353bcb9e5b237ef4587b25dd9b25ed4e552248 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 16:01:50 -0700 Subject: [PATCH 07/13] Is it my AMD GPU thats wrong? --- src/bvals/bvals.cpp | 2 ++ src/interface/swarm_comms.cpp | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/src/bvals/bvals.cpp b/src/bvals/bvals.cpp index 37a4d4689916..23fb52fadc85 100644 --- a/src/bvals/bvals.cpp +++ b/src/bvals/bvals.cpp @@ -91,6 +91,7 @@ void BoundarySwarm::SetupPersistentMPI() { // Send particle buffers across meshblocks. If different MPI ranks, use MPI, if same rank, // do a deep copy on device. void BoundarySwarm::Send(BoundaryCommSubset phase) { + // printf("%s:%i\n", __FILE__, __LINE__); std::shared_ptr pmb = GetBlockPointer(); // Fence to make sure buffers are loaded before sending pmb->exec_space.fence(); @@ -161,6 +162,7 @@ void BoundarySwarm::Receive(BoundaryCommSubset phase) { } } #endif + // printf("%s:%i\n", __FILE__, __LINE__); } // BoundarySwarms constructor (the first object constructed inside the MeshBlock() diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 054ded34fabb..2128e66e8988 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -313,6 +313,8 @@ void Swarm::UnloadBuffers_() { auto &bdvar = vbswarm->bd_var_; const int nbmax = vbswarm->bd_var_.nbmax; + return; // debug + if (total_received_particles_ > 0) { auto newParticlesContext = AddEmptyParticles(total_received_particles_); @@ -372,6 +374,8 @@ void Swarm::UnloadBuffers_() { } bool Swarm::Receive(BoundaryCommSubset phase) { + return true; // Debug + auto pmb = GetBlockPointer(); const int nneighbor = pmb->neighbors.size(); From afc51fef2815cbf61d0610f0eb08549185217369 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 16:07:24 -0700 Subject: [PATCH 08/13] Missing a return statement --- src/bvals/bvals.cpp | 2 -- src/interface/swarm_comms.cpp | 2 -- src/utils/sort.hpp | 4 ---- 3 files changed, 8 deletions(-) diff --git a/src/bvals/bvals.cpp b/src/bvals/bvals.cpp index 23fb52fadc85..37a4d4689916 100644 --- a/src/bvals/bvals.cpp +++ b/src/bvals/bvals.cpp @@ -91,7 +91,6 @@ void BoundarySwarm::SetupPersistentMPI() { // Send particle buffers across meshblocks. If different MPI ranks, use MPI, if same rank, // do a deep copy on device. void BoundarySwarm::Send(BoundaryCommSubset phase) { - // printf("%s:%i\n", __FILE__, __LINE__); std::shared_ptr pmb = GetBlockPointer(); // Fence to make sure buffers are loaded before sending pmb->exec_space.fence(); @@ -162,7 +161,6 @@ void BoundarySwarm::Receive(BoundaryCommSubset phase) { } } #endif - // printf("%s:%i\n", __FILE__, __LINE__); } // BoundarySwarms constructor (the first object constructed inside the MeshBlock() diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 2128e66e8988..db339d264054 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -313,8 +313,6 @@ void Swarm::UnloadBuffers_() { auto &bdvar = vbswarm->bd_var_; const int nbmax = vbswarm->bd_var_.nbmax; - return; // debug - if (total_received_particles_ > 0) { auto newParticlesContext = AddEmptyParticles(total_received_particles_); diff --git a/src/utils/sort.hpp b/src/utils/sort.hpp index 3c91b1ec85ca..a4ab8139dab3 100644 --- a/src/utils/sort.hpp +++ b/src/utils/sort.hpp @@ -59,8 +59,6 @@ KOKKOS_INLINE_FUNCTION int upper_bound(const T &arr, Real val) { template void sort(ParArray1D data, KeyComparator comparator, size_t min_idx, size_t max_idx) { - Kokkos::fence(); - printf("%s:%i\n", __FILE__, __LINE__); PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!"); PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!"); #if defined(KOKKOS_ENABLE_CUDA) @@ -92,8 +90,6 @@ void sort(ParArray1D data, KeyComparator comparator, size_t min_idx, "touch by opening an issue on the Parthenon GitHub."); } #endif // KOKKOS_ENABLE_CUDA - Kokkos::fence(); - printf("%s:%i\n", __FILE__, __LINE__); } template From a9fb9ea0617f0c26a4b82e01b0fda237450ca59c Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 16:12:43 -0700 Subject: [PATCH 09/13] retest --- .../particle_leapfrog_outflow/particle_leapfrog_outflow.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tst/regression/test_suites/particle_leapfrog_outflow/particle_leapfrog_outflow.py b/tst/regression/test_suites/particle_leapfrog_outflow/particle_leapfrog_outflow.py index 6b855e2a9fbc..cf806120a946 100644 --- a/tst/regression/test_suites/particle_leapfrog_outflow/particle_leapfrog_outflow.py +++ b/tst/regression/test_suites/particle_leapfrog_outflow/particle_leapfrog_outflow.py @@ -57,5 +57,7 @@ def Analyse(self, parameters): ) if ref_data.shape != final_data.shape: print("TEST FAIL: Mismatch between actual and reference data shape.") + print(ref_data) + print(final_data) return False return (np.abs(final_data - ref_data) <= 1e-10).all() From 375eeb54ab9852645b642af97a1554200029b0d8 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 16:47:20 -0700 Subject: [PATCH 10/13] Oops missing statement --- src/interface/swarm_comms.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index db339d264054..054ded34fabb 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -372,8 +372,6 @@ void Swarm::UnloadBuffers_() { } bool Swarm::Receive(BoundaryCommSubset phase) { - return true; // Debug - auto pmb = GetBlockPointer(); const int nneighbor = pmb->neighbors.size(); From c7d1abe62dbbcb18e06836102bad79cfd04ca675 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 16:49:39 -0700 Subject: [PATCH 11/13] Revert test --- .../particle_leapfrog_outflow/particle_leapfrog_outflow.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tst/regression/test_suites/particle_leapfrog_outflow/particle_leapfrog_outflow.py b/tst/regression/test_suites/particle_leapfrog_outflow/particle_leapfrog_outflow.py index cf806120a946..6b855e2a9fbc 100644 --- a/tst/regression/test_suites/particle_leapfrog_outflow/particle_leapfrog_outflow.py +++ b/tst/regression/test_suites/particle_leapfrog_outflow/particle_leapfrog_outflow.py @@ -57,7 +57,5 @@ def Analyse(self, parameters): ) if ref_data.shape != final_data.shape: print("TEST FAIL: Mismatch between actual and reference data shape.") - print(ref_data) - print(final_data) return False return (np.abs(final_data - ref_data) <= 1e-10).all() From 8df50488a962376c1cddfb72e7efe3b141741a09 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 16:51:25 -0700 Subject: [PATCH 12/13] revert workflow --- .github/workflows/ci-short.yml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci-short.yml b/.github/workflows/ci-short.yml index ec56441a564f..ecb4052411ee 100644 --- a/.github/workflows/ci-short.yml +++ b/.github/workflows/ci-short.yml @@ -154,19 +154,17 @@ jobs: -DMACHINE_VARIANT=hip-mpi \ -DCMAKE_CXX_COMPILER=hipcc # Test example with "variables" and output - #- name: advection - # run: | - # cmake --build build -t advection-example - # cd build - # ctest -R regression_mpi_test:output_hdf5 + - name: advection + run: | + cmake --build build -t advection-example + cd build + ctest -R regression_mpi_test:output_hdf5 # Test example with swarms - name: particle-leapfrog run: | cmake --build build -t particle-leapfrog cd build - /usr/bin/mpiexec -n 2 --allow-run-as-root /__w/parthenon/parthenon/build/example/particle_leapfrog/particle-leapfrog -i /__w/parthenon/parthenon/tst/regression/test_suites/particle_leapfrog/parthinput.particle_leapfrog parthenon/job/problem_id=gold --kokkos-map-device-id-by=mpi_rank - ls - ctest -R regression_mpi_test:particle_leapfrog -V + ctest -R regression_mpi_test:particle_leapfrog - uses: actions/upload-artifact@v3 with: From 66d38d0feaf9f001120e6651a7b100716bf42540 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 19:42:32 -0700 Subject: [PATCH 13/13] Kokkos::sort --- src/utils/sort.hpp | 53 ++++++---------------------------------------- 1 file changed, 6 insertions(+), 47 deletions(-) diff --git a/src/utils/sort.hpp b/src/utils/sort.hpp index a4ab8139dab3..2076648f16c3 100644 --- a/src/utils/sort.hpp +++ b/src/utils/sort.hpp @@ -61,34 +61,13 @@ void sort(ParArray1D data, KeyComparator comparator, size_t min_idx, size_t max_idx) { PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!"); PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!"); -#if defined(KOKKOS_ENABLE_CUDA) -#ifdef __clang__ - PARTHENON_FAIL("sort is using thrust and there exists an incompatibility with clang, " - "see https://github.com/lanl/parthenon/issues/647 for more details. We " - "won't fix it because eventually the Parthenon sort should make use of " - "Kokkos::sort once a performant implementation is availabe. If you see " - "this message and need sort on CUDA devices with clang compiler please " - "get in touch by opening an issue on the Parthenon GitHub repo."); -#else +#if defined(KOKKOS_ENABLE_CUDA) && !defined(__clang__) thrust::device_ptr first_d = thrust::device_pointer_cast(data.data()) + min_idx; thrust::device_ptr last_d = thrust::device_pointer_cast(data.data()) + max_idx + 1; thrust::sort(first_d, last_d, comparator); -#endif -#elif defined(KOKKOS_ENABLE_HIP) - auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data); - std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1, comparator); - Kokkos::deep_copy(data, data_h); - // TODO(BRR) With Kokkos 4.4, switch to Kokkos::sort - // auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1)); - // Kokkos::sort(sub_data, comparator); #else - if (std::is_same::value) { - std::sort(data.data() + min_idx, data.data() + max_idx + 1, comparator); - } else { - PARTHENON_FAIL("sort is not supported outside of CPU or NVIDIA GPU. If you need sort " - "support on other devices, e.g., AMD or Intel GPUs, please get in " - "touch by opening an issue on the Parthenon GitHub."); - } + auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1)); + Kokkos::sort(sub_data, comparator); #endif // KOKKOS_ENABLE_CUDA } @@ -96,33 +75,13 @@ template void sort(ParArray1D data, size_t min_idx, size_t max_idx) { PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!"); PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!"); -#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) -#ifdef __clang__ - PARTHENON_FAIL("sort is using thrust and there exists an incompatibility with clang, " - "see https://github.com/lanl/parthenon/issues/647 for more details. We " - "won't fix it because eventually the Parthenon sort should make use of " - "Kokkos::sort once a performant implementation is availabe. If you see " - "this message and need sort on CUDA devices with clang compiler please " - "get in touch by opening an issue on the Parthenon GitHub repo."); -#else +#if defined(KOKKOS_ENABLE_CUDA) && !defined(__clang__) thrust::device_ptr first_d = thrust::device_pointer_cast(data.data()) + min_idx; thrust::device_ptr last_d = thrust::device_pointer_cast(data.data()) + max_idx + 1; thrust::sort(first_d, last_d); -#endif - auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data); - std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1); - Kokkos::deep_copy(data, data_h); - // TODO(BRR) With Kokkos 4.4, switch to Kokkos::sort - // auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1)); - // Kokkos::sort(sub_data); #else - if (std::is_same::value) { - std::sort(data.data() + min_idx, data.data() + max_idx + 1); - } else { - PARTHENON_FAIL("sort is not supported outside of CPU or NVIDIA GPU. If you need sort " - "support on other devices, e.g., AMD or Intel GPUs, please get in " - "touch by opening an issue on the Parthenon GitHub."); - } + auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1)); + Kokkos::sort(sub_data); #endif // KOKKOS_ENABLE_CUDA }