diff --git a/doc/sphinx/src/boundary_communication.rst b/doc/sphinx/src/boundary_communication.rst index 2809536e18fb..b21a490ffa3e 100644 --- a/doc/sphinx/src/boundary_communication.rst +++ b/doc/sphinx/src/boundary_communication.rst @@ -512,19 +512,19 @@ curently by default this is set to ``false``. Implementation Details ~~~~~~~~~~~~~~~~~~~~~~ -The coalesced send and receive buffers for each rank are stored in ``Mesh::pcoalesced_buffers``, -which is a ``std::shared_ptr`` to a ``CoalescedBuffers`` object. To do coalesced communication +The coalesced send and receive buffers for each rank are stored in ``Mesh::pcoalesced_comms``, +which is a ``std::shared_ptr`` to a ``CoalescedComms`` object. To do coalesced communication two pieces are required: 1) an initialization step telling all ranks what coalesced buffer messages they can expect and 2) a mechanism for packing, sending and unpacking the coalesced buffers during each boundary communication step. For the first piece, after every remesh during ``BuildBoundaryBuffers``, each non-local -variable-boundary buffer is registered with ``pcoalesced_buffers``. Once all these buffers are -registered, ``CoalescedBuffers::ResolveAndSendSendBuffers()`` is called, which determines all +variable-boundary buffer is registered with ``pcoalesced_comms``. Once all these buffers are +registered, ``CoalescedComms::ResolveAndSendSendBuffers()`` is called, which determines all the coalesced buffers that are going to be sent from a given rank to every other rank, packs information about each of the coalesced buffers into MPI messages, and sends them to the other ranks so that the receiving ranks know how to interpret the messages they receive from a given -rank. ``CoalescedBuffers::ReceiveBufferInfo()`` is then called to receive this information from +rank. ``CoalescedComms::ReceiveBufferInfo()`` is then called to receive this information from other ranks. 
This process basically just packs ``BndId`` objects, which contain the information necessary to identify a variable-boundary communication channel and the amount of data that is communicated across that channel, and then unpacks them on the receiving end and finds the @@ -533,7 +533,7 @@ correct variable-boundary buffers. These routines are called once per rank (rath For the second piece, variable-boundary buffers are first filled as normal in ``SendBoundBufs`` but the states of the ``CommBuffer``s are updated without actually calling the associated -``MPI_Isend``s. Then ``CoalescedBuffers::PackAndSend(MeshData *pmd, BoundaryType b_type)`` +``MPI_Isend``s. Then ``CoalescedComms::PackAndSend(MeshData *pmd, BoundaryType b_type)`` is called, which for each rank pair associated with ``pmd`` packs the variable-boundary buffers into the coalesced buffer, packs a second message containing the sparse allocation status of each variable-boundary buffer, send these two messages, and then stales the associated @@ -547,14 +547,14 @@ being in a received state. Once they are all in a received state, setting of bou prolongation, etc. can proceed normally. Some notes: -- Internally ``CoalescedBuffers`` contains maps from MPI rank and ``BoundaryType`` (e.g. regular +- Internally ``CoalescedComms`` contains maps from MPI rank and ``BoundaryType`` (e.g. regular communication, flux correction) to ``CoalescedBuffersRank`` objects for sending and receiving rank pairs. These ``CoalescedBuffersRank`` objects in turn contain maps from ``MeshData`` partition id of the sending ``MeshData`` (which also doubles as the MPI tag for the messages) - to ``CoalescedBuffersRankPartition`` objects. + to ``CoalescedBuffer`` objects. - ``CoalescedBuffersRank`` is where the post-remesh initialization routines are actually implemented. This can either correspond to the send or receive side. 
-- ``CoalescedBuffersRankPartition`` corresponds to each coalesced buffer and is where the +- ``CoalescedBuffer`` corresponds to each coalesced buffer and is where the packing, sending, receiving, and unpacking details for coalesced boundary communication are implemented. This object internally owns the ``CommunicationBuffer>`` that is used for sending and receiving the coalesced data (as well as the communication buffer @@ -563,6 +563,6 @@ Some notes: ``MetaData::FillGhost`` fields in a simulation, we need to be able to interpret coalesced messages that that contain a subset of fields. Most of what is needed for this is implemented in ``GetBndIdsOnDevice``. -- Currently, there is a ``Compare`` method in ``CoalescedBuffersRankPartition`` that is just for +- Currently, there is a ``Compare`` method in ``CoalescedBuffer`` that is just for debugging. It should compare the received coalesced messages to the variable-boundary buffer messages, but using it requires some hacks in the code to send both types of buffers. 
\ No newline at end of file diff --git a/src/bvals/comms/boundary_communication.cpp b/src/bvals/comms/boundary_communication.cpp index f7ec58039ec5..fd46d2010c00 100644 --- a/src/bvals/comms/boundary_communication.cpp +++ b/src/bvals/comms/boundary_communication.cpp @@ -66,7 +66,7 @@ TaskStatus SendBoundBufs(std::shared_ptr> &md) { } bool can_write_combined = - pmesh->pcoalesced_buffers->IsAvailableForWrite(md.get(), bound_type); + pmesh->pcoalesced_comms->IsAvailableForWrite(md.get(), bound_type); if (other_communication_unfinished || !can_write_combined) { return TaskStatus::incomplete; } @@ -160,7 +160,7 @@ TaskStatus SendBoundBufs(std::shared_ptr> &md) { buf.SendNull(coal_comm); } if (pmesh->do_coalesced_comms) - pmesh->pcoalesced_buffers->PackAndSend(md.get(), bound_type); + pmesh->pcoalesced_comms->PackAndSend(md.get(), bound_type); return TaskStatus::complete; } @@ -220,7 +220,7 @@ TaskStatus ReceiveBoundBufs(std::shared_ptr> &md) { if (pmesh->do_coalesced_comms) { // Receive any messages that are around bool all_coalesced_received = - pmesh->pcoalesced_buffers->TryReceiveAny(md.get(), bound_type); + pmesh->pcoalesced_comms->TryReceiveAny(md.get(), bound_type); // all_received = all_received && all_coalesced_received; } const bool coal_comm = pmesh->do_coalesced_comms; @@ -272,7 +272,7 @@ TaskStatus SetBounds(std::shared_ptr> &md) { auto &cache = md->GetBvarsCache().GetSubCache(bound_type, false); // if (pmesh->do_coalesced_comms) { - // pmesh->pcoalesced_buffers->Compare(md.get(), bound_type); + // pmesh->pcoalesced_comms->Compare(md.get(), bound_type); // } auto [rebuild, nbound] = CheckReceiveBufferCacheForRebuild(md); diff --git a/src/bvals/comms/build_boundary_buffers.cpp b/src/bvals/comms/build_boundary_buffers.cpp index b30b7640049b..f8478c77bb52 100644 --- a/src/bvals/comms/build_boundary_buffers.cpp +++ b/src/bvals/comms/build_boundary_buffers.cpp @@ -136,7 +136,7 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md, // Register this buffer with 
the combined buffers (must happen after CommBuffer is // created) if (receiver_rank != sender_rank) - pmesh->pcoalesced_buffers->AddSendBuffer(md->partition, pmb, nb, v, BTYPE); + pmesh->pcoalesced_comms->AddSendBuffer(md->partition, pmb, nb, v, BTYPE); } // Also build the non-local receive buffers here @@ -149,7 +149,7 @@ void BuildBoundaryBufferSubset(std::shared_ptr> &md, use_sparse_buffers); // Register this buffer with the combined buffers (must happen after CommBuffer is // created) - pmesh->pcoalesced_buffers->AddRecvBuffer(pmb, nb, v, BTYPE); + pmesh->pcoalesced_comms->AddRecvBuffer(pmb, nb, v, BTYPE); } } }); diff --git a/src/bvals/comms/coalesced_buffers.cpp b/src/bvals/comms/coalesced_buffers.cpp index 9e660d1eddcc..db24fad1fbee 100644 --- a/src/bvals/comms/coalesced_buffers.cpp +++ b/src/bvals/comms/coalesced_buffers.cpp @@ -33,7 +33,7 @@ namespace parthenon { //---------------------------------------------------------------------------------------- -void CoalescedBuffersRankPartition::AllocateCoalescedBuffer() { +void CoalescedBuffer::AllocateCoalescedBuffer() { int send_rank = sender ? Globals::my_rank : other_rank; int recv_rank = sender ? other_rank : Globals::my_rank; coalesced_comm_buffer = CommBuffer(2 * partition, send_rank, recv_rank, comm_); @@ -52,8 +52,7 @@ void CoalescedBuffersRankPartition::AllocateCoalescedBuffer() { } //---------------------------------------------------------------------------------------- -ParArray1D & -CoalescedBuffersRankPartition::GetBndIdsOnDevice(const std::set &vars) { +ParArray1D &CoalescedBuffer::GetBndIdsOnDevice(const std::set &vars) { int nbnd_id{0}; const auto &var_set = vars.size() == 0 ? 
all_vars : vars; for (auto uid : var_set) @@ -96,7 +95,7 @@ CoalescedBuffersRankPartition::GetBndIdsOnDevice(const std::set &vars) { } //---------------------------------------------------------------------------------------- -void CoalescedBuffersRankPartition::PackAndSend(const std::set &vars) { +void CoalescedBuffer::PackAndSend(const std::set &vars) { PARTHENON_REQUIRE(coalesced_comm_buffer.IsAvailableForWrite(), "Trying to write to a buffer that is in use."); auto &bids = GetBndIdsOnDevice(vars); @@ -148,7 +147,7 @@ void CoalescedBuffersRankPartition::PackAndSend(const std::set &vars) { } //---------------------------------------------------------------------------------------- -bool CoalescedBuffersRankPartition::TryReceiveAndUnpack(const std::set &vars) { +bool CoalescedBuffer::TryReceiveAndUnpack(const std::set &vars) { if ((sparse_status_buffer.GetState() == BufferState::received) && (coalesced_comm_buffer.GetState() == BufferState::received)) return true; @@ -207,7 +206,7 @@ bool CoalescedBuffersRankPartition::TryReceiveAndUnpack(const std::set &v } //---------------------------------------------------------------------------------------- -void CoalescedBuffersRankPartition::Compare(const std::set &vars) { +void CoalescedBuffer::Compare(const std::set &vars) { PARTHENON_REQUIRE(coalesced_comm_buffer.GetState() == BufferState::received, "Combined buffer not in correct state"); PARTHENON_REQUIRE(sparse_status_buffer.GetState() == BufferState::received, @@ -250,7 +249,7 @@ void CoalescedBuffersRankPartition::Compare(const std::set &vars) { } //---------------------------------------------------------------------------------------- -void CoalescedBuffersRankPartition::AddVarBoundary(BndId &bnd_id) { +void CoalescedBuffer::AddVarBoundary(BndId &bnd_id) { auto key = GetChannelKey(bnd_id); PARTHENON_REQUIRE(pmesh->boundary_comm_map.count(key), "Buffer doesn't exist."); var_buf_t *pbuf = &(pmesh->boundary_comm_map.at(key)); @@ -260,8 +259,8 @@ void 
CoalescedBuffersRankPartition::AddVarBoundary(BndId &bnd_id) { all_vars.insert(bnd_id.var_id()); } -void CoalescedBuffersRankPartition::AddVarBoundary( - MeshBlock *pmb, const NeighborBlock &nb, const std::shared_ptr> &var) { +void CoalescedBuffer::AddVarBoundary(MeshBlock *pmb, const NeighborBlock &nb, + const std::shared_ptr> &var) { // Store both the variable-boundary buffer information and a pointer to the v-b buffer // itself associated with var ids BndId bnd_id = BndId::GetSend(pmb, nb, var, b_type, partition, -1); @@ -293,9 +292,9 @@ void CoalescedBuffersRank::AddSendBuffer(int partition, MeshBlock *pmb, const NeighborBlock &nb, const std::shared_ptr> &var) { if (coalesced_bufs.count(partition) == 0) - coalesced_bufs.emplace(std::make_pair( - partition, CoalescedBuffersRankPartition(true, partition, other_rank, b_type, - comm_, pmb->pmy_mesh))); + coalesced_bufs.emplace( + std::make_pair(partition, CoalescedBuffer(true, partition, other_rank, b_type, + comm_, pmb->pmy_mesh))); auto &coal_buf = coalesced_bufs.at(partition); coal_buf.AddVarBoundary(pmb, nb, var); @@ -319,9 +318,9 @@ bool CoalescedBuffersRank::TryReceiveBufInfo() { const int total_size = mess_buf[idx++]; // Create the new partition - coalesced_bufs.emplace(std::make_pair( - partition, CoalescedBuffersRankPartition(false, partition, other_rank, b_type, - comm_, pmesh))); + coalesced_bufs.emplace( + std::make_pair(partition, CoalescedBuffer(false, partition, other_rank, b_type, + comm_, pmesh))); auto &coal_buf = coalesced_bufs.at(partition); for (int b = 0; b < nbuf; ++b) { @@ -409,10 +408,9 @@ bool CoalescedBuffersRank::TryReceiveAndUnpack(MeshData *pmd, int partitio //---------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------- //---------------------------------------------------------------------------------------- -void CoalescedBuffers::AddSendBuffer(int partition, 
MeshBlock *pmb, - const NeighborBlock &nb, - const std::shared_ptr> &var, - BoundaryType b_type) { +void CoalescedComms::AddSendBuffer(int partition, MeshBlock *pmb, const NeighborBlock &nb, + const std::shared_ptr> &var, + BoundaryType b_type) { if (coalesced_send_buffers.count({nb.rank, b_type}) == 0) coalesced_send_buffers.emplace( std::make_pair(std::make_pair(nb.rank, b_type), @@ -422,9 +420,9 @@ void CoalescedBuffers::AddSendBuffer(int partition, MeshBlock *pmb, } //---------------------------------------------------------------------------------------- -void CoalescedBuffers::AddRecvBuffer(MeshBlock *pmb, const NeighborBlock &nb, - const std::shared_ptr>, - BoundaryType b_type) { +void CoalescedComms::AddRecvBuffer(MeshBlock *pmb, const NeighborBlock &nb, + const std::shared_ptr>, + BoundaryType b_type) { // We don't actually know enough here to register this particular buffer, but we do // know that it's existence implies that we need to receive a message from the // neighbor block rank eventually telling us the details @@ -436,13 +434,13 @@ void CoalescedBuffers::AddRecvBuffer(MeshBlock *pmb, const NeighborBlock &nb, } //---------------------------------------------------------------------------------------- -void CoalescedBuffers::ResolveAndSendSendBuffers() { +void CoalescedComms::ResolveAndSendSendBuffers() { for (auto &[id, buf] : coalesced_send_buffers) buf.ResolveSendBuffersAndSendInfo(); } //---------------------------------------------------------------------------------------- -void CoalescedBuffers::ReceiveBufferInfo() { +void CoalescedComms::ReceiveBufferInfo() { constexpr std::int64_t max_it = 1e10; std::vector received(coalesced_recv_buffers.size(), false); bool all_received; @@ -459,7 +457,7 @@ void CoalescedBuffers::ReceiveBufferInfo() { } //---------------------------------------------------------------------------------------- -bool CoalescedBuffers::IsAvailableForWrite(MeshData *pmd, BoundaryType b_type) { +bool 
CoalescedComms::IsAvailableForWrite(MeshData *pmd, BoundaryType b_type) { bool available{true}; for (int rank = 0; rank < Globals::nranks; ++rank) { if (coalesced_send_buffers.count({rank, b_type})) { @@ -471,7 +469,7 @@ bool CoalescedBuffers::IsAvailableForWrite(MeshData *pmd, BoundaryType b_t } //---------------------------------------------------------------------------------------- -void CoalescedBuffers::PackAndSend(MeshData *pmd, BoundaryType b_type) { +void CoalescedComms::PackAndSend(MeshData *pmd, BoundaryType b_type) { for (int rank = 0; rank < Globals::nranks; ++rank) { if (coalesced_send_buffers.count({rank, b_type})) { coalesced_send_buffers.at({rank, b_type}).PackAndSend(pmd); @@ -480,7 +478,7 @@ void CoalescedBuffers::PackAndSend(MeshData *pmd, BoundaryType b_type) { } //---------------------------------------------------------------------------------------- -void CoalescedBuffers::Compare(MeshData *pmd, BoundaryType b_type) { +void CoalescedComms::Compare(MeshData *pmd, BoundaryType b_type) { for (int rank = 0; rank < Globals::nranks; ++rank) { if (coalesced_recv_buffers.count({rank, b_type})) { auto &coal_bufs = coalesced_recv_buffers.at({rank, b_type}); @@ -492,7 +490,7 @@ void CoalescedBuffers::Compare(MeshData *pmd, BoundaryType b_type) { } //---------------------------------------------------------------------------------------- -bool CoalescedBuffers::TryReceiveAny(MeshData *pmd, BoundaryType b_type) { +bool CoalescedComms::TryReceiveAny(MeshData *pmd, BoundaryType b_type) { #ifdef MPI_PARALLEL bool all_received = true; for (int rank = 0; rank < Globals::nranks; ++rank) { diff --git a/src/bvals/comms/coalesced_buffers.hpp b/src/bvals/comms/coalesced_buffers.hpp index 098d83815711..dcdcb8cc389d 100644 --- a/src/bvals/comms/coalesced_buffers.hpp +++ b/src/bvals/comms/coalesced_buffers.hpp @@ -35,7 +35,7 @@ namespace parthenon { // Structure containing the information required for sending coalesced // messages between ranks -struct 
CoalescedBuffersRankPartition { +struct CoalescedBuffer { using buf_t = BufArray1D; // Rank that these buffers communicate with @@ -55,8 +55,8 @@ struct CoalescedBuffersRankPartition { CommBuffer> sparse_status_buffer; int current_size; - CoalescedBuffersRankPartition(bool sender, int partition, int other_rank, - BoundaryType b_type, mpi_comm_t comm, Mesh *pmesh) + CoalescedBuffer(bool sender, int partition, int other_rank, BoundaryType b_type, + mpi_comm_t comm, Mesh *pmesh) : sender(sender), partition(partition), other_rank(other_rank), b_type(b_type), comm_(comm), pmesh(pmesh), current_size(0) {} @@ -100,7 +100,7 @@ struct CoalescedBuffersRank { // partition id of the sender will be the mpi tag we use bool buffers_built{false}; - std::map coalesced_bufs; + std::map coalesced_bufs; static constexpr int nglobal{1}; static constexpr int nper_part{3}; @@ -129,7 +129,7 @@ struct CoalescedBuffersRank { bool IsAvailableForWrite(MeshData *pmd); }; -struct CoalescedBuffers { +struct CoalescedComms { // Combined buffers for each rank std::map, CoalescedBuffersRank> coalesced_send_buffers; std::map, CoalescedBuffersRank> coalesced_recv_buffers; @@ -138,7 +138,7 @@ struct CoalescedBuffers { Mesh *pmesh; - explicit CoalescedBuffers(Mesh *pmesh) : pmesh(pmesh) { + explicit CoalescedComms(Mesh *pmesh) : pmesh(pmesh) { // TODO(LFR): Switch to a different communicator for each BoundaryType pair for (auto b_type : {BoundaryType::any, BoundaryType::flxcor_send, BoundaryType::gmg_same, @@ -152,7 +152,7 @@ struct CoalescedBuffers { } } - ~CoalescedBuffers() { + ~CoalescedComms() { #ifdef MPI_PARALLEL for (auto &[b_type, comm] : comms_) PARTHENON_MPI_CHECK(MPI_Comm_free(&comm)); diff --git a/src/mesh/mesh.cpp b/src/mesh/mesh.cpp index b7e33432d801..552ebff11c09 100644 --- a/src/mesh/mesh.cpp +++ b/src/mesh/mesh.cpp @@ -87,7 +87,7 @@ Mesh::Mesh(ParameterInput *pin, ApplicationInput *app_in, Packages_t &packages, nref(Globals::nranks), nderef(Globals::nranks), rdisp(Globals::nranks), 
ddisp(Globals::nranks), bnref(Globals::nranks), bnderef(Globals::nranks), brdisp(Globals::nranks), bddisp(Globals::nranks), - pcoalesced_buffers(std::make_shared(this)), + pcoalesced_comms(std::make_shared(this)), do_coalesced_comms{ pin->GetOrAddBoolean("parthenon/mesh", "do_coalesced_comms", false)} { // Allow for user overrides to default Parthenon functions @@ -623,7 +623,7 @@ void Mesh::BuildTagMapAndBoundaryBuffers() { // Clear boundary communication buffers boundary_comm_map.clear(); - pcoalesced_buffers->clear(); + pcoalesced_comms->clear(); // Build the boundary buffers for the current mesh for (auto &partition : GetDefaultBlockPartitions()) { @@ -641,9 +641,9 @@ void Mesh::BuildTagMapAndBoundaryBuffers() { } } - pcoalesced_buffers->ResolveAndSendSendBuffers(); + pcoalesced_comms->ResolveAndSendSendBuffers(); // This operation is blocking - pcoalesced_buffers->ReceiveBufferInfo(); + pcoalesced_comms->ReceiveBufferInfo(); } void Mesh::CommunicateBoundaries(std::string md_name, diff --git a/src/mesh/mesh.hpp b/src/mesh/mesh.hpp index 93a6333cbea8..1b8cffd25e62 100644 --- a/src/mesh/mesh.hpp +++ b/src/mesh/mesh.hpp @@ -59,7 +59,7 @@ namespace parthenon { // Forward declarations class ApplicationInput; -class CoalescedBuffers; +class CoalescedComms; class MeshBlock; class MeshRefinement; class Packages_t; @@ -237,7 +237,7 @@ class Mesh { comm_buf_map_t boundary_comm_map; TagMap tag_map; - std::shared_ptr pcoalesced_buffers; + std::shared_ptr pcoalesced_comms; #ifdef MPI_PARALLEL MPI_Comm GetMPIComm(const std::string &label) const { return mpi_comm_map_.at(label); }