Skip to content

Commit

Permalink
chore: format
Browse files Browse the repository at this point in the history
  • Loading branch information
dssgabriel committed Nov 15, 2024
1 parent 788be5b commit c40247e
Show file tree
Hide file tree
Showing 10 changed files with 33 additions and 28 deletions.
1 change: 0 additions & 1 deletion src/KokkosComm/fwd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ using FallbackCommunicationSpace = Mpi;
#error at least one transport must be defined
#endif


// Forward declaration of the request class template. It is parameterized by a communication space and uses
// DefaultCommunicationSpace when no template argument is supplied.
template <CommunicationSpace CommSpace = DefaultCommunicationSpace>
class Req;

Expand Down
5 changes: 3 additions & 2 deletions src/KokkosComm/nccl/allgather.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ void allgather(const ExecSpace &space, const SendView &sv, const RecvView &rv, n

throw std::runtime_error("allgather for non-contiguous views not implemented");
} else {
constexpr auto count = KokkosComm::span(sv); // all ranks recv `nranks * count`
ncclAllGather(KokkosComm::data_handle(sv), KokkosComm::data_handle(rv), count, datatype_v<SendScalar>, comm, space.cuda_stream());
constexpr auto count = KokkosComm::span(sv); // all ranks recv `nranks * count`
ncclAllGather(KokkosComm::data_handle(sv), KokkosComm::data_handle(rv), count, datatype_v<SendScalar>, comm,
space.cuda_stream());
}

Kokkos::Tools::popRegion();
Expand Down
4 changes: 2 additions & 2 deletions src/KokkosComm/nccl/handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#pragma once

#include
#include
#include <KokkosComm/fwd.hpp>
#include <KokkosComm/nccl/nccl.hpp>

Expand Down Expand Up @@ -45,7 +45,7 @@ class Handle<ExecSpace, Nccl> {
// This would require us to initialize it manually, which is a lot more work than initializing MPI.
//
// Commenting it out for now.
//Handle() : Handle(Kokkos::DefaultExecutionSpace{}, ) {}
// Handle() : Handle(Kokkos::DefaultExecutionSpace{}, ) {}

// Returns a mutable reference to the stored NCCL communicator (`comm_`).
// Not const-qualified, so it cannot be called through a const handle.
auto get_inner() -> ncclComm_t & { return comm_; }
// Returns a read-only reference to the stored execution space instance (`space_`).
auto space() const -> const execution_space & { return space_; }
Expand Down
2 changes: 1 addition & 1 deletion src/KokkosComm/nccl/impl/pack_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ struct PackTraits<View> {
using packer_type = Packer::DeepCopy<View>;
};

} // namespace KokkosComm
} // namespace KokkosComm::Experimental::nccl::Impl
2 changes: 1 addition & 1 deletion src/KokkosComm/nccl/impl/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,4 @@ ncclDataType_t datatype() {
// Variable-template shorthand for `datatype<Scalar>()`: the NCCL datatype associated with `Scalar`.
// Declared `const` so the cached value cannot be reassigned by accident from any translation unit.
// NOTE(review): this could become `constexpr` if `datatype<Scalar>()` is constexpr — not visible here, confirm.
template <typename Scalar>
inline const ncclDataType_t datatype_v = datatype<Scalar>();

}; // namespace KokkosComm::Experimental::ncll::Impl
}; // namespace KokkosComm::Experimental::nccl::Impl
10 changes: 5 additions & 5 deletions src/KokkosComm/nccl/nccl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include <KokkosComm/concepts.hpp>

#include <Kokkos_Core_fwd.hpp> // Kokkos::Cuda
#include <Kokkos_Core_fwd.hpp> // Kokkos::Cuda
#include <nccl.h>

#include <type_traits>
Expand All @@ -27,13 +27,13 @@ namespace KokkosComm::Experimental {

// Tag type for the NCCL communication space. Its member aliases name the communication space itself, the
// execution space it is paired with (Kokkos::Cuda), and the NCCL datatype / reduction-operator handle types.
// Each alias is declared exactly once: repeating an alias declaration at class scope is ill-formed.
struct Nccl {
  using communication_space = Nccl;
  using execution_space     = Kokkos::Cuda;
  using datatype_type       = ncclDataType_t;
  using reduction_op_type   = ncclRedOp_t;
};

// Nccl is a KokkosComm::CommunicationSpace: specialize the trait so that it derives from std::true_type for Nccl.
// NOTE(review): this explicit specialization is written inside namespace KokkosComm::Experimental, which does not
// enclose KokkosComm::Impl. An explicit specialization normally has to be declared in a namespace enclosing the
// primary template, so this likely needs to move to KokkosComm (or global) scope — confirm with the compiler.
template <>
struct KokkosComm::Impl::is_communication_space<Nccl> : public std::true_type {};

} // namespace KokkosComm::Experimental
} // namespace KokkosComm::Experimental
4 changes: 2 additions & 2 deletions src/KokkosComm/nccl/recv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ void recv(const ExecSpace &space, RecvView &rv, int src, ncclComm_t comm) {
ncclRecv(rv.data(), rv.span(), datatype_v<RecvScalar>, src, comm, space.cuda_stream());
} else {
using Packer = typename Impl::PackTraits<RecvView>::packer_type;
auto args = Packer::pack(space, rv);
auto args = Packer::pack(space, rv);
// TODO: consider using a private stream pool in order to avoid synchronizing the underlying stream (which may not
// be empty and have in-flight communications we don't want to wait on)
space.fence(); // make sure allocation is complete before receiving
space.fence(); // make sure allocation is complete before receiving

ncclRecv(KokkosComm::data_handle(args.view), args.count, args.datatype, src, comm, space.cuda_stream());
Packer::unpack_into(space, rv, args.view);
Expand Down
21 changes: 13 additions & 8 deletions src/KokkosComm/nccl/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ constexpr auto reduction_op() -> ncclRedOp_t {
} else {
{
static_assert(std::is_void_v<RedOp>, "NCCL reduction operator not implemented");
return ncclMax; // unreachable
return ncclMax; // unreachable
}
}
}
Expand All @@ -50,7 +50,8 @@ template <typename Scalar>
inline constexpr ncclRedOp_t reduction_op_v = reduction_op<Scalar>();

template <KokkosExecutionSpace ExecSpace, KokkosView SendView, KokkosView RecvView>
void reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, ncclRedOp_t op, int root, int rank, ncclComm_t comm) {
void reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, ncclRedOp_t op, int root, int rank,
ncclComm_t comm) {
Kokkos::Tools::pushRegion("KokkosComm::Experimental::nccl::Impl::reduce");

using SendPacker = typename PackTraits<SendView>::packer_type;
Expand All @@ -63,22 +64,26 @@ void reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, nccl
auto recv_args = RecvPacker::allocate_packed_for(space, "reduce recv", rv);
space.fence();
using SendScalar = typename SendView::non_const_value_type;
ncclReduce(send_args.view.data(), recv_args.view.data(), send_args.count, send_args.datatype, op, root, comm, space.cuda_stream());
ncclReduce(send_args.view.data(), recv_args.view.data(), send_args.count, send_args.datatype, op, root, comm,
space.cuda_stream());
RecvPacker::unpack_into(space, rv, recv_args.view);
} else {
space.fence(); // is this fence necessary?
ncclReduce(send_args.view.data(), rv.data(), send_args.count, send_args.datatype, op, root, comm, space.cuda_stream());
space.fence(); // is this fence necessary?
ncclReduce(send_args.view.data(), rv.data(), send_args.count, send_args.datatype, op, root, comm,
space.cuda_stream());
}
} else {
using SendScalar = typename SendView::value_type;
if ((root == rank) && !KokkosComm::is_contiguous(rv)) {
auto recv_args = RecvPacker::allocate_packed_for(space, "reduce recv", rv);
space.fence();
ncclReduce(sv.data(), recv_args.view.data(), sv.span(), KokkosComm::Experimental::nccl::Impl::datatype_v<SendScalar>, op, root, comm, space.cuda_stream());
ncclReduce(sv.data(), recv_args.view.data(), sv.span(),
KokkosComm::Experimental::nccl::Impl::datatype_v<SendScalar>, op, root, comm, space.cuda_stream());
RecvPacker::unpack_into(space, rv, recv_args.view);
} else {
space.fence(); // is this fence necessary?
ncclReduce(sv.data(), rv.data(), sv.span(), KokkosComm::Experimental::nccl::Impl::datatype_v<SendScalar>, op, root, comm, space.cuda_stream());
space.fence(); // is this fence necessary?
ncclReduce(sv.data(), rv.data(), sv.span(), KokkosComm::Experimental::nccl::Impl::datatype_v<SendScalar>, op,
root, comm, space.cuda_stream());
}
}

Expand Down
8 changes: 4 additions & 4 deletions src/KokkosComm/point_to_point.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,17 +53,17 @@ Req<CommSpace> send(SendView &sv, int dest) {
namespace Experimental {

// Sends `sv` to rank `dest` over NCCL.
//
// Forwards to nccl::Impl::send with the handle's execution space and NCCL communicator, then returns a
// Req<Nccl> constructed from the CUDA stream of that execution space.
//
// h:    handle providing the execution space (`space()`) and the NCCL communicator (`get_inner()`)
// sv:   view to send
// dest: rank passed through to nccl::Impl::send as the destination
//
// NOTE(review): `h` is a const reference, but the only `get_inner()` visible on Handle<ExecSpace, Nccl> is
// non-const — confirm that a const overload exists, otherwise this cannot be instantiated.
template <KokkosView SendView, KokkosExecutionSpace ExecSpace = Kokkos::Cuda, CommunicationSpace CommSpace = Nccl>
auto send(const Handle<ExecSpace, CommSpace> &h, const SendView &sv, int dest) -> Req<Nccl> {
  nccl::Impl::send(h.space(), sv, dest, h.get_inner());
  // `space` is a member function of the handle: it has to be called to reach the execution space instance.
  return Req<Nccl>(h.space().cuda_stream());
}

// Receives into `sv` from rank `dest` over NCCL.
//
// Forwards to nccl::Impl::recv with the handle's execution space and NCCL communicator, then returns a
// Req<Nccl> constructed from the CUDA stream of that execution space.
//
// h:    handle providing the execution space (`space()`) and the NCCL communicator (`get_inner()`)
// sv:   view that receives the data (named `sv` for historical reasons; it is the receive buffer)
// dest: rank passed through to nccl::Impl::recv, i.e. the peer the data comes from
//
// NOTE(review): `h` is a const reference, but the only `get_inner()` visible on Handle<ExecSpace, Nccl> is
// non-const — confirm that a const overload exists, otherwise this cannot be instantiated.
// NOTE(review): `sv` is taken by const reference while it is the destination of the receive — confirm that
// nccl::Impl::recv accepts a const-qualified view.
template <KokkosView RecvView, KokkosExecutionSpace ExecSpace = Kokkos::Cuda, CommunicationSpace CommSpace = Nccl>
auto recv(const Handle<ExecSpace, CommSpace> &h, const RecvView &sv, int dest) -> Req<Nccl> {
  nccl::Impl::recv(h.space(), sv, dest, h.get_inner());
  // `space` is a member function of the handle: it has to be called to reach the execution space instance.
  return Req<Nccl>(h.space().cuda_stream());
}

} // namespace Experimental
} // namespace Experimental

} // namespace KokkosComm
} // namespace KokkosComm
4 changes: 2 additions & 2 deletions src/KokkosComm/reduction_op.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@ struct BinaryOr {};
// Empty tag type in the ReductionOp namespace; presumably selects a "maximum with location" reduction —
// confirm against the code that dispatches on it.
struct MaximumLoc {};
// Empty tag type in the ReductionOp namespace; presumably selects a "minimum with location" reduction —
// confirm against the code that dispatches on it.
struct MinimumLoc {};

} // namespace ReductionOp
} // namespace ReductionOp

} // namespace KokkosComm
} // namespace KokkosComm

0 comments on commit c40247e

Please sign in to comment.