Skip to content

Commit

Permalink
Move to cuda::std::iterator_traits in CUB (#3924)
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber authored Feb 24, 2025
1 parent c065aa8 commit 8486ea8
Show file tree
Hide file tree
Showing 63 changed files with 223 additions and 210 deletions.
2 changes: 1 addition & 1 deletion c2h/include/c2h/fill_striped.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ struct scalar_to_vec_t
template <int LogicalWarpThreads, int ItemsPerThread, int BlockThreads, typename IteratorT>
void fill_striped(IteratorT it)
{
using T = cub::detail::value_t<IteratorT>;
using T = cub::detail::it_value_t<IteratorT>;

constexpr int warps_in_block = BlockThreads / LogicalWarpThreads;
constexpr int items_per_warp = LogicalWarpThreads * ItemsPerThread;
Expand Down
12 changes: 5 additions & 7 deletions cub/cub/agent/agent_batch_memcpy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -555,18 +555,16 @@ private:
// TYPE DECLARATIONS
//---------------------------------------------------------------------
/// Internal load/store type. For byte-wise memcpy, a single-byte type
using AliasT =
typename ::cuda::std::conditional<IsMemcpy,
std::iterator_traits<char*>,
std::iterator_traits<cub::detail::value_t<InputBufferIt>>>::type::value_type;
using AliasT = typename ::cuda::std::
conditional_t<IsMemcpy, ::cuda::std::type_identity<char>, lazy_trait<it_value_t, it_value_t<InputBufferIt>>>::type;

/// Types of the input and output buffers
using InputBufferT = cub::detail::value_t<InputBufferIt>;
using OutputBufferT = cub::detail::value_t<OutputBufferIt>;
using InputBufferT = it_value_t<InputBufferIt>;
using OutputBufferT = it_value_t<OutputBufferIt>;

/// Type that has to be sufficiently large to hold any of the buffers' sizes.
/// The BufferSizeIteratorT's value type must be convertible to this type.
using BufferSizeT = cub::detail::value_t<BufferSizeIteratorT>;
using BufferSizeT = it_value_t<BufferSizeIteratorT>;

/// Type used to index into the tile of buffers that this thread block is assigned to.
using BlockBufferOffsetT = uint16_t;
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ struct AgentHistogram
//---------------------------------------------------------------------

/// The sample type of the input iterator
using SampleT = cub::detail::value_t<SampleIteratorT>;
using SampleT = cub::detail::it_value_t<SampleIteratorT>;

/// The pixel type of SampleT
using PixelT = typename CubVector<SampleT, NUM_CHANNELS>::Type;
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_merge.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ struct agent_t
using policy = Policy;

// key and value type are taken from the first input sequence (consistent with old Thrust behavior)
using key_type = typename ::cuda::std::iterator_traits<KeysIt1>::value_type;
using item_type = typename ::cuda::std::iterator_traits<ItemsIt1>::value_type;
using key_type = it_value_t<KeysIt1>;
using item_type = it_value_t<ItemsIt1>;

using keys_load_it1 = typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, KeysIt1>::type;
using keys_load_it2 = typename THRUST_NS_QUALIFIER::cuda_cub::core::detail::LoadIterator<Policy, KeysIt2>::type;
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ struct AgentReduce
//---------------------------------------------------------------------

/// The input value type
using InputT = value_t<InputIteratorT>;
using InputT = it_value_t<InputIteratorT>;

/// Vector type of InputT for data movement
using VectorT = typename CubVector<InputT, AgentReducePolicy::VECTOR_LOAD_LENGTH>::Type;
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -171,13 +171,13 @@ struct AgentReduceByKey
//---------------------------------------------------------------------

// The input keys type
using KeyInputT = value_t<KeysInputIteratorT>;
using KeyInputT = it_value_t<KeysInputIteratorT>;

// The output keys type
using KeyOutputT = non_void_value_t<UniqueOutputIteratorT, KeyInputT>;

// The input values type
using ValueInputT = value_t<ValuesInputIteratorT>;
using ValueInputT = it_value_t<ValuesInputIteratorT>;

// Tuple type for scanning (pairs accumulated segment-value with
// segment-index)
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_rle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ struct AgentRle
//---------------------------------------------------------------------

/// The input value type
using T = cub::detail::value_t<InputIteratorT>;
using T = cub::detail::it_value_t<InputIteratorT>;

/// The lengths output value type
using LengthT = cub::detail::non_void_value_t<LengthsOutputIteratorT, OffsetT>;
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ struct AgentScan
//---------------------------------------------------------------------

// The input value type
using InputT = cub::detail::value_t<InputIteratorT>;
using InputT = cub::detail::it_value_t<InputIteratorT>;

// Tile status descriptor interface type
using ScanTileStateT = ScanTileState<AccumT>;
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_scan_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ struct AgentScanByKey
// Types and constants
//---------------------------------------------------------------------

using KeyT = value_t<KeysInputIteratorT>;
using InputT = value_t<ValuesInputIteratorT>;
using KeyT = it_value_t<KeysInputIteratorT>;
using InputT = it_value_t<ValuesInputIteratorT>;
using FlagValuePairT = KeyValuePair<int, AccumT>;
using ReduceBySegmentOpT = ScanBySegmentOp<ScanOpT>;

Expand Down
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_select_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -227,10 +227,10 @@ struct AgentSelectIf
using MemoryOrderedTileStateT = tile_state_with_memory_order<ScanTileStateT, memory_order>;

// The input value type
using InputT = value_t<InputIteratorT>;
using InputT = it_value_t<InputIteratorT>;

// The flag value type
using FlagT = value_t<FlagsInputIteratorT>;
using FlagT = it_value_t<FlagsInputIteratorT>;

// Constants
enum
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/agent/agent_three_way_partition.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ struct AgentThreeWayPartition
//---------------------------------------------------------------------

// The input value type
using InputT = value_t<InputIteratorT>;
using InputT = it_value_t<InputIteratorT>;

using AccumPackHelperT = accumulator_pack_t<OffsetT>;
using AccumPackT = typename AccumPackHelperT::pack_t;
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/agent/agent_unique_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ struct AgentUniqueByKey
//---------------------------------------------------------------------

// The input key and value type
using KeyT = cub::detail::value_t<KeyInputIteratorT>;
using ValueT = cub::detail::value_t<ValueInputIteratorT>;
using KeyT = cub::detail::it_value_t<KeyInputIteratorT>;
using ValueT = cub::detail::it_value_t<ValueInputIteratorT>;

// Tile status descriptor interface type
using ScanTileStateT = ScanTileState<OffsetT>;
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/block/block_load.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1250,7 +1250,7 @@ public:
//! @} end member group
};

template <class Policy, class It, class T = cub::detail::value_t<It>>
template <class Policy, class It, class T = cub::detail::it_value_t<It>>
struct BlockLoadType
{
using type = cub::BlockLoad<T, Policy::BLOCK_THREADS, Policy::ITEMS_PER_THREAD, Policy::LOAD_ALGORITHM>;
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/block/block_merge_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ MergePath(KeyIt1 keys1, KeyIt2 keys2, OffsetT keys1_count, OffsetT keys2_count,
{
const OffsetT mid = cub::MidPoint<OffsetT>(keys1_begin, keys1_end);
// pull copies of the keys before calling binary_pred so proxy references are unwrapped
const detail::value_t<KeyIt1> key1 = keys1[mid];
const detail::value_t<KeyIt2> key2 = keys2[diag - 1 - mid];
const detail::it_value_t<KeyIt1> key1 = keys1[mid];
const detail::it_value_t<KeyIt2> key2 = keys2[diag - 1 - mid];
if (binary_pred(key2, key1))
{
keys1_end = mid;
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/block/block_store.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1227,7 +1227,7 @@ public:
};

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
template <class Policy, class It, class T = cub::detail::value_t<It>>
template <class Policy, class It, class T = cub::detail::it_value_t<It>>
struct BlockStoreType
{
using type = cub::BlockStore<T, Policy::BLOCK_THREADS, Policy::ITEMS_PER_THREAD, Policy::STORE_ALGORITHM>;
Expand Down
16 changes: 8 additions & 8 deletions cub/cub/device/device_for.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,16 @@

#include <thrust/detail/raw_reference_cast.h>
#include <thrust/distance.h>
#include <thrust/iterator/iterator_traits.h>
#include <thrust/system/cuda/detail/core/util.h>
#include <thrust/type_traits/is_contiguous_iterator.h>
#include <thrust/type_traits/unwrap_contiguous_iterator.h>

#include <cuda/std/iterator>
#include <cuda/std/type_traits>

#if __cccl_lib_mdspan
# include <cuda/std/__mdspan/extents.h>
#endif // __cccl_lib_mdspan
#include <cuda/std/type_traits>

CUB_NAMESPACE_BEGIN

Expand Down Expand Up @@ -155,7 +156,8 @@ private:
ContiguousIteratorT first, OffsetT num_items, OpT op, cudaStream_t stream, ::cuda::std::true_type /* vectorize */)
{
auto* unwrapped_first = THRUST_NS_QUALIFIER::unwrap_contiguous_iterator(first);
using wrapped_op_t = detail::for_each::op_wrapper_vectorized_t<OffsetT, OpT, detail::value_t<ContiguousIteratorT>>;
using wrapped_op_t =
detail::for_each::op_wrapper_vectorized_t<OffsetT, OpT, detail::it_value_t<ContiguousIteratorT>>;

if (is_aligned<typename wrapped_op_t::vector_t>(unwrapped_first))
{ // Vectorize loads
Expand Down Expand Up @@ -594,7 +596,7 @@ private:
using offset_t = NumItemsT;
// Disable auto-vectorization for now:
// constexpr bool use_vectorization =
// detail::for_each::can_regain_copy_freedom<detail::value_t<RandomAccessIteratorT>, OpT>::value
// detail::for_each::can_regain_copy_freedom<detail::it_value_t<RandomAccessIteratorT>, OpT>::value
// && THRUST_NS_QUALIFIER::is_contiguous_iterator<RandomAccessIteratorT>::value;
using use_vectorization_t = ::cuda::std::bool_constant<false>;
return for_each_n<RandomAccessIteratorT, offset_t, OpT>(first, num_items, op, stream, use_vectorization_t{});
Expand Down Expand Up @@ -706,10 +708,8 @@ public:
{
CUB_DETAIL_NVTX_RANGE_SCOPE("cub::DeviceFor::ForEach");

using offset_t = typename THRUST_NS_QUALIFIER::iterator_traits<RandomAccessIteratorT>::difference_type;

using offset_t = detail::it_difference_t<RandomAccessIteratorT>;
const auto num_items = static_cast<offset_t>(THRUST_NS_QUALIFIER::distance(first, last));

return ForEachNNoNVTX(first, num_items, op, stream);
}

Expand Down Expand Up @@ -835,7 +835,7 @@ public:
ForEachCopy(RandomAccessIteratorT first, RandomAccessIteratorT last, OpT op, cudaStream_t stream = {})
{
CUB_DETAIL_NVTX_RANGE_SCOPE("cub::DeviceFor::ForEachCopy");
using offset_t = typename THRUST_NS_QUALIFIER::iterator_traits<RandomAccessIteratorT>::difference_type;
using offset_t = detail::it_difference_t<RandomAccessIteratorT>;
const auto num_items = static_cast<offset_t>(THRUST_NS_QUALIFIER::distance(first, last));
return ForEachCopyNNoNVTX(first, num_items, op, stream);
}
Expand Down
12 changes: 6 additions & 6 deletions cub/cub/device/device_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ struct DeviceHistogram
cudaStream_t stream = 0)
{
/// The sample value type of the input iterator
using SampleT = cub::detail::value_t<SampleIteratorT>;
using SampleT = cub::detail::it_value_t<SampleIteratorT>;
return MultiHistogramEven<1, 1>(
d_temp_storage,
temp_storage_bytes,
Expand Down Expand Up @@ -509,7 +509,7 @@ struct DeviceHistogram
cudaStream_t stream = 0)
{
/// The sample value type of the input iterator
using SampleT = cub::detail::value_t<SampleIteratorT>;
using SampleT = cub::detail::it_value_t<SampleIteratorT>;

return MultiHistogramEven<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
d_temp_storage,
Expand Down Expand Up @@ -700,7 +700,7 @@ struct DeviceHistogram
CUB_DETAIL_NVTX_RANGE_SCOPE_IF(d_temp_storage, "cub::DeviceHistogram::MultiHistogramEven");

/// The sample value type of the input iterator
using SampleT = cub::detail::value_t<SampleIteratorT>;
using SampleT = cub::detail::it_value_t<SampleIteratorT>;
::cuda::std::bool_constant<sizeof(SampleT) == 1> is_byte_sample;

if constexpr (sizeof(OffsetT) > sizeof(int))
Expand Down Expand Up @@ -850,7 +850,7 @@ struct DeviceHistogram
cudaStream_t stream = 0)
{
/// The sample value type of the input iterator
using SampleT = cub::detail::value_t<SampleIteratorT>;
using SampleT = cub::detail::it_value_t<SampleIteratorT>;
return MultiHistogramRange<1, 1>(
d_temp_storage,
temp_storage_bytes,
Expand Down Expand Up @@ -1145,7 +1145,7 @@ struct DeviceHistogram
cudaStream_t stream = 0)
{
/// The sample value type of the input iterator
using SampleT = cub::detail::value_t<SampleIteratorT>;
using SampleT = cub::detail::it_value_t<SampleIteratorT>;

return MultiHistogramRange<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
d_temp_storage,
Expand Down Expand Up @@ -1326,7 +1326,7 @@ struct DeviceHistogram
CUB_DETAIL_NVTX_RANGE_SCOPE_IF(d_temp_storage, "cub::DeviceHistogram::MultiHistogramRange");

/// The sample value type of the input iterator
using SampleT = cub::detail::value_t<SampleIteratorT>;
using SampleT = cub::detail::it_value_t<SampleIteratorT>;
::cuda::std::bool_constant<sizeof(SampleT) == 1> is_byte_sample;

if constexpr (sizeof(OffsetT) > sizeof(int))
Expand Down
4 changes: 2 additions & 2 deletions cub/cub/device/device_memcpy.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,10 @@ struct DeviceMemcpy
cudaStream_t stream = 0)
{
CUB_DETAIL_NVTX_RANGE_SCOPE_IF(d_temp_storage, "cub::DeviceMemcpy::Batched");
static_assert(::cuda::std::is_pointer_v<cub::detail::value_t<InputBufferIt>>,
static_assert(::cuda::std::is_pointer_v<cub::detail::it_value_t<InputBufferIt>>,
"DeviceMemcpy::Batched only supports copying of memory buffers."
"Please consider using DeviceCopy::Batched instead.");
static_assert(::cuda::std::is_pointer_v<cub::detail::value_t<OutputBufferIt>>,
static_assert(::cuda::std::is_pointer_v<cub::detail::it_value_t<OutputBufferIt>>,
"DeviceMemcpy::Batched only supports copying of memory buffers."
"Please consider using DeviceCopy::Batched instead.");

Expand Down
14 changes: 7 additions & 7 deletions cub/cub/device/device_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ struct DeviceReduce
using OffsetT = detail::choose_offset_t<NumItemsT>;

// The output value type
using OutputT = cub::detail::non_void_value_t<OutputIteratorT, cub::detail::value_t<InputIteratorT>>;
using OutputT = cub::detail::non_void_value_t<OutputIteratorT, cub::detail::it_value_t<InputIteratorT>>;

using InitT = OutputT;

Expand Down Expand Up @@ -424,7 +424,7 @@ struct DeviceReduce
using OffsetT = detail::choose_offset_t<NumItemsT>;

// The input value type
using InputT = cub::detail::value_t<InputIteratorT>;
using InputT = cub::detail::it_value_t<InputIteratorT>;

using InitT = InputT;

Expand Down Expand Up @@ -539,7 +539,7 @@ struct DeviceReduce
CUB_DETAIL_NVTX_RANGE_SCOPE_IF(d_temp_storage, "cub::DeviceReduce::ArgMin");

// The input type
using InputValueT = cub::detail::value_t<InputIteratorT>;
using InputValueT = cub::detail::it_value_t<InputIteratorT>;

// Offset type used within the kernel and to index within one partition
using PerPartitionOffsetT = int;
Expand Down Expand Up @@ -673,7 +673,7 @@ struct DeviceReduce
using OffsetT = int;

// The input type
using InputValueT = cub::detail::value_t<InputIteratorT>;
using InputValueT = cub::detail::it_value_t<InputIteratorT>;

// The output tuple type
using OutputTupleT = cub::detail::non_void_value_t<OutputIteratorT, KeyValuePair<OffsetT, InputValueT>>;
Expand Down Expand Up @@ -785,7 +785,7 @@ struct DeviceReduce
using OffsetT = detail::choose_offset_t<NumItemsT>;

// The input value type
using InputT = cub::detail::value_t<InputIteratorT>;
using InputT = cub::detail::it_value_t<InputIteratorT>;

using InitT = InputT;

Expand Down Expand Up @@ -900,7 +900,7 @@ struct DeviceReduce
CUB_DETAIL_NVTX_RANGE_SCOPE_IF(d_temp_storage, "cub::DeviceReduce::ArgMax");

// The input type
using InputValueT = cub::detail::value_t<InputIteratorT>;
using InputValueT = cub::detail::it_value_t<InputIteratorT>;

// Offset type used within the kernel and to index within one partition
using PerPartitionOffsetT = int;
Expand Down Expand Up @@ -1038,7 +1038,7 @@ struct DeviceReduce
using OffsetT = int;

// The input type
using InputValueT = cub::detail::value_t<InputIteratorT>;
using InputValueT = cub::detail::it_value_t<InputIteratorT>;

// The output tuple type
using OutputTupleT = cub::detail::non_void_value_t<OutputIteratorT, KeyValuePair<OffsetT, InputValueT>>;
Expand Down
2 changes: 1 addition & 1 deletion cub/cub/device/device_run_length_encode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ struct DeviceRunLengthEncode

using accum_t = ::cuda::std::__accumulator_t<reduction_op, length_t, length_t>;

using key_t = cub::detail::non_void_value_t<UniqueOutputIteratorT, cub::detail::value_t<InputIteratorT>>;
using key_t = cub::detail::non_void_value_t<UniqueOutputIteratorT, cub::detail::it_value_t<InputIteratorT>>;

using policy_t = detail::rle::encode::policy_hub<accum_t, key_t>;

Expand Down
6 changes: 3 additions & 3 deletions cub/cub/device/device_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ struct DeviceScan

// Unsigned integer type for global offsets
using OffsetT = detail::choose_offset_t<NumItemsT>;
using InitT = cub::detail::value_t<InputIteratorT>;
using InitT = cub::detail::it_value_t<InputIteratorT>;

// Initial value
InitT init_value{};
Expand Down Expand Up @@ -1156,7 +1156,7 @@ struct DeviceScan

// Unsigned integer type for global offsets
using OffsetT = detail::choose_offset_t<NumItemsT>;
using AccumT = ::cuda::std::__accumulator_t<ScanOpT, cub::detail::value_t<InputIteratorT>, InitValueT>;
using AccumT = ::cuda::std::__accumulator_t<ScanOpT, cub::detail::it_value_t<InputIteratorT>, InitValueT>;

return DispatchScan<
InputIteratorT,
Expand Down Expand Up @@ -1390,7 +1390,7 @@ struct DeviceScan

// Unsigned integer type for global offsets
using OffsetT = detail::choose_offset_t<NumItemsT>;
using InitT = cub::detail::value_t<ValuesInputIteratorT>;
using InitT = cub::detail::it_value_t<ValuesInputIteratorT>;

// Initial value
InitT init_value{};
Expand Down
Loading

0 comments on commit 8486ea8

Please sign in to comment.