-
Notifications
You must be signed in to change notification settings - Fork 196
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement
cudax::async_buffer
(#3460)
* Make cudax depend on thrust * Implement `cudax::heterogeneous_iterator` * Implement `cudax::async_vector` This implements `cudax::async_vector`, a contiguous container based on async memory resources. In contrast to `std::vector`, it is templated on a set of properties and accepts any resource that satisfies these properties. That makes it suitable for heterogeneous systems, where we need to account for different execution spaces. * Add an example on how to use cudax::async_vector * Simplify vector to a buffer * Drop assignment * Add `get` and `get_unsynchronized` methods * Is fake resource_ref to avoid deep copy * Add `copy_to` method to transfer memory * Fix spelling * Drop old dialect checks * Address review comments * Rename to `make_async_buffer` * Fix comparison * Address review comments * Properly wait before accessing pointers * fixup! Address review comments * Drop superfluous `__Copy_same` * We need to synchronize after the host_launch 🤷 * Add comments * Simplify the implementation and add another comment * Address review comments * Fix concept issue with == * Update cudax/include/cuda/experimental/__container/async_buffer.cuh Co-authored-by: Eric Niebler <[email protected]> * Address review comments * Address review comments on `heterogeneous_iterator` * Ensure that we are on the right stream for thrust calls * Make doxygen happy --------- Co-authored-by: Eric Niebler <[email protected]>
- Loading branch information
1 parent
52ed31c
commit b048cb7
Showing
28 changed files
with
3,445 additions
and
25 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// Part of CUDA Experimental in CUDA C++ Core Libraries, | ||
// under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
/**
 * Vector addition: C = A + B.
 *
 * This is a very basic sample that implements element-by-element vector
 * addition. It is the same as the sample illustrating Chapter 2 of the
 * programming guide, with some additions like error checking.
 */
|
||
#include <thrust/generate.h> | ||
#include <thrust/random.h> | ||
#include <thrust/transform.h> | ||
|
||
#include <cuda/experimental/container.cuh> | ||
#include <cuda/experimental/memory_resource.cuh> | ||
#include <cuda/experimental/stream.cuh> | ||
|
||
#include <iostream> | ||
|
||
namespace cudax = cuda::experimental; | ||
|
||
// Number of float elements in each of the vectors A, B and C used by main().
constexpr int numElements = 50'000;
|
||
struct generator | ||
{ | ||
thrust::default_random_engine gen{}; | ||
thrust::uniform_real_distribution<float> dist{-10.0f, 10.0f}; | ||
|
||
__host__ __device__ generator(const unsigned seed) | ||
: gen{seed} | ||
{} | ||
|
||
__host__ __device__ float operator()() noexcept | ||
{ | ||
return dist(gen); | ||
} | ||
}; | ||
|
||
int main() | ||
{ | ||
// A CUDA stream on which to execute the vector addition kernel | ||
cudax::stream stream{}; | ||
|
||
// The execution policy we want to use to run all work on the same stream | ||
auto policy = thrust::cuda::par_nosync.on(stream.get()); | ||
|
||
// An environment we use to pass all necessary information to the containers | ||
cudax::env_t<cuda::mr::device_accessible> env{cudax::device_memory_resource{}, stream}; | ||
|
||
// Allocate the two inputs and output, but do not zero initialize via `cudax::uninit` | ||
cudax::async_device_buffer<float> A{env, numElements, cudax::uninit}; | ||
cudax::async_device_buffer<float> B{env, numElements, cudax::uninit}; | ||
cudax::async_device_buffer<float> C{env, numElements, cudax::uninit}; | ||
|
||
// Fill both vectors on stream using a random number generator | ||
thrust::generate(policy, A.begin(), A.end(), generator{42}); | ||
thrust::generate(policy, B.begin(), B.end(), generator{1337}); | ||
|
||
// Add the vectors together | ||
thrust::transform(policy, A.begin(), A.end(), B.begin(), C.begin(), cuda::std::plus<>{}); | ||
|
||
// Verify that the result vector is correct, by copying it to host | ||
cudax::env_t<cuda::mr::host_accessible> host_env{cudax::pinned_memory_resource{}, stream}; | ||
cudax::async_host_buffer<float> h_A{host_env, A}; | ||
cudax::async_host_buffer<float> h_B{host_env, B}; | ||
cudax::async_host_buffer<float> h_C{host_env, C}; | ||
|
||
// Do not forget to sync afterwards | ||
stream.wait(); | ||
|
||
for (int i = 0; i < numElements; ++i) | ||
{ | ||
if (cuda::std::abs(h_A.get_unsynchronized(i) + h_B.get_unsynchronized(i) - h_C.get_unsynchronized(i)) > 1e-5) | ||
{ | ||
std::cerr << "Result verification failed at element " << i << "\n"; | ||
exit(EXIT_FAILURE); | ||
} | ||
} | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.