Skip to content

Commit 923dbc1

Browse files
committed
Initial CUDA 13 work. Compiles and runs on RH9 with both CUDA 12 and 13.
1 parent 7844b53 commit 923dbc1

3 files changed

Lines changed: 33 additions & 8 deletions

File tree

include/RAJA/policy/cuda/MemUtils_CUDA.hpp

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -146,10 +146,26 @@ struct DevicePinnedAllocator
146146
void* ptr;
147147
CAMP_CUDA_API_INVOKE_AND_CHECK(cudaMallocManaged, &ptr, nbytes,
148148
cudaMemAttachGlobal);
149+
#if CUDART_VERSION >= 13000
150+
cudaMemLocation devLoc{};
151+
devLoc.type = cudaMemLocationTypeDevice;
152+
devLoc.id = device;
153+
154+
CAMP_CUDA_API_INVOKE_AND_CHECK(cudaMemAdvise, ptr, nbytes,
155+
cudaMemAdviseSetPreferredLocation, devLoc);
156+
157+
cudaMemLocation cpuLoc{};
158+
cpuLoc.type = cudaMemLocationTypeHost;
159+
cpuLoc.id = 0;
160+
161+
CAMP_CUDA_API_INVOKE_AND_CHECK(cudaMemAdvise, ptr, nbytes,
162+
cudaMemAdviseSetAccessedBy, cpuLoc);
163+
#else
149164
CAMP_CUDA_API_INVOKE_AND_CHECK(cudaMemAdvise, ptr, nbytes,
150165
cudaMemAdviseSetPreferredLocation, device);
151166
CAMP_CUDA_API_INVOKE_AND_CHECK(cudaMemAdvise, ptr, nbytes,
152167
cudaMemAdviseSetAccessedBy, cudaCpuDeviceId);
168+
#endif
153169

154170
return ptr;
155171
}

test/unit/resource/tests/test-resource-AsyncTime.hpp

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -32,22 +32,31 @@ gpu_time_wait_for(float time, float clockrate) {
3232
int get_clockrate()
3333
{
3434
int cuda_device = 0;
35-
cudaDeviceProp deviceProp;
3635
CAMP_CUDA_API_INVOKE_AND_CHECK(cudaGetDevice, &cuda_device);
37-
CAMP_CUDA_API_INVOKE_AND_CHECK(cudaGetDeviceProperties, &deviceProp, cuda_device);
38-
if ((deviceProp.concurrentKernels == 0))
36+
37+
int concurrentKernels = 0;
38+
CAMP_CUDA_API_INVOKE_AND_CHECK(cudaDeviceGetAttribute,
39+
&concurrentKernels,
40+
cudaDevAttrConcurrentKernels,
41+
cuda_device);
42+
43+
if (concurrentKernels == 0)
3944
{
4045
printf("> GPU does not support concurrent kernel execution\n");
4146
printf(" CUDA kernel runs will be serialized\n");
4247
return -1;
4348
}
44-
//printf("> Detected Compute SM %d.%d hardware with %d multi-processors\n",
45-
// deviceProp.major, deviceProp.minor, deviceProp.multiProcessorCount);
49+
50+
int clockRate = 0;
51+
CAMP_CUDA_API_INVOKE_AND_CHECK(cudaDeviceGetAttribute,
52+
&clockRate,
53+
cudaDevAttrClockRate,
54+
cuda_device);
4655

4756
#if defined(__arm__) || defined(__aarch64__)
48-
return deviceProp.clockRate/1000;
57+
return clockRate / 1000;
4958
#else
50-
return deviceProp.clockRate;
59+
return clockRate;
5160
#endif
5261
}
5362

tpl/camp

0 commit comments

Comments
 (0)