
Commit 036fddf

miscco and alliepiper authored
Add some smoke tests that we are not completely breaking NVHPC stdpar (NVIDIA#4131)
Co-authored-by: Allison Piper <[email protected]>
1 parent e5d0a70 commit 036fddf

7 files changed: +144, -26 lines

ci/build_stdpar.sh

+31 lines (new file)

#!/bin/bash

set -euo pipefail

# Ensure the script is being executed in the root cccl directory:
cd "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/..";

# Get the current CCCL info:
readonly cccl_repo="${PWD}"
readonly workdir="${cccl_repo}/test/stdpar"

CXX_STANDARD=17

args=("$@")
while [ "${#args[@]}" -ne 0 ]; do
  case "${args[0]}" in
    -std) CXX_STANDARD="${args[1]}"; args=("${args[@]:2}");;
    *) echo "Unrecognized option: ${args[0]}"; exit 1 ;;
  esac
done

mkdir -p "${workdir}"
cd "${workdir}"

# Configure and build
rm -rf build
mkdir build
cd build
# Explicitly compile for hopper since the CI machine does not have a gpu:
cmake -G Ninja .. -DCMAKE_CXX_STANDARD="${CXX_STANDARD}" -DCMAKE_CXX_FLAGS="-gpu=cc90"
cmake --build .

ci/inspect_changes.sh

+4 lines

@@ -26,6 +26,7 @@ subprojects=(
   cub
   thrust
   cudax
+  stdpar
   python
   cccl_c_parallel
   c2h
@@ -38,6 +39,7 @@ declare -A dependencies=(
   [cub]="cccl libcudacxx thrust c2h"
   [thrust]="cccl libcudacxx cub"
   [cudax]="cccl libcudacxx"
+  [stdpar]="cccl libcudacxx cub thrust"
   [python]="cccl libcudacxx cub thrust cccl_c_parallel"
   [cccl_c_parallel]="cccl libcudacxx cub thrust"
   [c2h]="cccl libcudacxx cub thrust"
@@ -49,6 +51,7 @@ declare -A project_names=(
   [cub]="CUB"
   [thrust]="Thrust"
   [cudax]="CUDA Experimental"
+  [stdpar]="stdpar"
   [python]="python"
   [cccl_c_parallel]="CCCL C Parallel Library"
   [c2h]="Catch2Helper"
@@ -59,6 +62,7 @@ declare -A project_names=(
 # of any subproject directory.
 declare -A project_dirs=(
   [cccl_c_parallel]="c/parallel"
+  [stdpar]="test/stdpar"
 )

 # Changes to files / directories listed here are ignored when checking if the

ci/matrix.yaml

+5 lines

@@ -58,6 +58,8 @@ workflows:
   # cccl-infra:
   - {jobs: ['infra'], project: 'cccl', ctk: '12.0', cxx: ['gcc12', 'clang14'], gpu: 'rtx2080'}
   - {jobs: ['infra'], project: 'cccl', ctk: 'curr', cxx: ['gcc', 'clang'], gpu: 'rtx2080'}
+  # NVHPC stdpar smoke tests
+  - {jobs: ['build'], project: 'stdpar', std: 'all', ctk: '12.6', cxx: 'nvhpc', cpu: ['amd64', 'arm64']}

 nightly:
   # Edge-case jobs
@@ -239,6 +241,9 @@ projects:
     job_map: { test: ['test_cpu', 'test_gpu'] }
   cudax:
     stds: [17, 20]
+  stdpar:
+    name: 'NVHPC stdpar'
+    stds: [17, 20]
   python:
     name: "cuda (python)"
     job_map: { build: [], test: ['test_nobuild'] }

libcudacxx/include/cuda/std/__cccl/builtin.h

+26, -18 lines

@@ -22,6 +22,8 @@
 #  pragma system_header
 #endif // no system header

+#include <cuda/std/__cccl/extended_data_types.h>
+
 //! This file consolidates all compiler builtin detection for CCCL.
 //!
 //! To work around older compilers not supporting `__has_builtin` we use `_CCCL_CHECK_BUILTIN` that detects more
@@ -427,14 +429,16 @@
 #  define _CCCL_BUILTIN_HUGE_VALL() static_cast<long double>(__builtin_huge_val())
 #endif // _CCCL_CHECK_BUILTIN(builtin_huge_vall)

-#if _CCCL_CHECK_BUILTIN(builtin_huge_valf128) || _CCCL_COMPILER(GCC, >=, 7)
-#  define _CCCL_BUILTIN_HUGE_VALF128() __builtin_huge_valf128()
-#endif // _CCCL_CHECK_BUILTIN(builtin_huge_valf128) || _CCCL_COMPILER(GCC, >=, 7)
+#if _CCCL_HAS_FLOAT128()
+#  if _CCCL_CHECK_BUILTIN(builtin_huge_valf128) || _CCCL_COMPILER(GCC, >=, 7)
+#    define _CCCL_BUILTIN_HUGE_VALF128() __builtin_huge_valf128()
+#  endif // _CCCL_CHECK_BUILTIN(builtin_huge_valf128) || _CCCL_COMPILER(GCC, >=, 7)

 // nvcc does not implement __builtin_huge_valf128
-#if _CCCL_CUDA_COMPILER(NVCC)
-#  undef _CCCL_BUILTIN_HUGE_VALF128
-#endif // _CCCL_CUDA_COMPILER(NVCC)
+#  if _CCCL_CUDA_COMPILER(NVCC)
+#    undef _CCCL_BUILTIN_HUGE_VALF128
+#  endif // _CCCL_CUDA_COMPILER(NVCC)
+#endif // _CCCL_HAS_FLOAT128()

 #if _CCCL_CHECK_BUILTIN(builtin_hypot) || _CCCL_COMPILER(GCC)
 #  define _CCCL_BUILTIN_HYPOTF(...) __builtin_hypotf(__VA_ARGS__)
@@ -575,14 +579,16 @@
 #  define _CCCL_BUILTIN_NANL(...) static_cast<long double>(__builtin_nan(__VA_ARGS__))
 #endif // _CCCL_CHECK_BUILTIN(builtin_nanl)

-#if _CCCL_CHECK_BUILTIN(builtin_nanf128) || _CCCL_COMPILER(GCC, >=, 7)
-#  define _CCCL_BUILTIN_NANF128(...) __builtin_nanf128(__VA_ARGS__)
-#endif // _CCCL_CHECK_BUILTIN(builtin_nanf128) || _CCCL_COMPILER(GCC, >=, 7)
+#if _CCCL_HAS_FLOAT128()
+#  if _CCCL_CHECK_BUILTIN(builtin_nanf128) || _CCCL_COMPILER(GCC, >=, 7)
+#    define _CCCL_BUILTIN_NANF128(...) __builtin_nanf128(__VA_ARGS__)
+#  endif // _CCCL_CHECK_BUILTIN(builtin_nanf128) || _CCCL_COMPILER(GCC, >=, 7)

 // nvcc does not implement __builtin_nanf128
-#if _CCCL_CUDA_COMPILER(NVCC)
-#  undef _CCCL_BUILTIN_NANF128
-#endif // _CCCL_CUDA_COMPILER(NVCC)
+#  if _CCCL_CUDA_COMPILER(NVCC)
+#    undef _CCCL_BUILTIN_NANF128
+#  endif // _CCCL_CUDA_COMPILER(NVCC)
+#endif // _CCCL_HAS_FLOAT128()

 #if _CCCL_CHECK_BUILTIN(builtin_nansf) || _CCCL_COMPILER(MSVC) || _CCCL_COMPILER(GCC, <, 10)
 #  define _CCCL_BUILTIN_NANSF(...) __builtin_nansf(__VA_ARGS__)
@@ -598,14 +604,16 @@
 #  define _CCCL_BUILTIN_NANSL(...) static_cast<long double>(__builtin_nans(__VA_ARGS__))
 #endif // _CCCL_CHECK_BUILTIN(builtin_nansl)

-#if _CCCL_CHECK_BUILTIN(builtin_nansf128) || _CCCL_COMPILER(GCC, >=, 7)
-#  define _CCCL_BUILTIN_NANSF128(...) __builtin_nansf128(__VA_ARGS__)
-#endif // _CCCL_CHECK_BUILTIN(builtin_nansf128) || _CCCL_COMPILER(GCC, >=, 7)
+#if _CCCL_HAS_FLOAT128()
+#  if _CCCL_CHECK_BUILTIN(builtin_nansf128) || _CCCL_COMPILER(GCC, >=, 7)
+#    define _CCCL_BUILTIN_NANSF128(...) __builtin_nansf128(__VA_ARGS__)
+#  endif // _CCCL_CHECK_BUILTIN(builtin_nansf128) || _CCCL_COMPILER(GCC, >=, 7)

 // nvcc does not implement __builtin_nansf128
-#if _CCCL_CUDA_COMPILER(NVCC)
-#  undef _CCCL_BUILTIN_NANSF128
-#endif // _CCCL_CUDA_COMPILER(NVCC)
+#  if _CCCL_CUDA_COMPILER(NVCC)
+#    undef _CCCL_BUILTIN_NANSF128
+#  endif // _CCCL_CUDA_COMPILER(NVCC)
+#endif // _CCCL_HAS_FLOAT128()

 #if _CCCL_CHECK_BUILTIN(builtin_nearbyint) || _CCCL_COMPILER(GCC)
 #  define _CCCL_BUILTIN_NEARBYINTF(...) __builtin_nearbyintf(__VA_ARGS__)
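
Note: the f128 builtin detection is now nested inside _CCCL_HAS_FLOAT128(), so the builtin macros are only defined when the __float128 type itself is available. A minimal consumer sketch, not part of this commit; the helper name and the direct include of this internal detail header are illustrative assumptions:

#include <cuda/std/__cccl/builtin.h> // internal CCCL header, included directly only for illustration

#if _CCCL_HAS_FLOAT128() && defined(_CCCL_BUILTIN_NANF128)
// Hypothetical helper: only compiled when both the __float128 type and the
// builtin are detected, which is exactly the invariant this diff establishes.
__float128 quiet_nan_f128()
{
  return _CCCL_BUILTIN_NANF128("");
}
#endif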

libcudacxx/include/cuda/std/__cccl/extended_data_types.h

+10, -8 lines

@@ -22,6 +22,7 @@
 #  pragma system_header
 #endif // no system header

+#include <cuda/std/__cccl/architecture.h>
 #include <cuda/std/__cccl/diagnostic.h>
 #include <cuda/std/__cccl/os.h>
 #include <cuda/std/__cccl/preprocessor.h>
@@ -81,14 +82,15 @@
 #  define _CCCL_HAS_NVFP4() 1
 #endif

-// NVC++ supports float128 only in host code
-#if !defined(CCCL_DISABLE_FLOAT128_SUPPORT) && _CCCL_OS(LINUX) \
-  && ((_CCCL_COMPILER(NVRTC) && defined(__CUDACC_RTC_FLOAT128__)) /*NVRTC*/ \
-      || defined(__SIZEOF_FLOAT128__) || defined(__FLOAT128__)) /*HOST COMPILERS*/ \
-  && (!defined(__CUDA_ARCH__) || (defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 1000)) /*DEVICE CODE*/
-#  undef _CCCL_HAS_FLOAT128
-#  define _CCCL_HAS_FLOAT128() 1
-#endif
+#if !defined(CCCL_DISABLE_FLOAT128_SUPPORT) && _CCCL_OS(LINUX) && !_CCCL_ARCH(ARM64)
+#  if (defined(__CUDACC_RTC_FLOAT128__) || defined(__SIZEOF_FLOAT128__) || defined(__FLOAT128__)) /*HOST COMPILERS*/
+#    if _CCCL_CUDA_COMPILER(NVHPC) \
+      || ((_CCCL_CUDA_COMPILER(NVCC) || _CCCL_CUDA_COMPILER(CLANG)) && __CUDA_ARCH__ >= 1000) /*DEVICE CODE*/
+#      undef _CCCL_HAS_FLOAT128
+#      define _CCCL_HAS_FLOAT128() 1
+#    endif // CUDA compiler
+#  endif // Host compiler support
+#endif // !CCCL_DISABLE_FLOAT128_SUPPORT && _CCCL_OS(LINUX)

 // gcc does not allow to use 'operator""q' when __STRICT_ANSI__ is defined, it may be allowed by
 // -fext-numeric-literals, but we have no way to detect it. However, from gcc 13, we can use 'operator""f128' and cast
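
Note: with the platform and compiler conditions consolidated here, downstream code should branch only on _CCCL_HAS_FLOAT128() rather than re-deriving them. A minimal usage sketch, not from this commit; the alias name is hypothetical and the direct include of this internal header is an assumption made for illustration:

#include <cuda/std/__cccl/extended_data_types.h> // internal CCCL header, included directly only for illustration

#if _CCCL_HAS_FLOAT128()
using widest_float = __float128;  // enabled when the guard above detects float128 support
#else
using widest_float = long double; // conservative fallback on other configurations
#endif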

test/stdpar/CMakeLists.txt

+46 lines (new file)

cmake_minimum_required(VERSION 3.21)

# NOTE: this is built outside of the libcu++ test harness
project(CCCL_STDPAR_TESTS LANGUAGES CXX)

if (NOT CMAKE_CXX_COMPILER_ID STREQUAL NVHPC)
  message(FATAL_ERROR "The stdpar tests require nvc++ for CMAKE_CXX_COMPILER.")
endif()

# Enable testing for the project
enable_testing()

find_package(CCCL CONFIG REQUIRED
  NO_DEFAULT_PATH # Only check the explicit HINTS below:
  HINTS "${CMAKE_CURRENT_LIST_DIR}/../../lib/cmake/cccl/"
)

file(GLOB test_files
  LIST_DIRECTORIES false
  RELATIVE "${CMAKE_CURRENT_LIST_DIR}"
  CONFIGURE_DEPENDS
  "tests/*.cpp"
)

function(cccl_add_stdpar_test test_file)
  get_filename_component(test_name ${test_file} NAME_WE)

  add_executable(stdpar_test_${test_name} ${test_file})
  target_link_libraries(stdpar_test_${test_name} PUBLIC CCCL::CCCL)

  # Ensure that we are testing with GPU support
  target_compile_options(stdpar_test_${test_name} PUBLIC -stdpar=gpu)
  target_link_options(stdpar_test_${test_name} PUBLIC -stdpar=gpu)

  # Ensure that we are indeed testing the same CCCL version
  target_compile_definitions(stdpar_test_${test_name} PUBLIC CMAKE_CCCL_VERSION_MAJOR=${CCCL_VERSION_MAJOR})
  target_compile_definitions(stdpar_test_${test_name} PUBLIC CMAKE_CCCL_VERSION_MINOR=${CCCL_VERSION_MINOR})
  target_compile_definitions(stdpar_test_${test_name} PUBLIC CMAKE_CCCL_VERSION_PATCH=${CCCL_VERSION_PATCH})

  # Register with ctest
  add_test(NAME stdpar_test_${test_name} COMMAND stdpar_test_${test_name})
endfunction()

foreach(test IN LISTS test_files)
  cccl_add_stdpar_test(${test})
endforeach()

test/stdpar/tests/reduce.cpp

+22 lines (new file)

#include <algorithm>
#include <cassert>
#include <execution>
#include <numeric>
#include <vector>

// Ensure that we are indeed using the correct CCCL version
static_assert(CCCL_MAJOR_VERSION == CMAKE_CCCL_VERSION_MAJOR);
static_assert(CCCL_MINOR_VERSION == CMAKE_CCCL_VERSION_MINOR);
static_assert(CCCL_PATCH_VERSION == CMAKE_CCCL_VERSION_PATCH);

constexpr int N = 1000;

int main()
{
  std::vector<int> v(N);
  std::fill(std::execution::par_unseq, v.begin(), v.end(), 42);
  int sum = std::reduce(std::execution::par_unseq, v.begin(), v.end(), 100, [](int a, int b) {
    return a + b;
  });
  assert(sum == (42 * N) + 100);
}
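
Note: the commit ships a single reduce-based smoke test; any additional tests/*.cpp file would be picked up automatically by the file(GLOB ...) in test/stdpar/CMakeLists.txt above. A hypothetical follow-up test in the same style, not part of this commit, could exercise another offloaded algorithm such as sort:

// Hypothetical additional smoke test (e.g. tests/sort.cpp), for illustration only.
#include <algorithm>
#include <cassert>
#include <execution>
#include <numeric>
#include <vector>

int main()
{
  std::vector<int> v(1000);
  // Fill with a descending sequence, then sort it with the offloaded backend.
  std::iota(v.rbegin(), v.rend(), 0);
  std::sort(std::execution::par_unseq, v.begin(), v.end());
  assert(std::is_sorted(std::execution::par_unseq, v.begin(), v.end()));
  assert(v.front() == 0 && v.back() == 999);
}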
