Skip to content

[Offload] Rework compiling device code for unit test suites #144776

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

jhuber6
Copy link
Contributor

@jhuber6 jhuber6 commented Jun 18, 2025

Summary:
I'll probably want to use this as a more generic utility in the future.
This patch reworks it to make it a top level function. I also tried to
decouple this from the OpenMP utilities to make that easier in the
future. Instead, I just use -march=native functionality which is the
same thing. Needed a small hack to skip the linker stage for checking if
that works.

This should still create the same output as far as I'm aware.

@jhuber6
Copy link
Contributor Author

jhuber6 commented Jun 18, 2025

@callumfare @RossBrunton

@llvmbot
Copy link
Member

llvmbot commented Jun 18, 2025

@llvm/pr-subscribers-offload

Author: Joseph Huber (jhuber6)

Changes

Summary:
I'll probably want to use this as a more generic utility in the future.
This patch reworks it to make it a top level function. I also tried to
decouple this from the OpenMP utilities to make that easier in the
future. Instead, I just use -march=native functionality which is the
same thing. Needed a small hack to skip the linker stage for checking if
that works.

This should still create the same output as far as I'm aware.


Full diff: https://github.com/llvm/llvm-project/pull/144776.diff

2 Files Affected:

  • (modified) offload/unittests/CMakeLists.txt (+73-1)
  • (modified) offload/unittests/OffloadAPI/device_code/CMakeLists.txt (+1-67)
diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 985dd892d8046..cbeae5ce72989 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -1,6 +1,73 @@
 add_custom_target(OffloadUnitTests)
 set_target_properties(OffloadUnitTests PROPERTIES FOLDER "Tests/UnitTests")
 
+function(add_offload_test_device_code test_filename test_name)
+  set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
+  set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+
+  # Try to build with support for NVPTX devices.
+  if("cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+    find_package(CUDAToolkit QUIET)
+    if(CUDAToolkit_FOUND)
+      get_filename_component(cuda_path "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
+    endif()
+    check_cxx_compiler_flag(
+      "--target=nvptx64-nvidia-cuda -march=native --cuda-path=${cuda_path}" PLATFORM_HAS_NVPTX)
+
+    if(PLATFORM_HAS_NVPTX)
+      set(nvptx_arch "native")
+    elseif(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
+      set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH}")
+    endif()
+
+    if(nvptx_arch AND CUDAToolkit_FOUND)
+      set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin")
+      add_custom_command(
+        OUTPUT ${output_file}
+        COMMAND ${CMAKE_C_COMPILER}
+        --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
+        -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
+        -c ${SRC_PATH} -o ${output_file}
+        DEPENDS ${SRC_PATH}
+      )
+      add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file})
+    endif()
+  endif()
+
+  # Try to build with support for AMDGPU devices.
+  if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+    check_cxx_compiler_flag("--target=amdgcn-amd-amdhsa -mcpu=native" PLATFORM_HAS_AMDGPU)
+
+    if(PLATFORM_HAS_AMDGPU)
+      set(amdgpu_arch "native")
+    elseif(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
+      set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}")
+    endif()
+
+    if(amdgpu_arch)
+      set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
+      add_custom_command(
+        OUTPUT ${output_file}
+        COMMAND ${CMAKE_C_COMPILER}
+        --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
+        -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
+        -c ${SRC_PATH} -o ${output_file}
+        DEPENDS ${SRC_PATH}
+      )
+      add_custom_target(${test_name}.amdgpu DEPENDS ${output_file})
+    endif()
+  endif()
+
+  # Create a single dependency target for the device code.
+  add_custom_target(${test_name})
+  if(TARGET ${test_name}.amdgpu)
+    add_dependencies(${test_name} ${test_name}.amdgpu)
+  endif()
+  if(TARGET ${test_name}.nvptx64)
+    add_dependencies(${test_name} ${test_name}.nvptx64)
+  endif()
+endfunction()
+
 function(add_offload_unittest test_dirname)
   set(target_name "${test_dirname}.unittests")
 
@@ -9,10 +76,15 @@ function(add_offload_unittest test_dirname)
   add_unittest(OffloadUnitTests "${target_name}"
     ${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp
     ${files})
-  add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} OffloadUnitTestsDeviceBins)
+  add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} offload_device_binaries)
   target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
   target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
   target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
 endfunction()
 
+set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
+  "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
+set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
+  "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
+
 add_subdirectory(OffloadAPI)
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index c2e4d0cb24e6e..c475bc6f314dd 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -1,72 +1,6 @@
-macro(add_offload_test_device_code test_filename test_name)
-    set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
-
-    # Build for NVPTX
-    if(OFFLOAD_TEST_TARGET_NVIDIA)
-        set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin)
-        add_custom_command(OUTPUT ${BIN_PATH}
-            COMMAND
-            ${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
-            ${ARGN}
-            -march=${LIBOMPTARGET_DEP_CUDA_ARCH}
-            --cuda-path=${CUDA_ROOT}
-            ${SRC_PATH} -o ${BIN_PATH}
-            DEPENDS ${SRC_PATH}
-        )
-        list(APPEND BIN_PATHS ${BIN_PATH})
-    endif()
-
-    # Build for AMDGPU
-    if(OFFLOAD_TEST_TARGET_AMDGPU)
-        set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin)
-        add_custom_command(OUTPUT ${BIN_PATH}
-            COMMAND
-            ${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
-            ${ARGN}
-            -mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
-            ${SRC_PATH} -o ${BIN_PATH}
-            DEPENDS ${SRC_PATH}
-        )
-        list(APPEND BIN_PATHS ${BIN_PATH})
-    endif()
-
-    # TODO: Build for host CPU
-endmacro()
-
-
-# Decide what device targets to build for. LibomptargetGetDependencies is
-# included at the top-level so the GPUs present on the system are already
-# detected.
-set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
-    "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
-set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
-    "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
-
-find_package(CUDAToolkit QUIET)
-if(CUDAToolkit_FOUND)
-  get_filename_component(CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
-endif()
-if (OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
-    set(LIBOMPTARGET_DEP_CUDA_ARCH ${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH})
-    set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-elseif (LIBOMPTARGET_FOUND_NVIDIA_GPU AND CUDA_ROOT AND "cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
-    set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-endif()
-
-if (OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
-    set(LIBOMPTARGET_DEP_AMDGPU_ARCH ${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH})
-    set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-elseif (LIBOMPTARGET_FOUND_AMDGPU_GPU AND "amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
-    list(GET LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST 0 LIBOMPTARGET_DEP_AMDGPU_ARCH)
-    set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-endif()
-
 add_offload_test_device_code(foo.c foo)
 add_offload_test_device_code(bar.c bar)
-# By default, amdhsa will add a number of "hidden" arguments to the kernel defintion
-# O3 disables this, and results in a kernel function with actually no arguments as seen by liboffload
 add_offload_test_device_code(noargs.c noargs -O3)
 
-add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
-
+add_custom_target(offload_device_binaries DEPENDS foo bar noargs)
 set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)

@jhuber6 jhuber6 force-pushed the ReworkDeviceCode branch from 4bc1aa3 to 447f1dc Compare June 19, 2025 14:34
Summary:
I'll probably want to use this as a more generic utility in the future.
This patch reworks it to make it a top level function. I also tried to
decouple this from the OpenMP utilities to make that easier in the
future. Instead, I just use `-march=native` functionality which is the
same thing. Needed a small hack to skip the linker stage for checking if
that works.

This should still create the same output as far as I'm aware.
@jhuber6 jhuber6 force-pushed the ReworkDeviceCode branch from 447f1dc to fffb54b Compare June 19, 2025 14:40
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants