From 6b3fb6b57aa7db84611f6b47cec9127157747556 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 08:43:52 -0500 Subject: [PATCH 01/20] Add missing cassert includes. --- hoomd/hpmc/IntegratorHPMCMonoGPU.cu | 2 ++ hoomd/hpmc/IntegratorHPMCMonoGPUMoves.cuh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/hoomd/hpmc/IntegratorHPMCMonoGPU.cu b/hoomd/hpmc/IntegratorHPMCMonoGPU.cu index 2dafe6adf8..0d00e37682 100644 --- a/hoomd/hpmc/IntegratorHPMCMonoGPU.cu +++ b/hoomd/hpmc/IntegratorHPMCMonoGPU.cu @@ -1,6 +1,8 @@ // Copyright (c) 2009-2025 The Regents of the University of Michigan. // Part of HOOMD-blue, released under the BSD 3-Clause License. +#include <cassert> + #include "IntegratorHPMCMonoGPUTypes.cuh" namespace hoomd diff --git a/hoomd/hpmc/IntegratorHPMCMonoGPUMoves.cuh b/hoomd/hpmc/IntegratorHPMCMonoGPUMoves.cuh index 977002471a..c9987a7bbd 100644 --- a/hoomd/hpmc/IntegratorHPMCMonoGPUMoves.cuh +++ b/hoomd/hpmc/IntegratorHPMCMonoGPUMoves.cuh @@ -3,6 +3,8 @@ #pragma once +#include <cassert> + #include "hoomd/BoxDim.h" #include "hoomd/HOOMDMath.h" #include "hoomd/Index1D.h" From 1c3a2f0fa7b169c8c20f6e1383bb87d6a5b7e53d Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 08:44:16 -0500 Subject: [PATCH 02/20] Add missing undef --- hoomd/md/EvaluatorPairFrictionLJVariants.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hoomd/md/EvaluatorPairFrictionLJVariants.h b/hoomd/md/EvaluatorPairFrictionLJVariants.h index 9c0bd4bd0a..737bd330e0 100644 --- a/hoomd/md/EvaluatorPairFrictionLJVariants.h +++ b/hoomd/md/EvaluatorPairFrictionLJVariants.h @@ -120,4 +120,8 @@ class EvaluatorPairFrictionLJCoulombNewton }; } // namespace md } // namespace hoomd + +#undef DEVICE +#undef HOSTDEVICE + #endif // __PAIR_EVALUATOR_FRICTIONLJVARIANTS_H__ From 1b75fc92b8d6028892aac3de1d834ad7b2296c17 Mon Sep 17 00:00:00 2001 From: "Joshua A. 
Anderson" Date: Thu, 20 Nov 2025 08:48:51 -0500 Subject: [PATCH 03/20] Use modern external HIP headers when HOOMD_GPU_PLATFORM=CUDA --- CMake/hoomd/FindCUDALibs.cmake | 14 +++++++------- CMake/hoomd/HOOMDCUDASetup.cmake | 4 ++-- CMake/hoomd/HOOMDHIPSetup.cmake | 23 +++++++++-------------- CMake/hoomd/HOOMDMPISetup.cmake | 4 ++-- hoomd/CMakeLists.txt | 4 ++-- hoomd/ExecutionConfiguration.cc | 16 ++++++++-------- hoomd/ExecutionConfiguration.h | 4 ++-- hoomd/HOOMDMath.h | 6 +++--- hoomd/HOOMDVersion.cc | 4 ++-- hoomd/ManagedArray.h | 2 +- hoomd/WarpTools.cuh | 4 ++-- hoomd/extern/ECL.cuh | 6 +++--- hoomd/hpmc/CMakeLists.txt | 2 +- hoomd/hpmc/IntegratorHPMCMonoGPU.cuh | 4 ++-- hoomd/hpmc/IntegratorHPMCMonoGPU.h | 2 +- hoomd/hpmc/UpdaterGCAGPU.cu | 6 +++--- hoomd/hpmc/UpdaterGCAGPU.cuh | 4 ++-- hoomd/md/AnisoPotentialPairGPU.cuh | 2 +- hoomd/md/CommunicatorGrid.cc | 2 +- hoomd/md/CommunicatorGridGPU.cc | 2 +- hoomd/md/FrictionPairGPU.cuh | 2 +- hoomd/md/NeighborListGPUBinned.cuh | 2 +- hoomd/md/NeighborListGPUStencil.cuh | 2 +- hoomd/md/PPPMForceComputeGPU.h | 2 +- hoomd/md/PotentialPair.h | 2 +- hoomd/md/PotentialPairDPDThermoGPU.cuh | 2 +- hoomd/md/PotentialPairGPU.cuh | 2 +- hoomd/md/PotentialTersoffGPU.cuh | 2 +- 28 files changed, 63 insertions(+), 68 deletions(-) diff --git a/CMake/hoomd/FindCUDALibs.cmake b/CMake/hoomd/FindCUDALibs.cmake index 37e275254f..792c444913 100644 --- a/CMake/hoomd/FindCUDALibs.cmake +++ b/CMake/hoomd/FindCUDALibs.cmake @@ -1,7 +1,7 @@ # Find CUDA libraries and binaries used by HOOMD set(REQUIRED_CUDA_LIB_VARS "") -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") # find CUDA library path get_filename_component(CUDA_BIN_PATH ${CMAKE_CUDA_COMPILER} DIRECTORY) get_filename_component(CUDA_LIB_PATH "${CUDA_BIN_PATH}/../lib64/" ABSOLUTE) @@ -23,7 +23,7 @@ else() add_library(CUDA::cudart UNKNOWN IMPORTED) endif() -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") find_library(CUDA_cudadevrt_LIBRARY 
cudadevrt HINTS ${CUDA_LIB_PATH}) mark_as_advanced(CUDA_cudadevrt_LIBRARY) if(CUDA_cudadevrt_LIBRARY AND NOT TARGET CUDA::cudadevrt) @@ -38,7 +38,7 @@ else() add_library(CUDA::cudadevrt UNKNOWN IMPORTED) endif() -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") find_library(CUDA_cufft_LIBRARY cufft HINTS ${CUDA_LIB_PATH}) mark_as_advanced(CUDA_cufft_LIBRARY) if(CUDA_cufft_LIBRARY AND NOT TARGET CUDA::cufft) @@ -55,7 +55,7 @@ else() add_library(CUDA::cufft UNKNOWN IMPORTED) endif() -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") find_library(CUDA_cusolver_LIBRARY cusolver HINTS ${CUDA_LIB_PATH}) mark_as_advanced(CUDA_cusolver_LIBRARY) if(CUDA_cusolver_LIBRARY AND NOT TARGET CUDA::cusolver) @@ -72,7 +72,7 @@ else() add_library(CUDA::cusolver UNKNOWN IMPORTED) endif() -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") find_library(CUDA_cusparse_LIBRARY cusparse HINTS ${CUDA_LIB_PATH}) mark_as_advanced(CUDA_cusparse_LIBRARY) if(CUDA_cusparse_LIBRARY AND NOT TARGET CUDA::cusparse) @@ -96,7 +96,7 @@ if (HIP_PLATFORM STREQUAL "amd") message("Found hipfft includes: ${hipfft_INCLUDE_DIR}") endif() -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") # find compute-sanitizer / cuda-memcheck find_program(CUDA_MEMCHECK_EXECUTABLE NAMES compute-sanitizer @@ -115,7 +115,7 @@ if (HIP_PLATFORM STREQUAL "nvcc") mark_as_advanced(CUDA_MEMCHECK_EXECUTABLE) endif() -if (HIP_PLATFORM STREQUAL "nvcc") +if (HIP_PLATFORM STREQUAL "nvidia") include(FindPackageHandleStandardArgs) find_package_handle_standard_args(CUDALibs REQUIRED_VARS diff --git a/CMake/hoomd/HOOMDCUDASetup.cmake b/CMake/hoomd/HOOMDCUDASetup.cmake index 4da0263536..3e10c8c6d5 100644 --- a/CMake/hoomd/HOOMDCUDASetup.cmake +++ b/CMake/hoomd/HOOMDCUDASetup.cmake @@ -1,6 +1,6 @@ # setup CUDA compile options if (ENABLE_HIP) - if (HIP_PLATFORM STREQUAL "nvcc") + if (HIP_PLATFORM STREQUAL "nvidia") # setup nvcc to build for all CUDA 
architectures. Allow user to modify the list if desired if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) set(CUDA_ARCH_LIST 80 CACHE STRING "List of target sm_ architectures to compile CUDA code for. Separate with semicolons.") @@ -55,7 +55,7 @@ if (ENABLE_HIP) endif (ENABLE_HIP) # set CUSOLVER_AVAILABLE depending on CUDA Toolkit version -if (ENABLE_HIP AND HIP_PLATFORM STREQUAL "nvcc") +if (ENABLE_HIP AND HIP_PLATFORM STREQUAL "nvidia") # CUDA 8.0 requires that libgomp be linked in - see if we can link it try_compile(_can_link_gomp ${CMAKE_CURRENT_BINARY_DIR}/tmp diff --git a/CMake/hoomd/HOOMDHIPSetup.cmake b/CMake/hoomd/HOOMDHIPSetup.cmake index 8a15aa72d1..502ee7278a 100644 --- a/CMake/hoomd/HOOMDHIPSetup.cmake +++ b/CMake/hoomd/HOOMDHIPSetup.cmake @@ -1,7 +1,7 @@ if(ENABLE_HIP) if (HOOMD_GPU_PLATFORM STREQUAL "HIP") - find_package(HIP REQUIRED) + find_package(hip REQUIRED) CMAKE_MINIMUM_REQUIRED(VERSION 3.21 FATAL_ERROR) ENABLE_LANGUAGE(HIP) SET(HOOMD_DEVICE_LANGUAGE HIP) @@ -14,26 +14,21 @@ if(ENABLE_HIP) ENABLE_LANGUAGE(CUDA) SET(HOOMD_DEVICE_LANGUAGE CUDA) - set(HIP_INCLUDE_DIR "$,${CMAKE_CURRENT_SOURCE_DIR},${HOOMD_INSTALL_PREFIX}/${PYTHON_SITE_INSTALL_DIR}/include>/hoomd/extern/HIP/include/") - # use CUDA runtime version string(REGEX MATCH "([0-9]*).([0-9]*).([0-9]*).*" _hip_version_match "${CMAKE_CUDA_COMPILER_VERSION}") set(HIP_VERSION_MAJOR "${CMAKE_MATCH_1}") set(HIP_VERSION_MINOR "${CMAKE_MATCH_2}") set(HIP_VERSION_PATCH "${CMAKE_MATCH_3}") - set(HIP_PLATFORM "nvcc") - - # hipCUB - # Use system provided CUB for CUDA 11 and newer - set(HIPCUB_INCLUDE_DIR "$,${CMAKE_CURRENT_SOURCE_DIR},${HOOMD_INSTALL_PREFIX}/${PYTHON_SITE_INSTALL_DIR}/include>/hoomd/extern/hipCUB/hipcub/include/") + set(HIP_PLATFORM "nvidia") + find_package(hip REQUIRED) else() message(FATAL_ERROR "HOOMD_GPU_PLATFORM must be either CUDA or HIP") endif() - if(NOT TARGET hip::host) - add_library(hip::host INTERFACE IMPORTED) + if(${HIP_PLATFORM} STREQUAL "nvidia") + # HIP does 
not configure hip::host properly for the nvidia platform set_target_properties(hip::host PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${HIP_INCLUDE_DIR};${HIPCUB_INCLUDE_DIR}") + INTERFACE_INCLUDE_DIRECTORIES "${HIP_INCLUDE_DIR}") # set HIP_VERSION_* on non-CUDA targets (the version is already defined on AMD targets through hipcc) set_property(TARGET hip::host APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS @@ -46,10 +41,10 @@ if(ENABLE_HIP) endif() # branch upon HCC or NVCC target - if(${HIP_PLATFORM} STREQUAL "nvcc") - set_property(TARGET hip::host APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS __HIP_PLATFORM_NVCC__) + if(${HIP_PLATFORM} STREQUAL "nvidia") + set_property(TARGET hip::host APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS __HIP_PLATFORM_NVIDIA__ HIPPER_CUDA) elseif(${HIP_PLATFORM} STREQUAL "amd") - set_property(TARGET hip::host APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS __HIP_PLATFORM_AMD__ __HIP_PLATFORM_HCC__) + set_property(TARGET hip::host APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS __HIP_PLATFORM_AMD__ __HIP_PLATFORM_HCC__ HIPPER_HIP) endif() find_package(CUDALibs REQUIRED) diff --git a/CMake/hoomd/HOOMDMPISetup.cmake b/CMake/hoomd/HOOMDMPISetup.cmake index b4194420b5..6f432503ee 100644 --- a/CMake/hoomd/HOOMDMPISetup.cmake +++ b/CMake/hoomd/HOOMDMPISetup.cmake @@ -36,12 +36,12 @@ if (ENABLE_MPI) mark_as_advanced(OMPI_INFO) if (ENABLE_HIP) - string(REPLACE "-pthread" "$<$,$>:-Xcompiler>;-pthread" + string(REPLACE "-pthread" "$<$,$>:-Xcompiler>;-pthread" _MPI_C_COMPILE_OPTIONS "${MPI_C_COMPILE_OPTIONS}") set_property(TARGET MPI::MPI_C PROPERTY INTERFACE_COMPILE_OPTIONS "${_MPI_C_COMPILE_OPTIONS}") unset(_MPI_C_COMPILE_OPTIONS) - string(REPLACE "-pthread" "$<$,$>:-Xcompiler>;-pthread" + string(REPLACE "-pthread" "$<$,$>:-Xcompiler>;-pthread" _MPI_CXX_COMPILE_OPTIONS "${MPI_CXX_COMPILE_OPTIONS}") set_property(TARGET MPI::MPI_CXX PROPERTY INTERFACE_COMPILE_OPTIONS "${_MPI_CXX_COMPILE_OPTIONS}") unset(_MPI_CXX_COMPILE_OPTIONS) diff --git 
a/hoomd/CMakeLists.txt b/hoomd/CMakeLists.txt index 4abb33a2d0..713e6f639a 100644 --- a/hoomd/CMakeLists.txt +++ b/hoomd/CMakeLists.txt @@ -223,7 +223,7 @@ add_library(HOOMD::_hoomd ALIAS _hoomd) # Work around support for the delete operator with pybind11 and older versions of clang # https://github.com/pybind/pybind11/issues/1604 if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") - target_compile_options(_hoomd PUBLIC $<$,$>:-Xcompiler=>;-fsized-deallocation) + target_compile_options(_hoomd PUBLIC $<$,$>:-Xcompiler=>;-fsized-deallocation) endif() # add quick hull as its own library so that it's symbols can be public @@ -267,7 +267,7 @@ target_compile_definitions(_hoomd PUBLIC HOOMD_LONGREAL_SIZE=${HOOMD_LONGREAL_SI if (ENABLE_HIP) if (HIP_PLATFORM STREQUAL "amd") target_link_libraries(_hoomd PUBLIC hip::hipfft) - elseif(HIP_PLATFORM STREQUAL "nvcc") + elseif(HIP_PLATFORM STREQUAL "nvidia") target_link_libraries(_hoomd PUBLIC CUDA::cudart CUDA::cufft) endif() target_compile_definitions(_hoomd PUBLIC ENABLE_HIP CUDA_ARCH=${_cuda_min_arch}) diff --git a/hoomd/ExecutionConfiguration.cc b/hoomd/ExecutionConfiguration.cc index dd173dfc20..0829682a1b 100644 --- a/hoomd/ExecutionConfiguration.cc +++ b/hoomd/ExecutionConfiguration.cc @@ -7,7 +7,7 @@ #ifdef ENABLE_HIP #include -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) #include #endif #endif @@ -212,7 +212,7 @@ void ExecutionConfiguration::handleHIPError(hipError_t err, file += strlen(HOOMD_SOURCE_DIR); std::ostringstream s; -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ cudaError_t cuda_error = cudaPeekAtLastError(); s << "CUDA Error: " << string(cudaGetErrorString(cuda_error)); #else @@ -263,7 +263,7 @@ void ExecutionConfiguration::initializeGPU(int gpu_id) if (gpu_id != -1) { -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ cudaSetValidDevices(&s_capable_gpu_ids[gpu_id], 1); #endif hipSetDeviceFlags(hipDeviceMapHost); @@ -272,7 +272,7 @@ void 
ExecutionConfiguration::initializeGPU(int gpu_id) else { // initialize the default CUDA context from one of the capable GPUs -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ cudaSetValidDevices(&s_capable_gpu_ids[0], (int)s_capable_gpu_ids.size()); #endif hipSetDeviceFlags(hipDeviceMapHost); @@ -327,7 +327,7 @@ void ExecutionConfiguration::scanGPUs() if (error != hipSuccess) { std::string message = "Failed to get GPU device count: "; -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ cudaError_t cuda_error = cudaPeekAtLastError(); message += string(cudaGetErrorString(cuda_error)); #else @@ -352,7 +352,7 @@ void ExecutionConfiguration::scanGPUs() if (error != hipSuccess) { std::string message = "Failed to get device properties: "; -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ cudaError_t cuda_error = cudaPeekAtLastError(); message += string(cudaGetErrorString(cuda_error)); #else @@ -362,7 +362,7 @@ void ExecutionConfiguration::scanGPUs() continue; } -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ // exclude a GPU if it's compute version is not high enough int compoundComputeVer = prop.minor + prop.major * 10; @@ -386,7 +386,7 @@ void ExecutionConfiguration::scanGPUs() } // exclude a GPU when it doesn't support mapped memory -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ int supports_managed_memory = 0; cudaError_t cuda_error = cudaDeviceGetAttribute(&supports_managed_memory, cudaDevAttrConcurrentManagedAccess, diff --git a/hoomd/ExecutionConfiguration.h b/hoomd/ExecutionConfiguration.h index f24f20afd1..1a23f3ddf5 100644 --- a/hoomd/ExecutionConfiguration.h +++ b/hoomd/ExecutionConfiguration.h @@ -152,7 +152,7 @@ class PYBIND11_EXPORT ExecutionConfiguration hipSetDevice(m_gpu_id); hipDeviceSynchronize(); -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ hipProfilerStart(); #elif defined(__HIP_PLATFORM_HCC__) #ifdef ENABLE_ROCTRACER @@ -167,7 +167,7 @@ class PYBIND11_EXPORT 
ExecutionConfiguration { hipSetDevice(m_gpu_id); hipDeviceSynchronize(); -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ hipProfilerStop(); #elif defined(__HIP_PLATFORM_HCC__) #ifdef ENABLE_ROCTRACER diff --git a/hoomd/HOOMDMath.h b/hoomd/HOOMDMath.h index dbae39dbcc..365fe0ad2b 100644 --- a/hoomd/HOOMDMath.h +++ b/hoomd/HOOMDMath.h @@ -233,7 +233,7 @@ namespace fast inline HOSTDEVICE float rsqrt(float x) { #ifdef __HIP_DEVICE_COMPILE__ -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ return ::rsqrtf(x); #elif defined(__HIP_PLATFORM_HCC__) return ::__frsqrt_rn(x); @@ -248,7 +248,7 @@ inline HOSTDEVICE float rsqrt(float x) //! Compute the reciprocal square root of x inline HOSTDEVICE double rsqrt(double x) { -#if defined(__HIP_DEVICE_COMPILE__) && defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_DEVICE_COMPILE__) && defined(__HIP_PLATFORM_NVIDIA__) return ::rsqrt(x); #else return 1.0 / ::sqrt(x); @@ -720,7 +720,7 @@ HOSTDEVICE inline hoomd::Scalar3 operator+(const hoomd::Scalar3& a, const hoomd: return hoomd::make_scalar3(a.x + b.x, a.y + b.y, a.z + b.z); } -#if !defined(ENABLE_HIP) || defined(__HIP_PLATFORM_NVCC__) +#if !defined(ENABLE_HIP) || defined(__HIP_PLATFORM_NVIDIA__) //! 
Vector addition HOSTDEVICE inline hoomd::Scalar3& operator+=(hoomd::Scalar3& a, const hoomd::Scalar3& b) { diff --git a/hoomd/HOOMDVersion.cc b/hoomd/HOOMDVersion.cc index 55b252838e..a748b6dc93 100644 --- a/hoomd/HOOMDVersion.cc +++ b/hoomd/HOOMDVersion.cc @@ -21,7 +21,7 @@ std::string BuildInfo::getCompileFlags() int hip_minor = HIP_VERSION_MINOR; o << "GPU ["; -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) o << "CUDA"; #elif defined(__HIP_PLATFORM_HCC__) o << "ROCm"; @@ -105,7 +105,7 @@ std::string BuildInfo::getGPUAPIVersion() std::string BuildInfo::getGPUPlatform() { -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) return std::string("CUDA"); #elif defined(__HIP_PLATFORM_HCC__) return std::string("ROCm"); diff --git a/hoomd/ManagedArray.h b/hoomd/ManagedArray.h index 2ca58b0eea..15f5d8ae92 100644 --- a/hoomd/ManagedArray.h +++ b/hoomd/ManagedArray.h @@ -189,7 +189,7 @@ template class ManagedArray { if (managed && ptr) { -#if defined(__HIP_PLATFORM_NVCC__) && (CUDART_VERSION >= 8000) +#if defined(__HIP_PLATFORM_NVIDIA__) && (CUDART_VERSION >= 8000) cudaMemAdvise(ptr, sizeof(T) * N, cudaMemAdviseSetReadMostly, 0); #endif } diff --git a/hoomd/WarpTools.cuh b/hoomd/WarpTools.cuh index 410775d93c..2c10a05717 100644 --- a/hoomd/WarpTools.cuh +++ b/hoomd/WarpTools.cuh @@ -65,7 +65,7 @@ class WarpReduce public: DEVICE WarpReduce() { -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ static_assert(PTX_ARCH >= 300, "PTX architecture must be >= 300"); static_assert(LOGICAL_WARP_THREADS <= CUB_PTX_WARP_THREADS, "Logical warp size cannot exceed hardware warp size"); @@ -196,7 +196,7 @@ class WarpScan public: DEVICE WarpScan() { -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ static_assert(PTX_ARCH >= 300, "PTX architecture must be >= 300"); static_assert(LOGICAL_WARP_THREADS <= CUB_PTX_WARP_THREADS, "Logical warp size cannot exceed hardware warp size"); diff --git a/hoomd/extern/ECL.cuh 
b/hoomd/extern/ECL.cuh index 38b48dcdea..2ab5276eb8 100644 --- a/hoomd/extern/ECL.cuh +++ b/hoomd/extern/ECL.cuh @@ -57,7 +57,7 @@ inline void ecl_connected_components(const int nodes, static const int Device = 0; static const int ThreadsPerBlock = 256; -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ static const int warpsize = 32; #else static const int warpsize = 64; @@ -166,7 +166,7 @@ void compute2(const int nodes, const int* const __restrict__ nidx, const int* co int idx; if (lane == 0) idx = atomicAdd(&posL, 1); - #ifdef __HIP_PLATFORM_NVCC__ + #ifdef __HIP_PLATFORM_NVIDIA__ idx = __shfl_sync(0xffffffff,idx, 0); #else idx = __shfl(idx,0); @@ -200,7 +200,7 @@ void compute2(const int nodes, const int* const __restrict__ nidx, const int* co } if (lane == 0) idx = atomicAdd(&posL, 1); - #ifdef __HIP_PLATFORM_NVCC__ + #ifdef __HIP_PLATFORM_NVIDIA__ idx = __shfl_sync(0xffffffff,idx, 0); #else idx = __shfl(idx,0); diff --git a/hoomd/hpmc/CMakeLists.txt b/hoomd/hpmc/CMakeLists.txt index cc9a35abc0..383138b6d0 100644 --- a/hoomd/hpmc/CMakeLists.txt +++ b/hoomd/hpmc/CMakeLists.txt @@ -168,7 +168,7 @@ endif() # link the library to its dependencies target_link_libraries(_hpmc PUBLIC _hoomd) -if (ENABLE_HIP AND HIP_PLATFORM STREQUAL "nvcc") +if (ENABLE_HIP AND HIP_PLATFORM STREQUAL "nvidia") target_link_libraries(_hpmc PUBLIC CUDA::cusparse ) endif() diff --git a/hoomd/hpmc/IntegratorHPMCMonoGPU.cuh b/hoomd/hpmc/IntegratorHPMCMonoGPU.cuh index 3f1f808583..cb20300a00 100644 --- a/hoomd/hpmc/IntegratorHPMCMonoGPU.cuh +++ b/hoomd/hpmc/IntegratorHPMCMonoGPU.cuh @@ -29,7 +29,7 @@ namespace hpmc { namespace gpu { -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ #define MAX_BLOCK_SIZE 1024 #define MIN_BLOCK_SIZE 32 #else @@ -42,7 +42,7 @@ namespace kernel { //! 
Check narrow-phase overlaps template -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ __launch_bounds__(max_threads) #endif __global__ void hpmc_narrow_phase(const Scalar4* d_postype, diff --git a/hoomd/hpmc/IntegratorHPMCMonoGPU.h b/hoomd/hpmc/IntegratorHPMCMonoGPU.h index 8525f31353..ef41dc1a7f 100644 --- a/hoomd/hpmc/IntegratorHPMCMonoGPU.h +++ b/hoomd/hpmc/IntegratorHPMCMonoGPU.h @@ -840,7 +840,7 @@ template void IntegratorHPMCMonoGPU::updateCellWidth() // update the cell list this->m_cl->setNominalWidth(this->m_nominal_width); -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ // set memory hints cudaMemAdvise(this->m_params.data(), this->m_params.size() * sizeof(typename Shape::param_type), diff --git a/hoomd/hpmc/UpdaterGCAGPU.cu b/hoomd/hpmc/UpdaterGCAGPU.cu index 42dd69d976..8dbf341109 100644 --- a/hoomd/hpmc/UpdaterGCAGPU.cu +++ b/hoomd/hpmc/UpdaterGCAGPU.cu @@ -23,7 +23,7 @@ #include #pragma GCC diagnostic pop -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ #include #endif @@ -39,7 +39,7 @@ namespace hpmc { namespace gpu { -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ #define check_cusparse(a) \ { \ cusparseStatus_t status = (a); \ @@ -277,7 +277,7 @@ void connected_components(uint2* d_adj, const hipDeviceProp_t& dev_prop, CachedAllocator& alloc) { -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ thrust::device_ptr adj(d_adj); // sort the list of pairs diff --git a/hoomd/hpmc/UpdaterGCAGPU.cuh b/hoomd/hpmc/UpdaterGCAGPU.cuh index 58ecb063d2..3efb10e5e0 100644 --- a/hoomd/hpmc/UpdaterGCAGPU.cuh +++ b/hoomd/hpmc/UpdaterGCAGPU.cuh @@ -21,7 +21,7 @@ #include "IntegratorHPMCMonoGPUTypes.cuh" -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ #define MAX_BLOCK_SIZE 1024 #define MIN_BLOCK_SIZE 256 // a reasonable minimum to limit the number of template instantiations #else @@ -194,7 +194,7 @@ namespace kernel { //! 
Check narrow-phase overlaps template -#ifdef __HIP_PLATFORM_NVCC__ +#ifdef __HIP_PLATFORM_NVIDIA__ __launch_bounds__(max_threads) #endif __global__ void hpmc_cluster_overlaps(const Scalar4* d_postype, diff --git a/hoomd/md/AnisoPotentialPairGPU.cuh b/hoomd/md/AnisoPotentialPairGPU.cuh index 82015e2c39..f56cb06dc7 100644 --- a/hoomd/md/AnisoPotentialPairGPU.cuh +++ b/hoomd/md/AnisoPotentialPairGPU.cuh @@ -24,7 +24,7 @@ //! Maximum number of threads (width of a warp) // currently this is hardcoded, we should set it to the max of platforms -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) const int gpu_aniso_pair_force_max_tpp = 32; #elif defined(__HIP_PLATFORM_HCC__) const int gpu_aniso_pair_force_max_tpp = 64; diff --git a/hoomd/md/CommunicatorGrid.cc b/hoomd/md/CommunicatorGrid.cc index 10251c915b..9f8cf2fcf4 100644 --- a/hoomd/md/CommunicatorGrid.cc +++ b/hoomd/md/CommunicatorGrid.cc @@ -12,7 +12,7 @@ #if defined(ENABLE_HIP) #if __HIP_PLATFORM_HCC__ #include -#elif __HIP_PLATFORM_NVCC__ +#elif __HIP_PLATFORM_NVIDIA__ #include typedef cufftComplex hipfftComplex; #endif diff --git a/hoomd/md/CommunicatorGridGPU.cc b/hoomd/md/CommunicatorGridGPU.cc index 3fd9320328..83e6334e6c 100644 --- a/hoomd/md/CommunicatorGridGPU.cc +++ b/hoomd/md/CommunicatorGridGPU.cc @@ -10,7 +10,7 @@ #if __HIP_PLATFORM_HCC__ #include -#elif __HIP_PLATFORM_NVCC__ +#elif __HIP_PLATFORM_NVIDIA__ #include typedef cufftComplex hipfftComplex; #endif diff --git a/hoomd/md/FrictionPairGPU.cuh b/hoomd/md/FrictionPairGPU.cuh index 3932542ae0..1b665f3580 100644 --- a/hoomd/md/FrictionPairGPU.cuh +++ b/hoomd/md/FrictionPairGPU.cuh @@ -22,7 +22,7 @@ //! 
Maximum number of threads (width of a warp) // currently this is hardcoded, we should set it to the max of platforms -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) const int gpu_friction_pair_force_max_tpp = 32; #elif defined(__HIP_PLATFORM_HCC__) const int gpu_friction_pair_force_max_tpp = 64; diff --git a/hoomd/md/NeighborListGPUBinned.cuh b/hoomd/md/NeighborListGPUBinned.cuh index c05971d2e3..b7cf38d41b 100644 --- a/hoomd/md/NeighborListGPUBinned.cuh +++ b/hoomd/md/NeighborListGPUBinned.cuh @@ -15,7 +15,7 @@ */ // currently this is hardcoded, we should set it to the max of platforms -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) #define WARP_SIZE 32 #elif defined(__HIP_PLATFORM_HCC__) #define WARP_SIZE 64 diff --git a/hoomd/md/NeighborListGPUStencil.cuh b/hoomd/md/NeighborListGPUStencil.cuh index b303b52fa0..1db5c5f9da 100644 --- a/hoomd/md/NeighborListGPUStencil.cuh +++ b/hoomd/md/NeighborListGPUStencil.cuh @@ -14,7 +14,7 @@ \brief Declares GPU kernel code for neighbor list generation on the GPU */ -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) #define WARP_SIZE 32 #elif defined(__HIP_PLATFORM_HCC__) #define WARP_SIZE 64 diff --git a/hoomd/md/PPPMForceComputeGPU.h b/hoomd/md/PPPMForceComputeGPU.h index d74df567e9..da2ec75dd0 100644 --- a/hoomd/md/PPPMForceComputeGPU.h +++ b/hoomd/md/PPPMForceComputeGPU.h @@ -10,7 +10,7 @@ #if __HIP_PLATFORM_HCC__ #include -#elif __HIP_PLATFORM_NVCC__ +#elif __HIP_PLATFORM_NVIDIA__ #include typedef cufftComplex hipfftComplex; typedef cufftHandle hipfftHandle; diff --git a/hoomd/md/PotentialPair.h b/hoomd/md/PotentialPair.h index b5948bdb91..e5e3f10cdb 100644 --- a/hoomd/md/PotentialPair.h +++ b/hoomd/md/PotentialPair.h @@ -368,7 +368,7 @@ PotentialPair::PotentialPair(std::shared_ptr sysdef m_r_cut_nlist = std::make_shared>(m_typpair_idx.getNumElements(), m_exec_conf); nlist->addRCutMatrix(m_r_cut_nlist); -#if defined(ENABLE_HIP) && 
defined(__HIP_PLATFORM_NVCC__) +#if defined(ENABLE_HIP) && defined(__HIP_PLATFORM_NVIDIA__) if (m_pdata->getExecConf()->isCUDAEnabled()) { // m_params is _always_ in unified memory, so memadvise and prefetch diff --git a/hoomd/md/PotentialPairDPDThermoGPU.cuh b/hoomd/md/PotentialPairDPDThermoGPU.cuh index 8fcee9a0b6..59a2121e94 100644 --- a/hoomd/md/PotentialPairDPDThermoGPU.cuh +++ b/hoomd/md/PotentialPairDPDThermoGPU.cuh @@ -27,7 +27,7 @@ namespace md namespace kernel { // currently this is hardcoded, we should set it to the max of platforms -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) const int gpu_dpd_pair_force_max_tpp = 32; #elif defined(__HIP_PLATFORM_HCC__) const int gpu_dpd_pair_force_max_tpp = 64; diff --git a/hoomd/md/PotentialPairGPU.cuh b/hoomd/md/PotentialPairGPU.cuh index 755a757d62..2d77be1a4f 100644 --- a/hoomd/md/PotentialPairGPU.cuh +++ b/hoomd/md/PotentialPairGPU.cuh @@ -32,7 +32,7 @@ namespace kernel { //! Maximum number of threads (width of a warp) // currently this is hardcoded, we should set it to the max of platforms -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) const int gpu_pair_force_max_tpp = 32; #elif defined(__HIP_PLATFORM_HCC__) const int gpu_pair_force_max_tpp = 64; diff --git a/hoomd/md/PotentialTersoffGPU.cuh b/hoomd/md/PotentialTersoffGPU.cuh index f639519492..f1946c1e0d 100644 --- a/hoomd/md/PotentialTersoffGPU.cuh +++ b/hoomd/md/PotentialTersoffGPU.cuh @@ -29,7 +29,7 @@ namespace kernel { //! Maximum number of threads (width of a warp) // currently this is hardcoded, we should set it to the max of platforms -#if defined(__HIP_PLATFORM_NVCC__) +#if defined(__HIP_PLATFORM_NVIDIA__) const int gpu_tersoff_max_tpp = 32; #elif defined(__HIP_PLATFORM_HCC__) const int gpu_tersoff_max_tpp = 64; From d0d1444076d0752e904ce7d2cce0cfd779c9d89f Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 09:14:21 -0500 Subject: [PATCH 04/20] Remove unused submodules. 
--- .gitmodules | 7 ------- CMakeLists.txt | 1 - hoomd/extern/HIP | 1 - hoomd/extern/hipCUB | 1 - 4 files changed, 10 deletions(-) delete mode 160000 hoomd/extern/HIP delete mode 160000 hoomd/extern/hipCUB diff --git a/.gitmodules b/.gitmodules index 22dff3fe03..9ba5dad040 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,13 +7,6 @@ [submodule "hoomd/extern/quickhull"] path = hoomd/extern/quickhull url = https://github.com/glotzerlab/quickhull -[submodule "hoomd/extern/HIP"] - path = hoomd/extern/HIP - url = https://github.com/glotzerlab/HIP.git -[submodule "hoomd/extern/hipCUB"] - path = hoomd/extern/hipCUB - url = https://github.com/glotzerlab/hipCUB - branch = header_only [submodule "hoomd/extern/neighbor"] path = hoomd/extern/neighbor url = https://github.com/mphowardlab/neighbor.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 01073b90ec..90e5f360e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,7 +144,6 @@ endif() if ( NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/hoomd/extern/nano-signal-slot/nano_signal_slot.hpp OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/hoomd/extern/upp11/upp11.h OR - NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/hoomd/extern/HIP/include/hip/hip_runtime.h OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/hoomd/extern/quickhull/ConvexHull.hpp OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/hoomd/extern/OpenRAND/include/openrand/philox.h OR NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/hoomd/extern/neighbor/include/neighbor/neighbor.h OR diff --git a/hoomd/extern/HIP b/hoomd/extern/HIP deleted file mode 160000 index db753e4ea7..0000000000 --- a/hoomd/extern/HIP +++ /dev/null @@ -1 +0,0 @@ -Subproject commit db753e4ea7a715afec405117250cecef9e882b33 diff --git a/hoomd/extern/hipCUB b/hoomd/extern/hipCUB deleted file mode 160000 index 64d8adf32b..0000000000 --- a/hoomd/extern/hipCUB +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 64d8adf32bd48d8723cc7df9e5c970169e2845b5 From 0600388a0d91a4ba76e1a9f08ecdd1636baf0c21 Mon Sep 17 00:00:00 2001 From: "Joshua A. 
Anderson" Date: Thu, 20 Nov 2025 09:55:59 -0500 Subject: [PATCH 05/20] Install hip and hipcub. --- .github/workflows/build_and_test.yaml | 39 +++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index ad9e3cdf49..74f1e71395 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -134,6 +134,44 @@ jobs: activate-environment: true manifest-path: code/pixi.toml + - name: Clone hip + if: ${{ inputs.compiler_family == 'cuda' }} + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + repository: ROCm/rocm-systems + ref: 4179531dcd9825841e88c8082ac4068b6a65fd0a # rocm-7.1.0 + path: rocm-systems + + - name: Clone hipCUB + if: ${{ inputs.compiler_family == 'cuda' }} + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + repository: ROCm/rocm-libraries + ref: 1a01b92fd0971c98258c0ea7811aea2e55ec8698 # rocm-7.1.0 + sparse-checkout: projects/hipcub + path: rocm-libraries + + - name: Install hip + if: ${{ inputs.compiler_family == 'cuda' }} + run: | + export CLR_DIR="$(readlink -f rocm-systems/projects/clr)" + export HIP_DIR="$(readlink -f rocm-systems/projects/hip)" + export HIP_OTHER="$(readlink -f rocm-systems/projects/hipother)" + cd "$CLR_DIR" + mkdir build + cd build + cmake -DHIP_COMMON_DIR=$HIP_DIR -DHIP_PLATFORM=nvidia -DCMAKE_INSTALL_PREFIX=${GITHUB_WORKSPACE}/local -DCLR_BUILD_HIP=ON -DCLR_BUILD_OCL=OFF -DHIPNV_DIR=$HIP_OTHER/hipnv .. 
+ make install + + - name: Install hipcub + if: ${{ inputs.compiler_family == 'cuda' }} + run: | + mkdir build + cd build + cmake ../ -Dhip_ROOT=${GITHUB_WORKSPACE}/local -DHIP_PLATFORM=nvidia -DCMAKE_INSTALL_PREFIX=${GITHUB_WORKSPACE}/local + make install + working-directory: rocm-libraries/projects/hipcub + - name: Set compiler if: ${{ inputs.compiler_family != 'cuda' }} run: | @@ -159,6 +197,7 @@ jobs: -DBUILD_HPMC=${BUILD_HPMC:-"ON"} \ -DCUDA_ARCH_LIST="60;70" \ -DCMAKE_INSTALL_PREFIX=${GITHUB_WORKSPACE}/install \ + -Dhip_ROOT=${GITHUB_WORKSPACE}/local \ -DPLUGINS="" env: ENABLE_GPU: ${{ contains(inputs.config, 'cuda') }} From 5de1a138ee61c3820d8473334af0109dc8e8bd65 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 10:26:56 -0500 Subject: [PATCH 06/20] Use the hip 7.2 prerelease Builds fail with 7.1 on CUDA 12. --- .github/workflows/build_and_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index 74f1e71395..0fa0baa01b 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -139,7 +139,7 @@ jobs: uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 with: repository: ROCm/rocm-systems - ref: 4179531dcd9825841e88c8082ac4068b6a65fd0a # rocm-7.1.0 + ref: 75ad45d5f131738a4d888499026edfeaa3a11fe3 # hip-version_7.2.53220 path: rocm-systems - name: Clone hipCUB From 2d32b03510de71c22911090f3a095ec2fab67d8b Mon Sep 17 00:00:00 2001 From: "Joshua A. 
Anderson" Date: Thu, 20 Nov 2025 11:32:09 -0500 Subject: [PATCH 07/20] Patch hip to enable support for CUDA 12.x (where x < 8) --- .github/workflows/build_and_test.yaml | 6 ++++ .github/workflows/fix-cuda-12.patch | 41 +++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 .github/workflows/fix-cuda-12.patch diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index 0fa0baa01b..bae248fc88 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -151,6 +151,12 @@ jobs: sparse-checkout: projects/hipcub path: rocm-libraries + - name: Patch hip + if: ${{ inputs.compiler_family == 'cuda' }} + run: | + git apply ${GITHUB_WORKSPACE}/code/.github/workflows/fix-cuda-12.patch + working-directory: rocm-systems + - name: Install hip if: ${{ inputs.compiler_family == 'cuda' }} run: | diff --git a/.github/workflows/fix-cuda-12.patch b/.github/workflows/fix-cuda-12.patch new file mode 100644 index 0000000000..3160dd2760 --- /dev/null +++ b/.github/workflows/fix-cuda-12.patch @@ -0,0 +1,41 @@ +diff --git a/projects/hipother/hipnv/include/hip/nvidia_detail/nvidia_hip_runtime_api.h b/projects/hipother/hipnv/include/hip/nvidia_detail/nvidia_hip_runtime_api.h +index 8f6c295aab..05036a57fe 100644 +--- a/projects/hipother/hipnv/include/hip/nvidia_detail/nvidia_hip_runtime_api.h ++++ b/projects/hipother/hipnv/include/hip/nvidia_detail/nvidia_hip_runtime_api.h +@@ -869,7 +869,7 @@ static inline void hipMemcpy2DTocudaMemcpy2D(CUDA_MEMCPY2D* a, const hip_Memcpy2 + a->Height = (size_t)p->Height; + } + +-#if CUDA_VERSION >= CUDA_12020 ++#if CUDA_VERSION >= 12080 + typedef enum cudaMemcpyFlags hipMemcpyFlags; + #define hipMemcpyFlagDefault cudaMemcpyFlagDefault + #define hipMemcpyFlagPreferOverlapWithCompute cudaMemcpyFlagPreferOverlapWithCompute +@@ -2430,6 +2430,7 @@ inline static hipError_t hipMemcpy2DToArrayAsync(hipArray_t dst, size_t wOffset, + cudaMemcpy2DToArrayAsync(dst, wOffset, 
hOffset, src, spitch, width, height, kind, stream)); + } + ++#if CUDA_VERSION >= 12080 + inline static hipError_t hipMemcpyBatchAsync(void** dsts, void** srcs, size_t* sizes, size_t count, + hipMemcpyAttributes* attrs, size_t* attrsIdxs, + size_t numAttrs, size_t* failIdx, hipStream_t stream) { +@@ -2467,6 +2468,7 @@ inline static hipError_t hipMemcpy3DPeer(hipMemcpy3DPeerParms* p) { + inline static hipError_t hipMemcpy3DPeerAsync(hipMemcpy3DPeerParms* p, hipStream_t stream) { + return hipCUDAErrorTohipError(cudaMemcpy3DPeerAsync(p, stream)); + } ++#endif + + __HIP_DEPRECATED inline static hipError_t hipMemcpyToArray(hipArray_t dst, size_t wOffset, + size_t hOffset, const void* src, +@@ -3756,9 +3758,11 @@ inline static hipError_t hipLibraryEnumerateKernels(hipKernel_t* kernels, unsign + return hipCUResultTohipError(cuLibraryEnumerateKernels(kernels, numKernels, library)); + } + ++#if CUDA_VERSION >= 12060 + inline static hipError_t hipKernelGetLibrary(hipLibrary_t* library, hipKernel_t kernel) { + return hipCUResultTohipError(cuKernelGetLibrary(library, kernel)); + } ++#endif + + inline static hipError_t hipKernelGetName(const char** name, hipKernel_t kernel) { + return hipCUResultTohipError(cuKernelGetName(name, kernel)); From ebbbb2a246930dc9fcafd2163efcee057c3ad147 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 11:50:39 -0500 Subject: [PATCH 08/20] Also patch rocm-libraries. 
--- .github/workflows/build_and_test.yaml | 8 +++++++- .../workflows/fix-cuda-12-rocm-libraries.patch | 18 ++++++++++++++++++ ...12.patch => fix-cuda-12-rocm-systems.patch} | 0 .pre-commit-config.yaml | 2 +- 4 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/fix-cuda-12-rocm-libraries.patch rename .github/workflows/{fix-cuda-12.patch => fix-cuda-12-rocm-systems.patch} (100%) diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index bae248fc88..c0134b5aec 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -154,9 +154,15 @@ jobs: - name: Patch hip if: ${{ inputs.compiler_family == 'cuda' }} run: | - git apply ${GITHUB_WORKSPACE}/code/.github/workflows/fix-cuda-12.patch + git apply ${GITHUB_WORKSPACE}/code/.github/workflows/fix-cuda-12-rocm-systems.patch working-directory: rocm-systems + - name: Patch hipcub + if: ${{ inputs.compiler_family == 'cuda' }} + run: | + git apply ${GITHUB_WORKSPACE}/code/.github/workflows/fix-cuda-12-rocm-libraries.patch + working-directory: rocm-libraries + - name: Install hip if: ${{ inputs.compiler_family == 'cuda' }} run: | diff --git a/.github/workflows/fix-cuda-12-rocm-libraries.patch b/.github/workflows/fix-cuda-12-rocm-libraries.patch new file mode 100644 index 0000000000..35f824f1a2 --- /dev/null +++ b/.github/workflows/fix-cuda-12-rocm-libraries.patch @@ -0,0 +1,18 @@ +diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_for.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_for.hpp +index 0f22c40517..31fab88dd6 100644 +--- a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_for.hpp ++++ b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_for.hpp +@@ -30,6 +30,7 @@ + #define HIPCUB_CUB_DEVICE_DEVICE_FOR_HPP_ + + #include "../../../config.hpp" ++#if CUDA_VERSION >= 12060 + + #include // IWYU pragma: export + +@@ -194,4 +195,5 @@ 
HIPCUB_RUNTIME_FUNCTION + + END_HIPCUB_NAMESPACE + ++#endif + #endif // HIPCUB_CUB_DEVICE_DEVICE_FOR_HPP_ diff --git a/.github/workflows/fix-cuda-12.patch b/.github/workflows/fix-cuda-12-rocm-systems.patch similarity index 100% rename from .github/workflows/fix-cuda-12.patch rename to .github/workflows/fix-cuda-12-rocm-systems.patch diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d2ae9b07bb..7e608f1236 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -36,7 +36,7 @@ repos: - id: end-of-file-fixer exclude_types: [svg] - id: trailing-whitespace - exclude_types: [svg] + exclude_types: [svg, diff] - id: check-json - id: check-yaml exclude: "\\.clang-format" From 8452b3789e5f13bef655ae0c022bf7fc93fdc267 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 12:05:11 -0500 Subject: [PATCH 09/20] Patch device_merge --- .../workflows/fix-cuda-12-rocm-libraries.patch | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/fix-cuda-12-rocm-libraries.patch b/.github/workflows/fix-cuda-12-rocm-libraries.patch index 35f824f1a2..55ee2e23e9 100644 --- a/.github/workflows/fix-cuda-12-rocm-libraries.patch +++ b/.github/workflows/fix-cuda-12-rocm-libraries.patch @@ -16,3 +16,21 @@ index 0f22c40517..31fab88dd6 100644 +#endif #endif // HIPCUB_CUB_DEVICE_DEVICE_FOR_HPP_ +diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_merge.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_merge.hpp +index f314f5a128..e22f08fba9 100644 +--- a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_merge.hpp ++++ b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_merge.hpp +@@ -30,6 +30,7 @@ + #define HIPCUB_CUB_DEVICE_DEVICE_MERGE_HPP_ + + #include "../../../config.hpp" ++#if CUDA_VERSION >= 12080 + + #include // IWYU pragma: export + +@@ -104,4 +105,5 @@ struct DeviceMerge + + END_HIPCUB_NAMESPACE + ++#endif + #endif // 
HIPCUB_CUB_DEVICE_DEVICE_MERGE_HPP_ From a364dc1cb314b215368083c833ddc9dad015fba3 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 12:05:32 -0500 Subject: [PATCH 10/20] Test with cuda 12.5, 12.6, 12.8, and 12.9. --- .github/workflows/build_and_test.yaml | 12 ++++++++---- .github/workflows/test.yaml | 4 ++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index c0134b5aec..6bbcc860af 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -87,12 +87,16 @@ jobs: echo 'test_docker_options=--gpus=all' >> "$GITHUB_OUTPUT" case "${{ inputs.compiler_version }}" in + 129) + echo "image=nvidia/cuda:12.9.1-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; + 128) + echo "image=nvidia/cuda:12.8.1-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; + 126) + echo "image=nvidia/cuda:12.6.3-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; 125) - echo "image=nvidia/cuda:12.5.0-devel-ubuntu22.04" >> "$GITHUB_OUTPUT";; + echo "image=nvidia/cuda:12.5.1-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; 124) - echo "image=nvidia/cuda:12.4.1-devel-ubuntu22.04" >> "$GITHUB_OUTPUT";; - 122) - echo "image=nvidia/cuda:12.2.2-devel-ubuntu22.04" >> "$GITHUB_OUTPUT";; + echo "image=nvidia/cuda:12.4.1-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; *) echo "Unknown compiler" && exit 1;; esac diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index defe9808f6..f1c53f8fb8 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -95,6 +95,10 @@ jobs: - config: [gcc, 13, -py, 312, -mpi] - config: [gcc, 12, -py, 311, -mpi] - config: [gcc, 11, -py, 310, -mpi] + - config: [cuda, 125, -py, 313, -mpi] + - config: [cuda, 126, -py, 313, -mpi] + - config: [cuda, 128, -py, 313, -mpi] + - config: [cuda, 129, -py, 313, -mpi] tests_complete: name: Unit test From 125169f0303029b42a44a1df10358afb305a20ce Mon Sep 17 00:00:00 2001 From: "Joshua A. 
Anderson" Date: Thu, 20 Nov 2025 12:07:10 -0500 Subject: [PATCH 11/20] Fix failing cuda 12.4 build. --- .github/workflows/build_and_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index 6bbcc860af..2430312564 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -96,7 +96,7 @@ jobs: 125) echo "image=nvidia/cuda:12.5.1-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; 124) - echo "image=nvidia/cuda:12.4.1-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; + echo "image=nvidia/cuda:12.4.1-devel-ubuntu22.04" >> "$GITHUB_OUTPUT";; *) echo "Unknown compiler" && exit 1;; esac From fb74d6a11b31468219f3965ee1ff91c005c5dde2 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 12:38:19 -0500 Subject: [PATCH 12/20] device_transform requires CUDA 12.9 --- .../workflows/fix-cuda-12-rocm-libraries.patch | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/fix-cuda-12-rocm-libraries.patch b/.github/workflows/fix-cuda-12-rocm-libraries.patch index 55ee2e23e9..069bc8a136 100644 --- a/.github/workflows/fix-cuda-12-rocm-libraries.patch +++ b/.github/workflows/fix-cuda-12-rocm-libraries.patch @@ -34,3 +34,21 @@ index f314f5a128..e22f08fba9 100644 +#endif #endif // HIPCUB_CUB_DEVICE_DEVICE_MERGE_HPP_ +diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_transform.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_transform.hpp +index 681a0bbf98..d071c7d859 100644 +--- a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_transform.hpp ++++ b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_transform.hpp +@@ -30,6 +30,7 @@ + #define HIBCUB_ROCPRIM_DEVICE_DEVICE_TRANSFORM_HPP_ + + #include "../../../config.hpp" ++#if CUDA_VERSION >= 12090 + + #include + +@@ -203,4 +204,5 @@ struct DeviceTransform + + END_HIPCUB_NAMESPACE + ++#endif + 
#endif // HIBCUB_ROCPRIM_DEVICE_DEVICE_TRANSFORM_HPP_ From ec617ac5e37dc9cb3aa59e1f97c1920055f2000a Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 13:13:38 -0500 Subject: [PATCH 13/20] More hipcub patches. --- .../fix-cuda-12-rocm-libraries.patch | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/.github/workflows/fix-cuda-12-rocm-libraries.patch b/.github/workflows/fix-cuda-12-rocm-libraries.patch index 069bc8a136..c18d74b6ce 100644 --- a/.github/workflows/fix-cuda-12-rocm-libraries.patch +++ b/.github/workflows/fix-cuda-12-rocm-libraries.patch @@ -1,3 +1,16 @@ +diff --git a/projects/hipcub/CMakeLists.txt b/projects/hipcub/CMakeLists.txt +index fbcecf0fc4..0fbae70749 100644 +--- a/projects/hipcub/CMakeLists.txt ++++ b/projects/hipcub/CMakeLists.txt +@@ -174,7 +174,7 @@ math(EXPR hipcub_VERSION_NUMBER "${hipcub_VERSION_MAJOR} * 100000 + ${hipcub_VER + include(VerifyCompiler) + + # Get dependencies (except rocm-cmake, included earlier) +-include(Dependencies) ++# include(Dependencies) + + if(BUILD_ADDRESS_SANITIZER) + add_compile_options(-fsanitize=address -shared-libasan) diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_for.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_for.hpp index 0f22c40517..31fab88dd6 100644 --- a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_for.hpp @@ -34,6 +47,46 @@ index f314f5a128..e22f08fba9 100644 +#endif #endif // HIPCUB_CUB_DEVICE_DEVICE_MERGE_HPP_ +diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_scan.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_scan.hpp +index 72ad11f7bc..038fddccdd 100644 +--- a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_scan.hpp ++++ b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_scan.hpp +@@ -182,6 +182,7 @@ public: + stream); + } + ++#if CUDA_VERSION >= 12080 + template + HIPCUB_RUNTIME_FUNCTION +diff 
--git a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_select.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_select.hpp +index 6812c5cfeb..33bce4aa23 100644 +--- a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_select.hpp ++++ b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_select.hpp +@@ -223,6 +223,7 @@ public: + stream); + } + ++#if CUDA_VERSION >= 12060 + template + HIPCUB_RUNTIME_FUNCTION diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_transform.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_transform.hpp index 681a0bbf98..d071c7d859 100644 --- a/projects/hipcub/hipcub/include/hipcub/backend/cub/device/device_transform.hpp @@ -52,3 +105,25 @@ index 681a0bbf98..d071c7d859 100644 +#endif #endif // HIBCUB_ROCPRIM_DEVICE_DEVICE_TRANSFORM_HPP_ +diff --git a/projects/hipcub/hipcub/include/hipcub/backend/cub/util_temporary_storage.hpp b/projects/hipcub/hipcub/include/hipcub/backend/cub/util_temporary_storage.hpp +index fc67d645b1..63540b717f 100644 +--- a/projects/hipcub/hipcub/include/hipcub/backend/cub/util_temporary_storage.hpp ++++ b/projects/hipcub/hipcub/include/hipcub/backend/cub/util_temporary_storage.hpp +@@ -50,10 +50,17 @@ HIPCUB_HOST_DEVICE HIPCUB_FORCEINLINE hipError_t + void* (&allocations)[ALLOCATIONS], + const size_t (&allocation_sizes)[ALLOCATIONS]) + { ++ #if CUDA_VERSION >= 12090 + cudaError_t error = ::cub::detail::AliasTemporaries(d_temp_storage, + temp_storage_bytes, + allocations, + allocation_sizes); ++ #else ++ cudaError_t error = ::cub::AliasTemporaries(d_temp_storage, ++ temp_storage_bytes, ++ allocations, ++ allocation_sizes); ++ #endif + + if(cudaSuccess == error) + { From 8a9a0f5c5850be97d9c0095da9b8896848f592e2 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 13:30:21 -0500 Subject: [PATCH 14/20] Build for compute 8.0 on CI. 
--- .github/workflows/build_and_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index 2430312564..e9a3d9ef18 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -211,7 +211,7 @@ jobs: -DBUILD_MPCD=${BUILD_MD:-"ON"} \ -DBUILD_METAL=${BUILD_MD:-"ON"} \ -DBUILD_HPMC=${BUILD_HPMC:-"ON"} \ - -DCUDA_ARCH_LIST="60;70" \ + -DCUDA_ARCH_LIST="80" \ -DCMAKE_INSTALL_PREFIX=${GITHUB_WORKSPACE}/install \ -Dhip_ROOT=${GITHUB_WORKSPACE}/local \ -DPLUGINS="" From a951845fa659c8902c654b47517f685754067d38 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 14:02:10 -0500 Subject: [PATCH 15/20] Test on CUDA 12.5+. --- .github/workflows/build_and_test.yaml | 2 ++ .github/workflows/test.yaml | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index e9a3d9ef18..173ce0578c 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -87,6 +87,8 @@ jobs: echo 'test_docker_options=--gpus=all' >> "$GITHUB_OUTPUT" case "${{ inputs.compiler_version }}" in + 130) + echo "image=nvidia/cuda:13.0.2-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; 129) echo "image=nvidia/cuda:12.9.1-devel-ubuntu24.04" >> "$GITHUB_OUTPUT";; 128) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f1c53f8fb8..f859a51667 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -53,13 +53,13 @@ jobs: - config: [gcc, 14, -py, 313, -nomd, -nohpmc] - config: [gcc, 10, -py, 310, -mpi] - - config: [cuda, 124, -py, 313, -mpi] + - config: [cuda, 125, -py, 313, -mpi] validate: true - - config: [cuda, 124, -py, 313] + - config: [cuda, 125, -py, 313] validate: true - - config: [cuda, 124, -py, 313, -mpi, -debug] + - config: [cuda, 125, -py, 313, -mpi, -debug] release: @@ -95,10 +95,10 
@@ jobs: - config: [gcc, 13, -py, 312, -mpi] - config: [gcc, 12, -py, 311, -mpi] - config: [gcc, 11, -py, 310, -mpi] - - config: [cuda, 125, -py, 313, -mpi] - config: [cuda, 126, -py, 313, -mpi] - config: [cuda, 128, -py, 313, -mpi] - config: [cuda, 129, -py, 313, -mpi] + - config: [cuda, 130, -py, 313, -mpi] tests_complete: name: Unit test From 54de51ba448dbf412e66f407e05fd7141b2b77cc Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 14:10:36 -0500 Subject: [PATCH 16/20] Remove unused include. --- hoomd/mpcd/ParticleData.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/hoomd/mpcd/ParticleData.cu b/hoomd/mpcd/ParticleData.cu index 5fc9bd950a..3464cff9ee 100644 --- a/hoomd/mpcd/ParticleData.cu +++ b/hoomd/mpcd/ParticleData.cu @@ -13,7 +13,6 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" #include -#include #include #pragma GCC diagnostic pop From 542c82a01ceb3b83a5321197314923f426f452b3 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 15:18:48 -0500 Subject: [PATCH 17/20] Some preparation for CUDA 13 --- CMake/hoomd/HOOMDCUDASetup.cmake | 2 +- CMakeLists.txt | 2 +- hoomd/ManagedArray.h | 2 +- hoomd/hpmc/IntegratorHPMCMonoGPU.h | 8 ++++---- hoomd/md/PotentialPair.h | 8 ++++---- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/CMake/hoomd/HOOMDCUDASetup.cmake b/CMake/hoomd/HOOMDCUDASetup.cmake index 3e10c8c6d5..63958e07ad 100644 --- a/CMake/hoomd/HOOMDCUDASetup.cmake +++ b/CMake/hoomd/HOOMDCUDASetup.cmake @@ -9,7 +9,7 @@ if (ENABLE_HIP) endif() # ignore warnings about unused results - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-unused-result -diag-suppress 2810") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-unused-result -Wno-deprecated-declarations -diag-suppress 2810") if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.2) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCUSPARSE_NEW_API") diff --git a/CMakeLists.txt b/CMakeLists.txt index 90e5f360e8..90eb430c7e 
100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,7 +75,7 @@ if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-sign-conversion -Wno-unknown-pragmas -Wno-deprecated-declarations -Wno-unused-result") # suppress warnings regarding HIP's overly complex vector structs - if (CMAKE_COMPILER_IS_GNUCXXH AND OOMD_GPU_PLATFORM STREQUAL "HIP") + if (CMAKE_COMPILER_IS_GNUCXX AND HOOMD_GPU_PLATFORM STREQUAL "HIP") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-class-memaccess") endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall") diff --git a/hoomd/ManagedArray.h b/hoomd/ManagedArray.h index 15f5d8ae92..efd026da78 100644 --- a/hoomd/ManagedArray.h +++ b/hoomd/ManagedArray.h @@ -190,7 +190,7 @@ template class ManagedArray if (managed && ptr) { #if defined(__HIP_PLATFORM_NVIDIA__) && (CUDART_VERSION >= 8000) - cudaMemAdvise(ptr, sizeof(T) * N, cudaMemAdviseSetReadMostly, 0); + hipMemAdvise(ptr, sizeof(T) * N, hipMemAdviseSetReadMostly, 0); #endif } } diff --git a/hoomd/hpmc/IntegratorHPMCMonoGPU.h b/hoomd/hpmc/IntegratorHPMCMonoGPU.h index ef41dc1a7f..d6b93d9562 100644 --- a/hoomd/hpmc/IntegratorHPMCMonoGPU.h +++ b/hoomd/hpmc/IntegratorHPMCMonoGPU.h @@ -842,10 +842,10 @@ template void IntegratorHPMCMonoGPU::updateCellWidth() #ifdef __HIP_PLATFORM_NVIDIA__ // set memory hints - cudaMemAdvise(this->m_params.data(), - this->m_params.size() * sizeof(typename Shape::param_type), - cudaMemAdviseSetReadMostly, - 0); + hipMemAdvise(this->m_params.data(), + this->m_params.size() * sizeof(typename Shape::param_type), + hipMemAdviseSetReadMostly, + 0); CHECK_CUDA_ERROR(); #endif diff --git a/hoomd/md/PotentialPair.h b/hoomd/md/PotentialPair.h index e5e3f10cdb..dec76d2784 100644 --- a/hoomd/md/PotentialPair.h +++ b/hoomd/md/PotentialPair.h @@ -372,10 +372,10 @@ PotentialPair::PotentialPair(std::shared_ptr sysdef if (m_pdata->getExecConf()->isCUDAEnabled()) { // m_params is _always_ in unified memory, so memadvise and 
prefetch - cudaMemAdvise(m_params.data(), - m_params.size() * sizeof(param_type), - cudaMemAdviseSetReadMostly, - 0); + hipMemAdvise(m_params.data(), + m_params.size() * sizeof(param_type), + hipMemAdviseSetReadMostly, + 0); cudaMemPrefetchAsync(m_params.data(), sizeof(param_type) * m_params.size(), m_exec_conf->getGPUId()); From ed342b45636ab112634c1eb3555166fbdbc7d4d8 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 15:50:55 -0500 Subject: [PATCH 18/20] Document new build requirements. --- BUILDING.rst | 34 +++++++++++------ INSTALLING.rst | 75 +++++++++++++++++++++++++++++++------ sphinx-doc/conf.py | 1 + sphinx-doc/requirements.in | 1 + sphinx-doc/requirements.txt | 3 ++ 5 files changed, 90 insertions(+), 24 deletions(-) diff --git a/BUILDING.rst b/BUILDING.rst index eaad8681bf..12be9acabf 100644 --- a/BUILDING.rst +++ b/BUILDING.rst @@ -124,22 +124,31 @@ Install additional packages needed to build the documentation: **For GPU execution** (required when ``ENABLE_GPU=on``): -- **NVIDIA CUDA Toolkit** +.. tab:: NVIDIA (CUDA) - *OR* + - NVIDIA CUDA Toolkit >= 12.5 + - `hip`_ + - `hipcub`_ -- AMD ROCm -- HIP [with ``hipcc`` and ``hcc`` as backend] -- rocFFT -- rocPRIM -- rocThrust -- hipCUB -- roctracer-dev + .. note:: -.. note:: + Use hip ``hip-version_7.2.53220`` and hipcub ``rocm-7.1.0`` work with CUDA 12.9. + Apply the patches in `.github/workflows`_ to add support for CUDA 12.5, 12.6, + and 12.8. + +.. tab:: AMD (HIP) + + - AMD ROCm + - HIP [with ``hipcc`` and ``hcc`` as backend] + - rocFFT + - rocPRIM + - rocThrust + - hipCUB + - roctracer-dev - When ``ENABLE_GPU=on``, HOOMD-blue will default to CUDA. Set ``HOOMD_GPU_PLATFORM=HIP`` to - choose HIP. +.. _`hip`: https://rocmdocs.amd.com/projects/HIP/en/latest/install/build.html +.. _`hipcub`: https://rocm.docs.amd.com/projects/hipCUB/en/latest/install/hipCUB-install-overview.html +.. 
_`.github/workflows`: https://github.com/glotzerlab/hoomd-blue/tree/v5.4.0/.github/workflows **To build the documentation:** @@ -147,6 +156,7 @@ Install additional packages needed to build the documentation: - **furo** - **nbsphinx** - **ipython** +- **sphinx-inline-tabs** .. _Obtain the source: diff --git a/INSTALLING.rst b/INSTALLING.rst index ddf9278ed9..6a669807b5 100644 --- a/INSTALLING.rst +++ b/INSTALLING.rst @@ -20,32 +20,83 @@ Serial CPU and single GPU builds *linux-64*, *osx-64*, and *osx-arm64* platforms. Install the ``hoomd`` package from the conda-forge_ channel: -.. code-block:: bash +.. tab:: Pixi - micromamba install hoomd=5.4.0 + .. code-block:: bash + + pixi add hoomd=5.4.0 + +.. tab:: Micromamba + + .. code-block:: bash + + micromamba install hoomd=5.4.0 + +.. tab:: Mamba + + .. code-block:: bash + + mamba install hoomd=5.4.0 .. _conda-forge: https://conda-forge.org/docs/user/introduction.html By default, micromamba auto-detects whether your system has a GPU and attempts to install the appropriate package. Override this and force the GPU enabled package installation with: -.. code-block:: bash +.. tab:: Pixi + + Add: + + .. code-block:: toml + + [system-requirements] + cuda = "12.9" + + See `Using CUDA in Pixi`_ for more details. Then run: + + .. code-block:: bash + + export CONDA_OVERRIDE_CUDA="12.9" + pixi add "hoomd=5.4.0=*gpu*" - export CONDA_OVERRIDE_CUDA="12.6" - micromamba install "hoomd=5.4.0=*gpu*" "cuda-version=12.6" +.. tab:: Micromamba + + .. code-block:: bash + + export CONDA_OVERRIDE_CUDA="12.9" + micromamba install "hoomd=5.4.0=*gpu*" "cuda-version=12.9" + +.. tab:: Mamba + + .. code-block:: bash + + export CONDA_OVERRIDE_CUDA="12.9" + mamba install "hoomd=5.4.0=*gpu*" "cuda-version=12.9" + +.. _Using CUDA in Pixi: https://pixi.sh/dev/workspace/system_requirements/#using-cuda-in-pixi .. note:: - conda-forge_ may update to a new version of CUDA. 
If the above command results in an error, - replace ``12.6`` with the version noted in micromamba's error message. + conda-forge_ may update to a new version of CUDA after these instructions are published. + If the above command results in an error, replace ``12.9`` with the version noted in + micromamba's error message. Similarly, you can force CPU-only package installation with: -.. code-block:: bash +.. tab:: Pixi - micromamba install "hoomd=5.4.0=*cpu*" + .. code-block:: bash -.. note:: + pixi add "hoomd=5.4.0=*cpu*" + +.. tab:: Micromamba + + .. code-block:: bash + + micromamba install "hoomd=5.4.0=*cpu*" + +.. tab:: Mamba + + .. code-block:: bash - CUDA 11.8 compatible packages are also available. Replace "12.0" with "11.8" above when - installing HOOMD-blue on systems with CUDA 11 compatible drivers. + mamba install "hoomd=5.4.0=*cpu*" diff --git a/sphinx-doc/conf.py b/sphinx-doc/conf.py index b605ea0d06..414a6672b7 100644 --- a/sphinx-doc/conf.py +++ b/sphinx-doc/conf.py @@ -30,6 +30,7 @@ "sphinx.ext.napoleon", "sphinx.ext.intersphinx", "sphinx.ext.todo", + "sphinx_inline_tabs", ] if find_spec("sphinxcontrib.katex") is not None: diff --git a/sphinx-doc/requirements.in b/sphinx-doc/requirements.in index 3f87c4fa84..181a8c1c3f 100644 --- a/sphinx-doc/requirements.in +++ b/sphinx-doc/requirements.in @@ -9,3 +9,4 @@ sphinxcontrib-googleanalytics sphinxcontrib-katex sphinx-copybutton sphinx-notfound-page +sphinx-inline-tabs diff --git a/sphinx-doc/requirements.txt b/sphinx-doc/requirements.txt index 54405692b1..1ac9acef20 100644 --- a/sphinx-doc/requirements.txt +++ b/sphinx-doc/requirements.txt @@ -146,6 +146,7 @@ sphinx==8.1.3 # nbsphinx # sphinx-basic-ng # sphinx-copybutton + # sphinx-inline-tabs # sphinx-notfound-page # sphinxcontrib-googleanalytics # sphinxcontrib-katex @@ -153,6 +154,8 @@ sphinx-basic-ng==1.0.0b2 # via furo sphinx-copybutton==0.5.2 # via -r sphinx-doc/requirements.in +sphinx-inline-tabs==2023.4.21 + # via -r sphinx-doc/requirements.in 
sphinx-notfound-page==1.1.0 # via -r sphinx-doc/requirements.in sphinxcontrib-applehelp==2.0.0 From a85e09246b78b59ff973e2c39d3bd1d54471c6e5 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 16:15:16 -0500 Subject: [PATCH 19/20] Document hip and hipcub dependencies. Also document Pixi installation instructions. --- .bumpversion.toml | 5 +++++ BUILDING.rst | 15 +++++++-------- INSTALLING.rst | 8 +++----- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/.bumpversion.toml b/.bumpversion.toml index 62d9e332b3..50dd62aa40 100644 --- a/.bumpversion.toml +++ b/.bumpversion.toml @@ -6,6 +6,11 @@ filename = "INSTALLING.rst" search = "hoomd={current_version}" replace = "hoomd={new_version}" +[[tool.bumpversion.files]] +filename = "BUILDING.rst" +search = "tree/v{current_version}" +replace = "tree/v{new_version}" + [[tool.bumpversion.files]] filename = "CMakeLists.txt" search = 'HOOMD_VERSION_RAW "{current_version}"' diff --git a/BUILDING.rst b/BUILDING.rst index 12be9acabf..193d9cbc51 100644 --- a/BUILDING.rst +++ b/BUILDING.rst @@ -126,15 +126,14 @@ Install additional packages needed to build the documentation: .. tab:: NVIDIA (CUDA) - - NVIDIA CUDA Toolkit >= 12.5 - - `hip`_ - - `hipcub`_ + - NVIDIA CUDA Toolkit + - hip (`hip installation instructions`_) + - hipcub (`hipcub installation instructions`_) .. note:: - Use hip ``hip-version_7.2.53220`` and hipcub ``rocm-7.1.0`` work with CUDA 12.9. - Apply the patches in `.github/workflows`_ to add support for CUDA 12.5, 12.6, - and 12.8. + hip ``hip-version_7.2.53220`` and hipcub ``rocm-7.1.0`` work with CUDA 12.9. + Apply the patches in `.github/workflows`_ to add support for CUDA 12.5–12.8. .. tab:: AMD (HIP) @@ -146,8 +145,8 @@ Install additional packages needed to build the documentation: - hipCUB - roctracer-dev -.. _`hip`: https://rocmdocs.amd.com/projects/HIP/en/latest/install/build.html -.. 
_`hipcub`: https://rocm.docs.amd.com/projects/hipCUB/en/latest/install/hipCUB-install-overview.html +.. _`hip installation instructions`: https://rocmdocs.amd.com/projects/HIP/en/latest/install/build.html +.. _`hipcub installation instructions`: https://rocm.docs.amd.com/projects/hipCUB/en/latest/install/hipCUB-install-overview.html .. _`.github/workflows`: https://github.com/glotzerlab/hoomd-blue/tree/v5.4.0/.github/workflows **To build the documentation:** diff --git a/INSTALLING.rst b/INSTALLING.rst index 6a669807b5..539f9747c0 100644 --- a/INSTALLING.rst +++ b/INSTALLING.rst @@ -45,20 +45,20 @@ appropriate package. Override this and force the GPU enabled package installatio .. tab:: Pixi - Add: + First add the following to your ``pixi.toml`` file: .. code-block:: toml [system-requirements] cuda = "12.9" - See `Using CUDA in Pixi`_ for more details. Then run: + Then, add hoomd with: .. code-block:: bash - export CONDA_OVERRIDE_CUDA="12.9" pixi add "hoomd=5.4.0=*gpu*" + .. tab:: Micromamba .. code-block:: bash @@ -73,8 +73,6 @@ appropriate package. Override this and force the GPU enabled package installatio export CONDA_OVERRIDE_CUDA="12.9" mamba install "hoomd=5.4.0=*gpu*" "cuda-version=12.9" -.. _Using CUDA in Pixi: https://pixi.sh/dev/workspace/system_requirements/#using-cuda-in-pixi - .. note:: conda-forge_ may update to a new version of CUDA after these instructions are published. From 4811aa6deb32f54c82e23d6db171282541a8a03f Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Thu, 20 Nov 2025 16:50:02 -0500 Subject: [PATCH 20/20] Disable cuda 13 tests. 
--- .github/workflows/test.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f859a51667..acb1d4decf 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -98,7 +98,6 @@ jobs: - config: [cuda, 126, -py, 313, -mpi] - config: [cuda, 128, -py, 313, -mpi] - config: [cuda, 129, -py, 313, -mpi] - - config: [cuda, 130, -py, 313, -mpi] tests_complete: name: Unit test