Skip to content

Commit d2feee8

Browse files
authored
Support HIP/ROCm backends for GPUs (#101)
* Add hydrogen error handling mechanisms
* new cuda management infrastructure
* everything in rocm compiles i think. linker issues pending
* remove override decoration from Element/BlockMatrix functions
* patch for finding rocblas; not sure if this is strictly necessary any more
* forward kernel arguments by reference
* a few tweaks to the CMakeLists
* Make sure ROCm and CUDA aren't enabled at the same time.
* correct a discrepancy in hipMemcpy2DAsync semantics
* clean up HAVE_CUDA macro usage; streamline copy syntax
* use nonblocking stream; clean up the mempool
* straggler HAVE_CUDA use in include tree
* preprocessor macro cleanup in blaslike tests
* Remove debugging print statements
* add short-circuit returns to copy/fill routines when size is zero
* some cleanup
* fix some new rocm issues
* update aluminum version number
* update version number
* remove some unneeded CMake
* revert changes related to the hip override bug
* add support for hipCUB and generalize cublas tensor option
* fix annoying clang warnings (that GCC _should_ throw, too, but it doesn't)
* address some review comments
* fix use of streams that should have been SyncInfos
* Clean up device library functions
* cleanup timer nonsense in Gemm test
* fix some hipCUB linkage
* Apply suggestions from code review Co-authored-by: Tim Moon <[email protected]>
* Apply suggestions from code review Co-authored-by: Tim Moon <[email protected]>
* remove unneeded metafunction. DiHydrogen has a cleaner implementation anyway.
1 parent 48de387 commit d2feee8

File tree

179 files changed

+4733
-1917
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

179 files changed

+4733
-1917
lines changed

CMakeLists.txt

+98-35
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ endif (__GIT_EXECUTABLE)
5353

5454
# This must be set because version tags
5555
set(HYDROGEN_VERSION_MAJOR 1)
56-
set(HYDROGEN_VERSION_MINOR 3)
57-
set(HYDROGEN_VERSION_PATCH 4)
56+
set(HYDROGEN_VERSION_MINOR 4)
57+
set(HYDROGEN_VERSION_PATCH 0)
5858
set(HYDROGEN_VERSION_MAJOR_MINOR
5959
"${HYDROGEN_VERSION_MAJOR}.${HYDROGEN_VERSION_MINOR}")
6060
set(HYDROGEN_VERSION
@@ -154,20 +154,32 @@ option(Hydrogen_ENABLE_CUDA
154154
"Search for CUDA support and enable related features if found."
155155
OFF)
156156

157-
if (Hydrogen_ENABLE_CUDA)
157+
option(Hydrogen_ENABLE_ROCM
158+
"Search for ROCm/HIP support and enable related features if found."
159+
OFF)
160+
161+
if (Hydrogen_ENABLE_CUDA OR Hydrogen_ENABLE_ROCM)
158162
option(Hydrogen_ENABLE_CUB
159163
"Search for CUB support and enable related features if found."
160164
ON)
161165

162-
option(Hydrogen_ENABLE_CUBLAS_TENSOR_MATH
163-
"Use the cuBLAS tensor operation math."
166+
option(Hydrogen_ENABLE_GPU_TENSOR_MATH
167+
"Use the GPU tensor operations when available."
164168
OFF)
165169

166170
option(Hydrogen_ENABLE_GPU_FP16
167171
"Enable FP16 arithmetic in GPU code."
168172
ON)
169173
endif ()
170174

175+
if (Hydrogen_ENABLE_ROCM AND Hydrogen_ENABLE_CUDA)
176+
message(FATAL_ERROR
177+
"ROCm and CUDA code paths are mutually exclusive. "
178+
"Please enable the one that corresponds to your hardware. "
179+
"If you have mixed hardware, please contact the Hydrogen developers "
180+
"as this would be of great interest.")
181+
endif ()
182+
171183
#
172184
# MEMORY-RELATED OPTIONS
173185
#
@@ -334,8 +346,8 @@ if (Hydrogen_ENABLE_CUDA)
334346
find_package(CUDA REQUIRED) # Enable all the macros
335347
find_package(NVML REQUIRED)
336348

337-
if (Hydrogen_ENABLE_CUBLAS_TENSOR_MATH)
338-
set(HYDROGEN_CUBLAS_USE_TENSOR_OP_MATH TRUE)
349+
if (Hydrogen_ENABLE_GPU_TENSOR_MATH)
350+
set(HYDROGEN_GPU_USE_TENSOR_OP_MATH TRUE)
339351
endif ()
340352

341353
if (Hydrogen_ENABLE_GPU_FP16)
@@ -387,38 +399,64 @@ if (Hydrogen_ENABLE_CUDA)
387399
set(HYDROGEN_HAVE_CUDA FALSE)
388400

389401
endif ()
390-
391402
endif (Hydrogen_ENABLE_CUDA)
392403

393-
set(HYDROGEN_HAVE_GPU ${HYDROGEN_HAVE_CUDA})
404+
if (Hydrogen_ENABLE_ROCM)
405+
set(CMAKE_MODULE_PATH "/opt/rocm/hip/cmake" ${CMAKE_MODULE_PATH})
406+
find_package(HIP REQUIRED)
407+
408+
if (Hydrogen_ENABLE_CUB)
409+
set(CMAKE_PREFIX_PATH "/opt/rocm/hip" ${CMAKE_PREFIX_PATH})
410+
set(HIP_FOUND FALSE)
411+
find_package(HIP CONFIG REQUIRED)
412+
find_package(rocPRIM REQUIRED)
413+
find_package(hipCUB REQUIRED)
414+
set(HYDROGEN_HAVE_CUB TRUE)
415+
else ()
416+
set(HYDROGEN_HAVE_CUB FALSE)
417+
endif ()
418+
419+
if (HIP_FOUND)
420+
set(CMAKE_CXX_EXTENSIONS FALSE)
421+
find_package(ROCBLAS REQUIRED)
422+
set(HYDROGEN_HAVE_ROCM TRUE)
423+
message(STATUS "Found ROCm/HIP toolchain. Using HIP/ROCm.")
424+
else ()
425+
message(FATAL_ERROR "ROCm requested but not found.")
426+
endif ()
427+
endif (Hydrogen_ENABLE_ROCM)
428+
429+
if (HYDROGEN_HAVE_CUDA OR HYDROGEN_HAVE_ROCM)
430+
set(HYDROGEN_HAVE_GPU TRUE)
431+
endif ()
394432

395433
if (Hydrogen_ENABLE_ALUMINUM)
396-
find_package(Aluminum 0.3.0 NO_MODULE
434+
find_package(Aluminum 0.4.0 NO_MODULE
397435
HINTS ${Aluminum_DIR} ${ALUMINUM_DIR} ${AL_DIR}
398436
$ENV{Aluminum_DIR} $ENV{ALUMINUM_DIR} $ENV{AL_DIR}
399437
PATH_SUFFIXES lib64/cmake/aluminum lib/cmake/aluminum
400438
NO_DEFAULT_PATH)
401439
if (NOT Aluminum_FOUND)
402-
find_package(Aluminum 0.3.0 NO_MODULE)
440+
find_package(Aluminum 0.4.0 NO_MODULE)
403441
endif ()
404442

405443
if (Aluminum_FOUND)
406444
set(HYDROGEN_HAVE_ALUMINUM TRUE)
407445
message(STATUS "Found Aluminum: ${Aluminum_DIR}")
408446

409-
if (HYDROGEN_HAVE_CUDA AND AL_HAS_NCCL)
447+
if (HYDROGEN_HAVE_GPU AND AL_HAS_NCCL)
410448
set(HYDROGEN_HAVE_NCCL2 TRUE)
411449
message(STATUS "Aluminum detected with NCCL2 backend support.")
412450
else ()
413451
set(HYDROGEN_HAVE_NCCL2 FALSE)
414-
endif (HYDROGEN_HAVE_CUDA AND AL_HAS_NCCL)
452+
endif (HYDROGEN_HAVE_GPU AND AL_HAS_NCCL)
415453

416-
if (HYDROGEN_HAVE_CUDA AND AL_HAS_MPI_CUDA)
454+
if (HYDROGEN_HAVE_GPU AND AL_HAS_MPI_CUDA)
417455
set(HYDROGEN_HAVE_AL_MPI_CUDA TRUE)
418456
message(STATUS "Aluminum detected with MPI-CUDA backend support.")
419457
else ()
420458
set(HYDROGEN_HAVE_AL_MPI_CUDA FALSE)
421-
endif (HYDROGEN_HAVE_CUDA AND AL_HAS_MPI_CUDA)
459+
endif (HYDROGEN_HAVE_GPU AND AL_HAS_MPI_CUDA)
422460
else ()
423461
set(HYDROGEN_HAVE_ALUMINUM FALSE)
424462
set(HYDROGEN_HAVE_NCCL2 FALSE)
@@ -497,7 +535,12 @@ configure_file("${PROJECT_SOURCE_DIR}/cmake/configure_files/hydrogen_config.h.in
497535
configure_file("${PROJECT_SOURCE_DIR}/doxy/Doxyfile.in"
498536
"${PROJECT_BINARY_DIR}/doxy/Doxyfile")
499537

500-
add_library(Hydrogen_CXX "${HYDROGEN_SOURCES}" "${HYDROGEN_HEADERS}")
538+
if (HYDROGEN_HAVE_ROCM)
539+
hip_add_library(Hydrogen_CXX "${HYDROGEN_SOURCES}" "${HYDROGEN_HEADERS}")
540+
else ()
541+
add_library(Hydrogen_CXX "${HYDROGEN_SOURCES}" "${HYDROGEN_HEADERS}")
542+
endif ()
543+
501544
target_include_directories(Hydrogen_CXX PUBLIC
502545
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
503546
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include/El>
@@ -509,40 +552,60 @@ target_include_directories(Hydrogen_CXX PUBLIC
509552
# be forced to build with that (even though they maybe should)...
510553
target_compile_options(Hydrogen_CXX PRIVATE ${EXTRA_CXX_FLAGS})
511554

512-
target_link_libraries(Hydrogen_CXX PUBLIC ${Aluminum_LIBRARIES})
513-
target_link_libraries(Hydrogen_CXX PUBLIC ${HALF_LIBRARIES})
514-
515-
if (TARGET OpenMP::OpenMP_CXX)
516-
target_link_libraries(Hydrogen_CXX PUBLIC OpenMP::OpenMP_CXX)
517-
endif ()
518-
target_link_libraries(Hydrogen_CXX PUBLIC MPI::MPI_CXX)
519-
target_link_libraries(Hydrogen_CXX PUBLIC LAPACK::lapack)
520-
target_link_libraries(Hydrogen_CXX PUBLIC EP::extended_precision)
521-
522-
target_link_libraries(Hydrogen_CXX PUBLIC ${VTUNE_LIBRARIES})
523-
target_link_libraries(Hydrogen_CXX PUBLIC ${NVTX_LIBRARIES})
524-
if (HYDROGEN_HAVE_CUDA)
525-
target_link_libraries(Hydrogen_CXX PUBLIC cuda::toolkit)
526-
endif ()
555+
target_link_libraries(
556+
Hydrogen_CXX PUBLIC
557+
${Aluminum_LIBRARIES}
558+
${HALF_LIBRARIES}
559+
${VTUNE_LIBRARIES}
560+
${NVTX_LIBRARIES}
561+
${ROCBLAS_LIBRARIES}
562+
$<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
563+
$<TARGET_NAME_IF_EXISTS:MPI::MPI_CXX>
564+
$<TARGET_NAME_IF_EXISTS:LAPACK::lapack>
565+
$<TARGET_NAME_IF_EXISTS:EP::extended_precision>
566+
$<TARGET_NAME_IF_EXISTS:cuda::toolkit>
567+
$<TARGET_NAME_IF_EXISTS:hip::rocprim_hip>
568+
$<TARGET_NAME_IF_EXISTS:hip::hipcub>
569+
)
527570

528571
# Add the CXX library to "Hydrogen"
529572
set(HYDROGEN_LIBRARIES Hydrogen_CXX)
530573

531574
if (HYDROGEN_HAVE_CUDA)
532-
add_library(Hydrogen_CUDA "${HYDROGEN_CUDA_SOURCES}")
575+
add_library(Hydrogen_CUDA "${HYDROGEN_GPU_SOURCES}")
533576
target_include_directories(Hydrogen_CUDA PUBLIC
534577
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
535578
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
536579
$<INSTALL_INTERFACE:include>)
537580

538-
target_link_libraries(Hydrogen_CUDA PUBLIC ${HALF_LIBRARIES})
539-
target_link_libraries(Hydrogen_CUDA PUBLIC ${NVTX_LIBRARIES})
540-
target_link_libraries(Hydrogen_CUDA PUBLIC cuda::toolkit)
581+
target_link_libraries(
582+
Hydrogen_CUDA PUBLIC
583+
${HALF_LIBRARIES}
584+
${NVTX_LIBRARIES}
585+
$<TARGET_NAME_IF_EXISTS:cuda::toolkit>
586+
)
541587

542588
target_link_libraries(Hydrogen_CXX PUBLIC Hydrogen_CUDA)
543589
list(APPEND HYDROGEN_LIBRARIES Hydrogen_CUDA)
544590
endif ()
545591

592+
if (HYDROGEN_HAVE_ROCM)
593+
hip_add_library(Hydrogen_ROCM STATIC "${HYDROGEN_GPU_SOURCES}")
594+
target_include_directories(Hydrogen_ROCM PUBLIC
595+
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>
596+
$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
597+
$<INSTALL_INTERFACE:include>
598+
)
599+
600+
target_link_libraries(Hydrogen_ROCM PUBLIC
601+
${HALF_LIBRARIES}
602+
${ROCBLAS_LIBRARIES}
603+
)
604+
605+
#set_target_properties(Hydrogen_ROCM PROPERTIES LINKER_LANGUAGE CXX)
606+
list(APPEND HYDROGEN_LIBRARIES Hydrogen_ROCM)
607+
endif ()
608+
546609
# Setup the tests
547610
if (Hydrogen_ENABLE_TESTING OR Hydrogen_ENABLE_UNIT_TESTS)
548611
include(CTest)

cmake/configure_files/HydrogenConfig.cmake.in

+20-7
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,14 @@ set(HYDROGEN_MPI_CXX_COMPILER "@MPI_CXX_COMPILER@")
1818
set(MPI_CXX_COMPILER "${HYDROGEN_MPI_CXX_COMPILER}"
1919
CACHE FILEPATH "The MPI CXX compiler.")
2020

21-
set(_OpenMP_DIR "@OpenMP_DIR@")
22-
if (NOT OpenMP_DIR)
23-
set(OpenMP_DIR "${_OpenMP_DIR}")
24-
endif ()
25-
include (FindAndVerifyOpenMP)
21+
set(_HYDROGEN_HAVE_OPENMP @EL_HAVE_OPENMP@)
22+
if (_HYDROGEN_HAVE_OPENMP)
23+
set(_OpenMP_DIR "@OpenMP_DIR@")
24+
if (NOT OpenMP_DIR)
25+
set(OpenMP_DIR "${_OpenMP_DIR}")
26+
endif ()
27+
include (FindAndVerifyOpenMP)
28+
endif (_HYDROGEN_HAVE_OPENMP)
2629
# FIXME: I should do verification to make sure all found features are
2730
# the same.
2831
include (FindAndVerifyMPI)
@@ -33,14 +36,14 @@ set(_HYDROGEN_HAVE_NCCL2 @HYDROGEN_HAVE_NCCL2@)
3336
set(_HYDROGEN_HAVE_AL_MPI_CUDA @HYDROGEN_HAVE_AL_MPI_CUDA@)
3437
if (_HYDROGEN_HAVE_ALUMINUM)
3538
if (NOT Aluminum_FOUND)
36-
find_package(Aluminum 0.3.0 NO_MODULE QUIET
39+
find_package(Aluminum 0.4.0 NO_MODULE QUIET
3740
HINTS ${Aluminum_DIR} ${ALUMINUM_DIR} ${AL_DIR}
3841
$ENV{Aluminum_DIR} $ENV{ALUMINUM_DIR} $ENV{AL_DIR}
3942
PATH_SUFFIXES lib64/cmake/aluminum lib/cmake/aluminum
4043
NO_DEFAULT_PATH)
4144
if (NOT Aluminum_FOUND)
4245
set(Aluminum_DIR "@Aluminum_DIR@")
43-
find_package(Aluminum 0.3.0 NO_MODULE REQUIRED)
46+
find_package(Aluminum 0.4.0 NO_MODULE REQUIRED)
4447
endif ()
4548
endif ()
4649

@@ -56,6 +59,16 @@ if (_HYDROGEN_HAVE_ALUMINUM)
5659
endif ()
5760
endif (_HYDROGEN_HAVE_ALUMINUM)
5861

62+
# ROCm
63+
set(_HYDROGEN_HAVE_ROCM @HYDROGEN_HAVE_ROCM@)
64+
if (_HYDROGEN_HAVE_ROCM)
65+
find_package(HIP REQUIRED)
66+
find_package(ROCBLAS REQUIRED)
67+
68+
# TODO: query CMAKE_CXX_EXTENSIONS beforehand, so it can be set back to what it was?
69+
set(CMAKE_CXX_EXTENSIONS FALSE)
70+
endif (_HYDROGEN_HAVE_ROCM)
71+
5972
# CUDA!
6073
set(_HYDROGEN_HAVE_CUDA @HYDROGEN_HAVE_CUDA@)
6174
set(_HYDROGEN_HAVE_CUB @HYDROGEN_HAVE_CUB@)

cmake/configure_files/HydrogenConfigVersion.cmake.in

+4-2
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@
77
# [0.87 1.0.0)
88
# [1.0.0 1.1.0)
99
# [1.1.0 1.2.0)
10-
# [1.2.0 ???)
10+
# [1.2.0 1.3.0)
11+
# [1.3.0 1.4.0)
12+
# [1.4.0 ???)
1113
#
1214
# IMPORTANT: IF YOU MAKE A BREAKING CHANGE TO HYDROGEN, THE UPDATE
1315
# MUST BE GIVEN A NEW VERSION NUMBER, WHICH THEN MUST BE APPENDED TO
1416
# THIS LIST.
1517

16-
set(_version_compat_ranges 0.0.0 0.87.0 1.0.0 1.1.0 1.2.0)
18+
set(_version_compat_ranges 0.0.0 0.87.0 1.0.0 1.1.0 1.2.0 1.3.0 1.4.0)
1719

1820
# This is the version that has been installed.
1921
set(PACKAGE_VERSION "@HYDROGEN_VERSION@")

cmake/configure_files/hydrogen_config.h.in

+10-5
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,18 @@
3333
#cmakedefine HYDROGEN_HAVE_MKL
3434
#cmakedefine HYDROGEN_HAVE_MKL_GEMMT
3535

36+
#cmakedefine HYDROGEN_HAVE_GPU
37+
3638
// CUDA stuff
3739
#cmakedefine HYDROGEN_HAVE_CUDA
38-
#cmakedefine HYDROGEN_HAVE_CUB
39-
#cmakedefine HYDROGEN_CUBLAS_USE_TENSOR_OP_MATH
40+
41+
// ROCm stuff
42+
#cmakedefine HYDROGEN_HAVE_ROCM
4043

4144
// General GPU stuff
42-
#ifdef HYDROGEN_HAVE_CUDA
43-
#define HYDROGEN_HAVE_GPU
45+
#cmakedefine HYDROGEN_HAVE_CUB
46+
#cmakedefine HYDROGEN_GPU_USE_TENSOR_OP_MATH
4447
#cmakedefine HYDROGEN_GPU_USE_FP16
45-
#endif // HYDROGEN_HAVE_CUDA
4648

4749
// Aluminum stuff
4850
#cmakedefine HYDROGEN_HAVE_ALUMINUM
@@ -62,4 +64,7 @@
6264

6365
#cmakedefine HYDROGEN_DO_BOUNDS_CHECKING
6466

67+
#define H_RESTRICT __restrict__
68+
#define H_PRETTY_FUNCTION __PRETTY_FUNCTION__
69+
6570
#endif /* HYDROGEN_CONFIG_H */

cmake/modules/FindROCBLAS.cmake

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Find rocBLAS library and supporting header
2+
#
3+
# rocBLAS_DIR or ROCBLAS_DIR[in]: The prefix for rocBLAS
4+
#
5+
# ROCBLAS_INCLUDE_PATH[out,cache]: The include path for rocBLAS
6+
# ROCBLAS_LIBRARY[out,cache]: The rocBLAS library
7+
#
8+
# ROCBLAS_LIBRARIES[out]: The thing to link to for rocBLAS
9+
# ROCBLAS_FOUND[out]: Variable indicating whether rocBLAS has been found
10+
#
11+
# rocblas::rocblas: Imported library for rocBLAS
12+
#
13+
14+
find_path(ROCBLAS_INCLUDE_PATH rocblas.h
15+
HINTS ${rocBLAS_DIR} $ENV{rocBLAS_DIR} ${ROCBLAS_DIR} $ENV{ROCBLAS_DIR}
16+
PATH_SUFFIXES include
17+
NO_DEFAULT_PATH
18+
DOC "The rocBLAS include path.")
19+
find_path(ROCBLAS_INCLUDE_PATH rocblas.h)
20+
21+
find_library(ROCBLAS_LIBRARY rocblas
22+
HINTS ${rocBLAS_DIR} $ENV{rocBLAS_DIR} ${ROCBLAS_DIR} $ENV{ROCBLAS_DIR}
23+
PATH_SUFFIXES lib64 lib
24+
NO_DEFAULT_PATH
25+
DOC "The rocBLAS library.")
26+
find_library(ROCBLAS_LIBRARY rocblas)
27+
28+
# Standard handling of the package arguments
29+
include(FindPackageHandleStandardArgs)
30+
find_package_handle_standard_args(Rocblas
31+
REQUIRED_VARS ROCBLAS_LIBRARY ROCBLAS_INCLUDE_PATH)
32+
33+
if (NOT TARGET rocblas::rocblas)
34+
add_library(rocblas::rocblas INTERFACE IMPORTED)
35+
endif ()
36+
37+
if (ROCBLAS_INCLUDE_PATH AND ROCBLAS_LIBRARY)
38+
set_target_properties(rocblas::rocblas PROPERTIES
39+
INTERFACE_INCLUDE_DIRECTORIES
40+
"${ROCBLAS_INCLUDE_PATH};/opt/rocm/hsa/include;/opt/rocm/hip/include"
41+
INTERFACE_LINK_LIBRARIES "${ROCBLAS_LIBRARY}")
42+
endif ()
43+
44+
set(ROCBLAS_LIBRARIES rocblas::rocblas)
45+
mark_as_advanced(ROCBLAS_INCLUDE_PATH)
46+
mark_as_advanced(ROCBLAS_LIBRARY)

include/El/blas_like/level1/AllReduce.hpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ void AllReduce(AbstractMatrix<T>& A, mpi::Comm const& comm, mpi::Op op)
6161
case Device::CPU:
6262
AllReduce(static_cast<Matrix<T,Device::CPU>&>(A), comm, op);
6363
break;
64-
#ifdef HYDROGEN_HAVE_CUDA
64+
#ifdef HYDROGEN_HAVE_GPU
6565
case Device::GPU:
6666
AllReduce(static_cast<Matrix<T,Device::GPU>&>(A), comm, op);
6767
break;
68-
#endif // HYDROGEN_HAVE_CUDA
68+
#endif // HYDROGEN_HAVE_GPU
6969
default:
7070
LogicError("AllReduce: Bad device!");
7171
}

0 commit comments

Comments
 (0)