Build the HIP C++ unit tests from the main CMake tree and make HIP the default backend.

Summary of the changes, file by file:
  CMakeLists.txt                         drops -Wextra from WARNING_FLAGS and calls enable_testing() at the top
                                         level when FLASHINFER_UNITTESTS is set.
  cmake/Dependencies.cmake               replaces the FetchContent download of googletest with
                                         find_package(GTest REQUIRED) + include(GoogleTest).
  cmake/Options.cmake                    flips the defaults (FLASHINFER_ENABLE_CUDA OFF, FLASHINFER_ENABLE_HIP ON)
                                         and only resolves CUDA architectures when the CUDA backend is enabled.
  cmake/utils/ConfigureTargets.cmake     adds an IS_HIP option that marks the target sources as LANGUAGE HIP,
                                         removes the dispatch_inc dependency, links the imported GTest:: targets.
  libflashinfer/CMakeLists.txt           runs kernel generation and builds the kernel libraries only when
                                         FLASHINFER_BUILD_KERNELS is set.
  libflashinfer/tests/CMakeLists.txt     removes the CUDA, distributed and FP8 test targets and the "check"
                                         target; registers one <name>_hip GTest target per tests/hip/*.cpp file.
  libflashinfer/tests/hip/CMakeLists.txt deleted (the standalone HIP test project is superseded by the above).

Review notes: line structure was restored from a whitespace-collapsed copy, so leading indentation is
reconstructed (apply with --ignore-whitespace if a hunk is rejected). In the added HIP test block the glob now
uses CONFIGURE_DEPENDS, ${test_source} is quoted and endif() carries no argument, so the post-image index hash
of libflashinfer/tests/CMakeLists.txt is stale. The removed FP8_CUDA_FLAGS line has its COMPILE_LANGUAGE:CUDA
generator expression restored.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index cf59caa1da..f615275e22 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -43,7 +43,6 @@ set(CMAKE_SKIP_INSTALL_RPATH FALSE)
 # Compiler flags - defined as lists for cleaner management
 set(WARNING_FLAGS
     "-Wall"
-    "-Wextra"
     "-Winit-self"
     "-Wno-switch-bool"
     "-Wunused-function"
@@ -232,6 +231,10 @@ if(FLASHINFER_ENABLE_HIP)
 
 endif()
 
+if(FLASHINFER_UNITTESTS)
+  enable_testing()
+endif()
+
 # Add library subdirectories
 add_subdirectory(libflashinfer)
 
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 187a48a64e..30737b25a3 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -21,15 +21,9 @@ endif()
 
 # === Test Dependencies ===
 if(FLASHINFER_UNITTESTS)
-  include(FetchContent)
-
-  # Google Test for unit testing
-  FetchContent_Declare(
-    googletest
-    GIT_REPOSITORY https://github.com/google/googletest.git
-    GIT_TAG 6910c9d9165801d8827d628cb72eb7ea9dd538c5 # release-1.16.0
-    FIND_PACKAGE_ARGS NAMES GTest)
-  FetchContent_MakeAvailable(googletest)
+  find_package(GTest REQUIRED)
+  include(GoogleTest)
+  message(STATUS "Found GoogleTest: ${GTEST_INCLUDE_DIRS}")
 endif()
 
 # === Benchmark Dependencies ===
diff --git a/cmake/Options.cmake b/cmake/Options.cmake
index bebcada6e6..74c8a74d8b 100644
--- a/cmake/Options.cmake
+++ b/cmake/Options.cmake
@@ -73,10 +73,10 @@ flashinfer_option(FLASHINFER_PY_LIMITED_API "Use Python's limited API for better
 flashinfer_option(FLASHINFER_MIN_PYTHON_ABI "Minimum Python ABI version for limited API compatibility" "3.9")
 
 # === CUDA OPTIONS ===
-flashinfer_option(FLASHINFER_ENABLE_CUDA "Enable NVIDIA CUDA backend" ON)
+flashinfer_option(FLASHINFER_ENABLE_CUDA "Enable NVIDIA CUDA backend" OFF)
 
 # === HIP/ROCm OPTIONS ===
-flashinfer_option(FLASHINFER_ENABLE_HIP "Enable AMD HIP/ROCm backend" OFF)
+flashinfer_option(FLASHINFER_ENABLE_HIP "Enable AMD HIP/ROCm backend" ON)
 flashinfer_option(FLASHINFER_HIP_ARCHITECTURES "AMD GPU architectures to target" "")
 
 # === AUTO-DERIVED OPTIONS ===
@@ -112,22 +112,24 @@ if(FLASHINFER_ENABLE_HIP AND FLASHINFER_ENABLE_CUDA)
 endif()
 
 # Handle CUDA architectures
-if(FLASHINFER_CUDA_ARCHITECTURES)
-  message(STATUS "CMAKE_CUDA_ARCHITECTURES set to ${FLASHINFER_CUDA_ARCHITECTURES}.")
-else()
-  # No user-provided architectures, try to detect the CUDA archs based on where
-  # the project is being built
-  set(detected_archs "")
-  detect_cuda_architectures(detected_archs)
-  if(detected_archs)
-    set(FLASHINFER_CUDA_ARCHITECTURES ${detected_archs} CACHE STRING
-        "CUDA architectures" FORCE)
-    message(STATUS "Setting FLASHINFER_CUDA_ARCHITECTURES to detected values: ${FLASHINFER_CUDA_ARCHITECTURES}")
+if(FLASHINFER_ENABLE_CUDA)
+  if(FLASHINFER_CUDA_ARCHITECTURES)
+    message(STATUS "CMAKE_CUDA_ARCHITECTURES set to ${FLASHINFER_CUDA_ARCHITECTURES}.")
   else()
-    # No architectures detected, use safe defaults
-    set(FLASHINFER_CUDA_ARCHITECTURES "75;80;86" CACHE STRING
-        "CUDA architectures to compile for" FORCE)
-    message(STATUS "No architectures detected, using defaults: ${FLASHINFER_CUDA_ARCHITECTURES}")
+    # No user-provided architectures, try to detect the CUDA archs based on where
+    # the project is being built
+    set(detected_archs "")
+    detect_cuda_architectures(detected_archs)
+    if(detected_archs)
+      set(FLASHINFER_CUDA_ARCHITECTURES ${detected_archs} CACHE STRING
+          "CUDA architectures" FORCE)
+      message(STATUS "Setting FLASHINFER_CUDA_ARCHITECTURES to detected values: ${FLASHINFER_CUDA_ARCHITECTURES}")
+    else()
+      # No architectures detected, use safe defaults
+      set(FLASHINFER_CUDA_ARCHITECTURES "75;80;86" CACHE STRING
+          "CUDA architectures to compile for" FORCE)
+      message(STATUS "No architectures detected, using defaults: ${FLASHINFER_CUDA_ARCHITECTURES}")
+    endif()
   endif()
 endif()
 
diff --git a/cmake/utils/ConfigureTargets.cmake b/cmake/utils/ConfigureTargets.cmake
index 8819c5f289..b66b8faacc 100644
--- a/cmake/utils/ConfigureTargets.cmake
+++ b/cmake/utils/ConfigureTargets.cmake
@@ -1,7 +1,7 @@
 # cmake-format: off
 # Common configuration function for tests and benchmarks
 function(configure_flashinfer_target)
-  set(options IS_GTEST IS_BENCHMARK)
+  set(options IS_GTEST IS_BENCHMARK IS_HIP)
   set(oneValueArgs TARGET_NAME)
   set(multiValueArgs SOURCES LINK_LIBS COMPILE_FLAGS INCLUDE_DIRS)
 
@@ -27,6 +27,10 @@ function(configure_flashinfer_target)
   # Create executable target
   add_executable(${arg_TARGET_NAME} EXCLUDE_FROM_ALL ${arg_SOURCES})
 
+  if(arg_IS_HIP)
+    set_source_files_properties(${arg_SOURCES} PROPERTIES LANGUAGE HIP)
+  endif()
+
   # Add all include directories
   target_include_directories(
     ${arg_TARGET_NAME}
@@ -40,9 +44,6 @@ function(configure_flashinfer_target)
     target_include_directories(${arg_TARGET_NAME} PRIVATE ${extra_include_dir})
   endforeach()
 
-  # Add dispatch_inc dependency
-  add_dependencies(${arg_TARGET_NAME} dispatch_inc)
-
   # Add benchmark-specific library for benchmarks
   if(arg_IS_BENCHMARK)
     target_link_libraries(${arg_TARGET_NAME} PRIVATE nvbench::main)
@@ -59,10 +60,7 @@ function(configure_flashinfer_target)
 
   # Add Google Test libraries if required
   if(arg_IS_GTEST)
-    target_include_directories(${arg_TARGET_NAME} PRIVATE
-      ${gtest_SOURCE_DIR}/include
-      ${gtest_SOURCE_DIR})
-    target_link_libraries(${arg_TARGET_NAME} PRIVATE gtest gtest_main)
+    target_link_libraries(${arg_TARGET_NAME} PRIVATE GTest::gtest GTest::gtest_main Threads::Threads)
   endif()
 
   # Register with CTest if it's a test
diff --git a/libflashinfer/CMakeLists.txt b/libflashinfer/CMakeLists.txt
index 44329cc6b7..aaa8d11972 100644
--- a/libflashinfer/CMakeLists.txt
+++ b/libflashinfer/CMakeLists.txt
@@ -64,7 +64,9 @@ message(STATUS "HEAD_DIMS_SM90=${HEAD_DIMS_SM90}")
 # the DECODE_KERNELS_SRCS, PREFILL_KERNELS_SRCS, and DISPATCH_INC_FILE
 # variables.
 include(ConfigureKernelGeneration)
-flashinfer_configure_kernel_generation()
+if(FLASHINFER_BUILD_KERNELS)
+  flashinfer_configure_kernel_generation()
+endif()
 
 # ---------------------------------------------------------------------------#
 # Set the install path for the libflashinfer headers based on whether the build
@@ -97,10 +99,7 @@ flashinfer_generate_config_header(
 
 # ---------------------------------------------------------------------------#
 # Build decode_kernels and prefill_kernels if needed
-if(FLASHINFER_BUILD_KERNELS
-   OR FLASHINFER_UNITTESTS
-   OR FLASHINFER_CXX_BENCHMARKS)
-
+if(FLASHINFER_BUILD_KERNELS)
   set(FLASHINFER_KERNEL_TARGETS "")
 
   add_library(decode_kernels STATIC ${DECODE_KERNELS_SRCS})
diff --git a/libflashinfer/tests/CMakeLists.txt b/libflashinfer/tests/CMakeLists.txt
index 630ca61eea..5262f6625b 100644
--- a/libflashinfer/tests/CMakeLists.txt
+++ b/libflashinfer/tests/CMakeLists.txt
@@ -4,151 +4,25 @@ set(ALL_TEST_TARGETS "")
 
 # Include centralized config utilities
 include(ConfigureTargets)
-# Include GoogleTest to enable test discovery
-include(GoogleTest)
 find_package(Threads REQUIRED)
 
 # cmake-format: off
-# === Core Tests Configuration ===
-
-# Single and batch decode tests
-configure_flashinfer_target(
-  TARGET_NAME test_single_decode
-  SOURCES "test_single_decode.cu"
-  LINK_LIBS "decode_kernels"
-  IS_GTEST ON
-)
-
-configure_flashinfer_target(
-  TARGET_NAME test_batch_decode
-  SOURCES "test_batch_decode.cu"
-  LINK_LIBS "decode_kernels"
-  IS_GTEST ON
-)
-
-# Single and batch prefill tests
-configure_flashinfer_target(
-  TARGET_NAME test_single_prefill
-  SOURCES "test_single_prefill.cu"
-  LINK_LIBS "prefill_kernels"
-  IS_GTEST ON
-)
-
-configure_flashinfer_target(
-  TARGET_NAME test_batch_prefill
-  SOURCES "test_batch_prefill.cu"
-  LINK_LIBS "prefill_kernels"
-  IS_GTEST ON
-)
-
-# Other core tests
-configure_flashinfer_target(
-  TARGET_NAME test_page
-  SOURCES "test_page.cu"
-  IS_GTEST ON
-)
-
-configure_flashinfer_target(
-  TARGET_NAME test_cascade
-  SOURCES "test_cascade.cu"
-  LINK_LIBS "decode_kernels;prefill_kernels"
-  IS_GTEST ON
-)
-
-configure_flashinfer_target(
-  TARGET_NAME test_sampling
-  SOURCES "test_sampling.cu"
-  IS_GTEST ON
-)
-
-configure_flashinfer_target(
-  TARGET_NAME test_norm
-  SOURCES "test_norm.cu"
-  IS_GTEST ON
-)
-
-configure_flashinfer_target(
-  TARGET_NAME test_fastdiv
-  SOURCES "test_fastdiv.cu"
-  IS_GTEST ON
-)
-
-configure_flashinfer_target(
-  TARGET_NAME test_fast_dequant
-  SOURCES "test_fast_dequant.cu"
-  IS_GTEST ON
-)
-
-# === Distributed Test Configuration ===
-if(FLASHINFER_DIST_UNITTESTS)
-  set(DIST_INCLUDE_DIRS
-    "${FLASHINFER_INCLUDE_DIR}"
-    "${mscclpp_SOURCE_DIR}/include"
-  )
-
-  # Add spdlog include directory if available
-  if(DEFINED SPDLOG_INCLUDE_DIR)
-    list(APPEND DIST_INCLUDE_DIRS "${SPDLOG_INCLUDE_DIR}")
-  elseif(TARGET spdlog::spdlog)
-    # If found via find_package
-    get_target_property(SPDLOG_INCLUDE_DIRS spdlog::spdlog INTERFACE_INCLUDE_DIRECTORIES)
-    list(APPEND DIST_INCLUDE_DIRS "${SPDLOG_INCLUDE_DIRS}")
-  endif()
-
-  configure_flashinfer_target(
-    TARGET_NAME test_sum_all_reduce
-    SOURCES "test_sum_all_reduce.cu"
-    LINK_LIBS "MPI::MPI_CXX;flashinfer::mscclpp"
-    COMPILE_FLAGS "-DENABLE_MPI"
-    INCLUDE_DIRS "${DIST_INCLUDE_DIRS}"
-    IS_GTEST OFF
-  )
-
-  configure_flashinfer_target(
-    TARGET_NAME test_attn_all_reduce
-    SOURCES "test_attn_all_reduce.cu"
-    LINK_LIBS "MPI::MPI_CXX;flashinfer::mscclpp"
-    COMPILE_FLAGS "-DENABLE_MPI"
-    IS_GTEST OFF
-  )
-endif()
-
-# === FP8 Test Configuration ===
-if(FLASHINFER_FP8_TESTS)
-  # Set path to FP8 utilities
-  set(FP8_UTILS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../utils/fp8)
-
-  # Define FP8-specific CUDA compile flags - one per line for readability
-  set(FP8_CUDA_FLAGS_LIST
-    --expt-extended-lambda
-    --use_fast_math
-    --generate-code=arch=compute_90a,code=sm_90a
-  )
-
-  # Combine the list into a generator expression for CUDA language only
-  string(JOIN ";" FP8_CUDA_FLAGS_JOINED "${FP8_CUDA_FLAGS_LIST}")
-  set(FP8_CUDA_FLAGS "$<$<COMPILE_LANGUAGE:CUDA>:${FP8_CUDA_FLAGS_JOINED}>")
-
-  # Define FP8-specific include directories
-  set(FP8_INCLUDE_DIRS
-    "${TORCH_INCLUDE_DIRS}"
-    "${CUDA_INCLUDE_DIRS}"
-    "${Python3_INCLUDE_DIRS}"
-    "${FA3_INCLUDE_DIR}"
-    "${CUTLASS_INCLUDE_DIRS}"
-    "${FP8_UTILS_DIR}"
-  )
-
-  # Add the FP8 test with improved readability
-  configure_flashinfer_target(
-    TARGET_NAME test_single_prefill_fa3_sm90
-    SOURCES "fp8/test_single_prefill_fa3_sm90.cu"
-    LINK_LIBS "FA3_LIB;${TORCH_LIBRARIES}"
-    COMPILE_FLAGS "${FP8_CUDA_FLAGS}"
-    INCLUDE_DIRS "${FP8_INCLUDE_DIRS}"
-  )
-endif()
-
+# === HIP C++ Unit Tests Configuration ===
+if(FLASHINFER_ENABLE_HIP)
+  file(GLOB HIP_TEST_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/hip/*.cpp")
+
+  foreach(test_source IN LISTS HIP_TEST_SOURCES)
+    get_filename_component(test_name "${test_source}" NAME_WE)
+    set(target_name "${test_name}_hip")
+    configure_flashinfer_target(
+      TARGET_NAME ${target_name}
+      SOURCES "${test_source}"
+      IS_GTEST ON
+      IS_HIP ON
+    )
+  endforeach()
+
+endif()
 # cmake-format: on
 
 # === Test Discovery and Targets ===
@@ -164,10 +38,3 @@ endforeach()
 # Create target to build all tests
 add_custom_target(build_tests)
 add_dependencies(build_tests ${ALL_TEST_TARGETS})
-
-# Setup "check" target similar to autotools
-set(CMAKE_CTEST_COMMAND ctest --progress --output-on-failure)
-add_custom_target(check COMMAND ${CMAKE_COMMAND} ${CMAKE_CTEST_COMMAND})
-add_dependencies(check build_tests)
-
-enable_testing()
diff --git a/libflashinfer/tests/hip/CMakeLists.txt b/libflashinfer/tests/hip/CMakeLists.txt
deleted file mode 100644
index 91e0b0101b..0000000000
--- a/libflashinfer/tests/hip/CMakeLists.txt
+++ /dev/null
@@ -1,184 +0,0 @@
-# SPDX - FileCopyrightText : 2025 Advanced Micro Devices, Inc.
-#
-# SPDX - License - Identifier : Apache 2.0
-
-cmake_minimum_required(VERSION 3.21)
-
-project(FlashInferTests LANGUAGES CXX HIP)
-
-set(CMAKE_CXX_STANDARD 17)
-set(CMAKE_CXX_STANDARD_REQUIRED ON)
-
-include(FetchContent)
-FetchContent_Declare(
-  googletest
-  GIT_REPOSITORY https://github.com/google/googletest.git
-  GIT_TAG 6910c9d9165801d8827d628cb72eb7ea9dd538c5)
-FetchContent_MakeAvailable(googletest)
-get_target_property(gtest_include_dirs GTest::gtest
-                    INTERFACE_INCLUDE_DIRECTORIES)
-
-# GoogleTest has to be included so that gtest_discover_tests works Thread
-# support is needed for gtest
-find_package(Threads REQUIRED)
-
-find_package(HIP REQUIRED)
-set(CMAKE_HIP_COMPILER ${HIP_HIPCC_EXECUTABLE})
-set(HIP_HIPCC_FLAGS
-    "${HIP_HIPCC_FLAGS} -O3 --offload-arch=gfx942 -D__HIP_NO_HALF_OPERATORS__=0 -D__HIP_NO_HALF_CONVERSIONS__=0"
-)
-
-# clang-format off Add flashinfer directory here Example:
-# set(FLASHINFER_INCLUDE_DIRS /root/flashinfer/libflashinfer/include/)
-set(FLASHINFER_INCLUDE_DIRS
-    ""
-    CACHE PATH "Path to FlashInfer include directory")
-# clang-format on
-
-include_directories(${HIP_INCLUDE_DIRS})
-include_directories("/opt/rocm/include") # Add this explicit include path
-include_directories(${FLASHINFER_INCLUDE_DIRS})
-include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-
-# find_package(GTest REQUIRED)
-include_directories(${GTEST_INCLUDE_DIRS})
-
-# Add definition to use HIP API
-add_definitions(-D__HIP_PLATFORM_AMD__)
-
-add_library(math STATIC ${FLASHINFER_INCLUDE_DIRS}/gpu_iface/math_ops.hpp)
-add_library(layout STATIC ${FLASHINFER_INCLUDE_DIRS}/gpu_iface/layout.cuh)
-add_library(pos_enc STATIC
-            ${FLASHINFER_INCLUDE_DIRS}/flashinfer/attention/generic/pos_enc.cuh)
-add_library(state STATIC
-            ${FLASHINFER_INCLUDE_DIRS}/flashinfer/attention/generic/state.cuh)
-add_library(cascade STATIC
-            ${FLASHINFER_INCLUDE_DIRS}/flashinfer/attention/generic/cascade.cuh)
-add_library(decode STATIC
-            ${FLASHINFER_INCLUDE_DIRS}/flashinfer/attention/generic/decode.cuh)
-
-add_library(mma_ops STATIC ${FLASHINFER_INCLUDE_DIRS}/gpu_iface/mma_ops.hpp)
-
-set(CMAKE_HIP_FLAGS
-    "${CMAKE_HIP_FLAGS} -O3 -g -D__HIP_PLATFORM_AMD__ -D__HIP_NO_HALF_OPERATORS__=0 -D__HIP_NO_HALF_CONVERSIONS__=0 -DPLATFORM_HIP_DEVICE -DHIP_ENABLE_WARP_SYNC_BUILTINS=1"
-)
-
-set_target_properties(
-  math
-  PROPERTIES HIP_SOURCES_PROPERTY_FORMAT 1
-             HIP_SEPARABLE_COMPILATION ON
-             LINKER_LANGUAGE HIP)
-
-set_target_properties(
-  layout
-  PROPERTIES HIP_SOURCES_PROPERTY_FORMAT 1
-             HIP_SEPARABLE_COMPILATION ON
-             LINKER_LANGUAGE HIP)
-
-set_target_properties(
-  pos_enc
-  PROPERTIES HIP_SOURCES_PROPERTY_FORMAT 1
-             HIP_SEPARABLE_COMPILATION ON
-             LINKER_LANGUAGE HIP)
-
-set_target_properties(
-  state
-  PROPERTIES HIP_SOURCES_PROPERTY_FORMAT 1
-             HIP_SEPARABLE_COMPILATION ON
-             LINKER_LANGUAGE HIP)
-
-set_target_properties(
-  cascade
-  PROPERTIES HIP_SOURCES_PROPERTY_FORMAT 1
-             HIP_SEPARABLE_COMPILATION ON
-             LINKER_LANGUAGE HIP)
-
-set_target_properties(
-  decode
-  PROPERTIES HIP_SOURCES_PROPERTY_FORMAT 1
-             HIP_SEPARABLE_COMPILATION ON
-             LINKER_LANGUAGE HIP)
-
-set_target_properties(
-  mma_ops
-  PROPERTIES HIP_SOURCES_PROPERTY_FORMAT 1
-             HIP_SEPARABLE_COMPILATION ON
-             LINKER_LANGUAGE HIP)
-
-target_link_libraries(math PUBLIC hip::host hip::device)
-target_link_libraries(layout PUBLIC hip::host hip::device)
-target_link_libraries(pos_enc PUBLIC hip::host hip::device)
-target_link_libraries(state PUBLIC hip::host hip::device)
-target_link_libraries(cascade PUBLIC hip::host hip::device)
-target_link_libraries(
-  decode
-  PUBLIC hip::host
-         hip::device
-         cascade
-         state
-         pos_enc
-         layout
-         math)
-
-target_link_libraries(mma_ops PUBLIC hip::host hip::device)
-
-message("CMAKE_CURRENT_SOURCE_DIR: ${CMAKE_CURRENT_SOURCE_DIR}")
-
-add_executable(test_math ${CMAKE_CURRENT_SOURCE_DIR}/test_math.cpp)
-target_link_libraries(test_math PRIVATE math gtest gtest_main pthread)
-
-add_executable(test_pos_enc ${CMAKE_CURRENT_SOURCE_DIR}/test_pos_enc.cpp)
-target_link_libraries(test_pos_enc PRIVATE pos_enc gtest gtest_main pthread)
-
-add_executable(test_cascade ${CMAKE_CURRENT_SOURCE_DIR}/test_cascade.cpp)
-target_link_libraries(test_cascade PRIVATE state cascade gtest gtest_main
-                                           pthread)
-
-add_executable(test_single_decode
-               ${CMAKE_CURRENT_SOURCE_DIR}/test_single_decode.cpp)
-target_link_libraries(test_single_decode PRIVATE decode gtest gtest_main
-                                                 pthread)
-
-add_executable(test_batch_decode
-               ${CMAKE_CURRENT_SOURCE_DIR}/test_batch_decode.cpp)
-target_link_libraries(test_batch_decode PRIVATE decode gtest gtest_main pthread)
-
-add_executable(test_mfma_fp32_16x16x16fp16
-               ${CMAKE_CURRENT_SOURCE_DIR}/test_mfma_fp32_16x16x16fp16.cpp)
-target_link_libraries(test_mfma_fp32_16x16x16fp16 PRIVATE mma_ops gtest
-                                                          gtest_main pthread)
-
-add_executable(test_rowsum ${CMAKE_CURRENT_SOURCE_DIR}/test_rowsum.cpp)
-target_link_libraries(test_rowsum PRIVATE mma_ops gtest gtest_main pthread)
-
-add_executable(
-  test_transpose_4x4_half_registers
-  ${CMAKE_CURRENT_SOURCE_DIR}/test_transpose_4x4_half_registers.cpp)
-target_link_libraries(test_transpose_4x4_half_registers
-                      PRIVATE mma_ops gtest gtest_main pthread)
-
-if(HIP_FOUND)
-  set_target_properties(test_math PROPERTIES HIP_SEPARABLE_COMPILATION ON)
-  set_target_properties(test_pos_enc PROPERTIES HIP_SEPARABLE_COMPILATION ON)
-  set_target_properties(test_cascade PROPERTIES HIP_SEPARABLE_COMPILATION ON)
-  set_target_properties(test_single_decode PROPERTIES HIP_SEPARABLE_COMPILATION
-                                                      ON)
-  set_target_properties(test_batch_decode PROPERTIES HIP_SEPARABLE_COMPILATION
-                                                     ON)
-  set_target_properties(test_mfma_fp32_16x16x16fp16
-                        PROPERTIES HIP_SEPARABLE_COMPILATION ON)
-  set_target_properties(test_transpose_4x4_half_registers
-                        PROPERTIES HIP_SEPARABLE_COMPILATION ON)
-  set_target_properties(test_rowsum PROPERTIES HIP_SEPARABLE_COMPILATION ON)
-endif()
-
-enable_testing()
-add_test(NAME MathTest COMMAND test_math)
-add_test(NAME PosEncTest COMMAND test_pos_enc)
-add_test(NAME CascadeTest COMMAND test_cascade)
-add_test(NAME SingleDecodeTest COMMAND test_single_decode)
-add_test(NAME BatchDecodeTest COMMAND test_batch_decode)
-add_test(NAME test_mfma_fp32_16x16x16fp16 COMMAND test_mfma_fp32_16x16x16fp16)
-add_test(NAME test_transpose_4x4_half_registers
-         COMMAND test_transpose_4x4_half_registers)
-add_test(NAME test_rowsum COMMAND test_rowsum)