From fe2bd01967f29389679c2d099d8d0b509b5fd83b Mon Sep 17 00:00:00 2001 From: Duncan Moss Date: Wed, 22 Oct 2025 21:36:50 -0700 Subject: [PATCH 1/3] feat: enable deepgemm jit for fp8 block-scale Signed-off-by: Duncan Moss --- csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh | 9 ++------- csrc/nv_internal/tensorrt_llm/deep_gemm/runtime.cuh | 9 +++++++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh b/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh index 3b7ed113e7..04bae9fd43 100644 --- a/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh +++ b/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh @@ -125,7 +125,7 @@ std::vector getJitIncludeDirs() { static std::vector includeDirs; if (includeDirs.empty()) { // Command to execute - char const* cmd = "pip show tensorrt_llm 2>/dev/null"; + char const* cmd = "pip show flashinfer-python 2>/dev/null"; // Buffer to store the output std::array buffer; @@ -174,12 +174,7 @@ std::vector getJitIncludeDirs() { location.erase(location.find_last_not_of(" \n\r\t") + 1); // Set the include directory based on the package location - includeDirs.push_back(std::filesystem::path(location) / "tensorrt_llm" / "include"); - - if (!kJitUseNvcc) { - includeDirs.push_back(std::filesystem::path(location) / "tensorrt_llm" / "include" / - "cuda" / "include"); - } + includeDirs.push_back(std::filesystem::path(location) / "flashinfer" / "data" / "csrc" / "nv_internal" / "tensorrt_llm"); } } else { TLLM_LOG_WARNING("Failed to find TensorRT LLM installation, DeepGEMM will be disabled."); diff --git a/csrc/nv_internal/tensorrt_llm/deep_gemm/runtime.cuh b/csrc/nv_internal/tensorrt_llm/deep_gemm/runtime.cuh index 35af1fcd23..f4e6ab124e 100644 --- a/csrc/nv_internal/tensorrt_llm/deep_gemm/runtime.cuh +++ b/csrc/nv_internal/tensorrt_llm/deep_gemm/runtime.cuh @@ -36,8 +36,13 @@ static bool kJitDebugging = []() { }(); static bool kJitUseNvcc = []() { - char const* env_var = getenv("TRTLLM_DG_JIT_USE_NVCC"); - return env_var && (std::string(env_var) == "1" || std::string(env_var) == "true"); + // char const* env_var = getenv("TRTLLM_DG_JIT_USE_NVCC"); + // return env_var && (std::string(env_var) == "1" || std::string(env_var) == "true"); + // always use nvcc + // TODO: Enable nvrtc -- need these headers: + // [TensorRT-LLM][INFO] Compilation log: + // kernel.cu(16): catastrophic error: cannot open source file "cuda_bf16.h" + return true; }(); static bool kJitDumpCubin = []() { From c9a1a1ab0d1628829383bc4beb1096eae8f9fc93 Mon Sep 17 00:00:00 2001 From: Duncan Moss Date: Wed, 22 Oct 2025 21:39:07 -0700 Subject: [PATCH 2/3] pre-commit Signed-off-by: Duncan Moss --- csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh b/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh index 04bae9fd43..2aed11a76c 100644 --- a/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh +++ b/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh @@ -174,7 +174,8 @@ std::vector getJitIncludeDirs() { location.erase(location.find_last_not_of(" \n\r\t") + 1); // Set the include directory based on the package location - includeDirs.push_back(std::filesystem::path(location) / "flashinfer" / "data" / "csrc" / "nv_internal" / "tensorrt_llm"); + includeDirs.push_back(std::filesystem::path(location) / "flashinfer" / "data" / "csrc" / + "nv_internal" / "tensorrt_llm"); } } else { TLLM_LOG_WARNING("Failed to find TensorRT LLM installation, DeepGEMM will be disabled."); From aa0421e502fbbc1d6504ea0126e2f7e33615b490 Mon Sep 17 00:00:00 2001 From: Duncan Moss Date: Wed, 22 Oct 2025 21:46:30 -0700 Subject: [PATCH 3/3] gemini comment Signed-off-by: Duncan Moss --- csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh b/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh index 2aed11a76c..9222bf19d2 100644 --- a/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh +++ b/csrc/nv_internal/tensorrt_llm/deep_gemm/compiler.cuh @@ -178,7 +178,7 @@ std::vector getJitIncludeDirs() { "nv_internal" / "tensorrt_llm"); } } else { - TLLM_LOG_WARNING("Failed to find TensorRT LLM installation, DeepGEMM will be disabled."); + TLLM_LOG_WARNING("Failed to find FlashInfer installation, DeepGEMM will be disabled."); } } return includeDirs;