From 7ae078b4174279aed8ef8ab4d1cd433eb71e4e53 Mon Sep 17 00:00:00 2001
From: Chenjie Luo <108829653+cjluo-nv@users.noreply.github.com>
Date: Mon, 6 Oct 2025 11:21:38 -0700
Subject: [PATCH] Explicitly register real quant gemms

Signed-off-by: Chenjie Luo <108829653+cjluo-nv@users.noreply.github.com>
---
 .../torch/quantization/backends/__init__.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/modelopt/torch/quantization/backends/__init__.py b/modelopt/torch/quantization/backends/__init__.py
index 92317d92b..c4c2fadd5 100644
--- a/modelopt/torch/quantization/backends/__init__.py
+++ b/modelopt/torch/quantization/backends/__init__.py
@@ -15,5 +15,18 @@
 """Quantization backends."""
 
-from .gemm_registry import *
-from .nvfp4_gemm import *
+from .fp8_per_tensor_gemm import Fp8PerTensorLinear, _fp8_availability_check
+from .gemm_registry import gemm_registry
+from .nvfp4_gemm import Nvfp4Linear, _nvfp4_availability_check
+
+# Register the default FP8 per-tensor GEMM implementation
+gemm_registry.register(
+    gemm_func=Fp8PerTensorLinear.apply,
+    availability_check=_fp8_availability_check,
+)
+
+# Register the default NVFP4 GEMM implementation
+gemm_registry.register(
+    gemm_func=Nvfp4Linear.apply,
+    availability_check=_nvfp4_availability_check,
+)
 