Commit 9f2a3ae

[None][fix] Restrict tinygemm use to certain SMs (#8182)

Signed-off-by: Dongfeng Yu <[email protected]>
Signed-off-by: dongfengy <[email protected]>

1 parent ed8e00a · commit 9f2a3ae

File tree: 2 files changed, +7 −1 lines changed

cpp/tensorrt_llm/thop/tinygemm2.cpp (3 additions, 0 deletions)

```diff
@@ -30,6 +30,9 @@ namespace torch_ext
 {
 torch::Tensor tinygemm2_forward(torch::Tensor input, torch::Tensor weight, torch::Tensor bias)
 {
+    auto const smVersion = tensorrt_llm::common::getSMVersion();
+    TORCH_CHECK(
+        smVersion == 90 || smVersion == 100 || smVersion == 103, "tinygemm2 only supports SM90, SM100, and SM103.");
     TORCH_CHECK(input.dim() == 2, "input must be 2D");
     TORCH_CHECK(weight.dim() == 2, "weight must be 2D");
     TORCH_CHECK(bias.dim() == 1, "bias must be 1D");
```
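The hunk above makes the C++ operator fail fast when called on a GPU architecture that has no tinygemm2 kernel, instead of misbehaving later. A minimal Python analogue of that guard, assuming illustrative names (`SUPPORTED_SMS`, `check_tinygemm2_supported` are not TensorRT-LLM API):

```python
# Hedged sketch of the fail-fast guard added by this commit.
# SM versions with a tinygemm2 kernel, per the diff: SM90, SM100, SM103.
SUPPORTED_SMS = (90, 100, 103)

def check_tinygemm2_supported(sm_version: int) -> None:
    """Raise immediately if this GPU architecture has no tinygemm2 kernel."""
    if sm_version not in SUPPORTED_SMS:
        raise RuntimeError(
            f"tinygemm2 only supports SM90, SM100, and SM103; got SM{sm_version}.")
```

Failing at the call boundary with an explicit architecture message is cheaper to debug than an illegal-instruction error deep inside a kernel launch.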

tensorrt_llm/_torch/models/modeling_gpt_oss.py (4 additions, 1 deletion)

```diff
@@ -7,6 +7,7 @@
 from tqdm import tqdm
 from transformers import GptOssConfig

+from tensorrt_llm._utils import get_sm_version
 from tensorrt_llm.functional import PositionEmbeddingType, RotaryScalingType

 from ..attention_backend import AttentionMetadata
@@ -225,7 +226,9 @@ def _create_ideal_expert_load_balanced_logits(
             dtype=pretrained_config.torch_dtype)

     def compute_gate_output(self, x: torch.Tensor) -> torch.Tensor:
-        if x.shape[0] <= MIN_LATENCY_TINYGEMM_NUM_TOKENS:
+        if get_sm_version() in [
+                90, 100, 103
+        ] and x.shape[0] <= MIN_LATENCY_TINYGEMM_NUM_TOKENS:
             weight = self.gate.weight
             bias = self.gate.bias
             g = torch.ops.trtllm.tinygemm2(x, weight, bias)
```
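Unlike the C++ side, which hard-errors, the Python caller gates dispatch so unsupported SMs silently take the generic GEMM path and never reach the tinygemm2 op. That two-condition dispatch can be sketched in isolation; the names below (`pick_gate_kernel`, the threshold value, `"dense_gemm"`) are illustrative stand-ins, not TensorRT-LLM API, and `sm_version` is passed in rather than queried via `tensorrt_llm._utils.get_sm_version()` so the sketch runs without a GPU:

```python
# Hedged sketch of the caller-side dispatch in compute_gate_output.
TINYGEMM_SMS = (90, 100, 103)          # SMs restricted by this commit
MIN_LATENCY_TINYGEMM_NUM_TOKENS = 128  # illustrative threshold, not the real value

def pick_gate_kernel(sm_version: int, num_tokens: int) -> str:
    """Return which kernel the gate projection would use for this batch."""
    if sm_version in TINYGEMM_SMS and num_tokens <= MIN_LATENCY_TINYGEMM_NUM_TOKENS:
        return "tinygemm2"   # latency-optimized path, only on supported SMs
    return "dense_gemm"      # generic fallback for other SMs or larger batches
```

Gating at the call site means the `TORCH_CHECK` in tinygemm2.cpp acts only as a backstop for direct users of `torch.ops.trtllm.tinygemm2`.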
