From 1aa819429fdc0992786d1a435ff8c115d78f9146 Mon Sep 17 00:00:00 2001 From: Dongfeng Yu Date: Mon, 27 Oct 2025 23:58:58 +0000 Subject: [PATCH 1/2] [https://nvbugs/5596343][test] Update test waive to get back some coverage Signed-off-by: Dongfeng Yu --- tests/integration/defs/accuracy/test_llm_api_pytorch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py index 5a4af5a5600..ff178981a63 100644 --- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py +++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py @@ -3465,7 +3465,6 @@ def test_dummy_load_format(self): task = GSM8K(model_name) task.evaluate(llm, is_integration_test=True) - @pytest.mark.skip(reason="https://nvbugs/5596343") @pytest.mark.skip_less_device(4) @pytest.mark.parametrize( "kv_cache_dtype", @@ -3485,6 +3484,10 @@ def test_dummy_load_format(self): def test_w4_4gpus(self, kv_cache_dtype, moe_backend, tp_size, pp_size, ep_size, attention_dp, cuda_graph, overlap_scheduler, mocker): + if get_sm_version() < 100: + pytest.skip( + "https://nvbugs/5596343: Skip Hopper due to accuracy issue.") + mocker.patch.object(GSM8K, "MAX_OUTPUT_LEN", 8192) mocker.patch.dict(GSM8K.EVALUATE_KWARGS, {"scores_filter": "exact_match,flexible-extract"}) From bff5a9662fc898c9f8c755436bb4ccce2d60e171 Mon Sep 17 00:00:00 2001 From: dongfengy <99041270+dongfengy@users.noreply.github.com> Date: Tue, 28 Oct 2025 10:19:01 -0700 Subject: [PATCH 2/2] Update waives.txt Signed-off-by: dongfengy <99041270+dongfengy@users.noreply.github.com> --- tests/integration/test_lists/waives.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 3ff1355b9e0..3a81c8ca7b0 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -330,7 +330,6 @@ cpp/test_e2e.py::test_benchmarks[gpt-80] SKIP (https://nvbugs/5550689) cpp/test_e2e.py::test_benchmarks[bart-90] SKIP (https://nvbugs/5550689) examples/test_nemotron_nas.py::test_nemotron_nano_8b_lora_torch[Llama-3.1-Nemotron-Nano-8B-v1] SKIP (https://nvbugs/5563469) test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image-False] SKIP (https://nvbugs/5547437) -accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_1gpu[True-True-trtllm-auto] SKIP (https://nvbugs/5575913) accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8-torch_compile=True] SKIP (https://nvbugs/5546510) test_e2e.py::test_multi_nodes_eval[Kimi-K2-Instruct-tp16-mmlu] SKIP (https://nvbugs/5556998) full:H20/accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8[tp8ep8-cuda_graph=True] SKIP (https://nvbugs/5574553)