Skip to content

Commit f22a87f

Browse files
[https://nvbugs/5325296][fix] Enable relaxed acceptance test on Blackwell (#8709)
Signed-off-by: Barry Kang <[email protected]>
1 parent 752cc3a commit f22a87f

File tree

1 file changed

+8
-6
lines changed

1 file changed

+8
-6
lines changed

tests/integration/defs/test_e2e.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -31,10 +31,10 @@
3131
from .common import (PluginOptions, convert_weights, get_mmlu_accuracy,
3232
prune_checkpoint, quantize_data, refit_model,
3333
venv_check_call)
34-
from .conftest import (get_device_count, llm_models_root, skip_no_sm120,
35-
skip_nvlink_inactive, skip_post_blackwell, skip_pre_ada,
36-
skip_pre_blackwell, skip_pre_hopper, tests_path,
37-
unittest_path)
34+
from .conftest import (get_device_count, get_sm_version, llm_models_root,
35+
skip_no_sm120, skip_nvlink_inactive, skip_post_blackwell,
36+
skip_pre_ada, skip_pre_blackwell, skip_pre_hopper,
37+
tests_path, unittest_path)
3838

3939
sys.path.append(os.path.join(str(tests_path()), '/../examples/apps'))
4040

@@ -2184,7 +2184,6 @@ def test_ptp_quickstart_advanced_deepseek_r1_8gpus(llm_root, llm_venv,
21842184
_check_mem_usage(running_log, [106.3, 0, 0, 0], 8)
21852185

21862186

2187-
@skip_post_blackwell
21882187
@pytest.mark.skip_less_device_memory(110000)
21892188
@pytest.mark.skip_less_device(8)
21902189
@pytest.mark.parametrize("model_name,model_path", [
@@ -2195,6 +2194,7 @@ def test_relaxed_acceptance_quickstart_advanced_deepseek_r1_8gpus(
21952194
llm_root, llm_venv, model_name, model_path):
21962195
print(f"Testing {model_name}.")
21972196
example_root = Path(os.path.join(llm_root, "examples", "llm-api"))
2197+
is_blackwell = get_sm_version() > 90
21982198
with tempfile.NamedTemporaryFile(mode='w+t',
21992199
suffix=f".{model_name}.log",
22002200
dir="./",
@@ -2208,7 +2208,7 @@ def test_relaxed_acceptance_quickstart_advanced_deepseek_r1_8gpus(
22082208
"--moe_ep_size=8",
22092209
"--tp_size=8",
22102210
"--use_cuda_graph",
2211-
f"--kv_cache_fraction={_MEM_FRACTION_95}",
2211+
f"--kv_cache_fraction={_MEM_FRACTION_50 if is_blackwell else _MEM_FRACTION_95}",
22122212
"--max_batch_size=1",
22132213
"--max_seq_len=3000",
22142214
"--disable_kv_cache_reuse",
@@ -2221,6 +2221,8 @@ def test_relaxed_acceptance_quickstart_advanced_deepseek_r1_8gpus(
22212221
"--relaxed_delta=0.5",
22222222
"--enable_attention_dp",
22232223
"--use_one_model",
2224+
"--moe_backend",
2225+
"DEEPGEMM" if is_blackwell else "CUTLASS",
22242226
],
22252227
stdout=running_log)
22262228
_check_mem_usage(running_log, [85.6, 0, 0, 0], 8)

0 commit comments

Comments
 (0)