3131from .common import (PluginOptions , convert_weights , get_mmlu_accuracy ,
3232 prune_checkpoint , quantize_data , refit_model ,
3333 venv_check_call )
34- from .conftest import (get_device_count , llm_models_root , skip_no_sm120 ,
35- skip_nvlink_inactive , skip_post_blackwell , skip_pre_ada ,
36- skip_pre_blackwell , skip_pre_hopper , tests_path ,
37- unittest_path )
34+ from .conftest import (get_device_count , get_sm_version , llm_models_root ,
35+ skip_no_sm120 , skip_nvlink_inactive , skip_post_blackwell ,
36+ skip_pre_ada , skip_pre_blackwell , skip_pre_hopper ,
37+ tests_path , unittest_path )
3838
3939sys .path .append (os .path .join (str (tests_path ()), '/../examples/apps' ))
4040
@@ -2184,7 +2184,6 @@ def test_ptp_quickstart_advanced_deepseek_r1_8gpus(llm_root, llm_venv,
21842184 _check_mem_usage (running_log , [106.3 , 0 , 0 , 0 ], 8 )
21852185
21862186
2187- @skip_post_blackwell
21882187@pytest .mark .skip_less_device_memory (110000 )
21892188@pytest .mark .skip_less_device (8 )
21902189@pytest .mark .parametrize ("model_name,model_path" , [
@@ -2195,6 +2194,7 @@ def test_relaxed_acceptance_quickstart_advanced_deepseek_r1_8gpus(
21952194 llm_root , llm_venv , model_name , model_path ):
21962195 print (f"Testing { model_name } ." )
21972196 example_root = Path (os .path .join (llm_root , "examples" , "llm-api" ))
2197+ is_blackwell = get_sm_version () > 90
21982198 with tempfile .NamedTemporaryFile (mode = 'w+t' ,
21992199 suffix = f".{ model_name } .log" ,
22002200 dir = "./" ,
@@ -2208,7 +2208,7 @@ def test_relaxed_acceptance_quickstart_advanced_deepseek_r1_8gpus(
22082208 "--moe_ep_size=8" ,
22092209 "--tp_size=8" ,
22102210 "--use_cuda_graph" ,
2211- f"--kv_cache_fraction={ _MEM_FRACTION_95 } " ,
2211+ f"--kv_cache_fraction={ _MEM_FRACTION_50 if is_blackwell else _MEM_FRACTION_95 } " ,
22122212 "--max_batch_size=1" ,
22132213 "--max_seq_len=3000" ,
22142214 "--disable_kv_cache_reuse" ,
@@ -2221,6 +2221,8 @@ def test_relaxed_acceptance_quickstart_advanced_deepseek_r1_8gpus(
22212221 "--relaxed_delta=0.5" ,
22222222 "--enable_attention_dp" ,
22232223 "--use_one_model" ,
2224+ "--moe_backend" ,
2225+ "DEEPGEMM" if is_blackwell else "CUTLASS" ,
22242226 ],
22252227 stdout = running_log )
22262228 _check_mem_usage (running_log , [85.6 , 0 , 0 , 0 ], 8 )
0 commit comments