
Commit 0e36484 (parent: a966644)

[None][test] Add gpt_oss_20b Model to Sanity Perf Test (#8265)

This commit adds gpt_oss_20b_fp4 (together with nemotron_nano_9b_v2 and starcoder2_7b) to the perf-test model map, registers matching sanity-perf test entries, and replaces the per-GPU wildcard filters in the test-list condition blocks with GPU-count ranges plus a compute-capability gate, recording the intended GPU coverage in comments.

File tree: 2 files changed, +27 −72 lines

tests/integration/defs/perf/test_perf.py

Lines changed: 3 additions & 0 deletions
```diff
@@ -139,6 +139,9 @@
     "bielik_11b_v2.2_instruct_fp8": "Bielik-11B-v2.2-Instruct-FP8",
     "mistral_small_v3.1_24b": "Mistral-Small-3.1-24B-Instruct-2503",
     "gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b",
+    "gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
+    "nemotron_nano_9b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
+    "starcoder2_7b": "starcoder2-7b",
 }
 # Model PATH of HuggingFace
 HF_MODEL_PATH = {
```
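The map above pairs a perf-test model label with a checkpoint directory name. As a rough illustration of how such a label might be resolved to an on-disk path — the helper name and the `LLM_MODELS_ROOT` default below are assumptions for this sketch, not the actual code in test_perf.py:

```python
import os

# Excerpt of the label -> directory map from the diff above.
MODEL_PATH_DICT = {
    "gpt_oss_120b_fp4": "gpt_oss/gpt-oss-120b",
    "gpt_oss_20b_fp4": "gpt_oss/gpt-oss-20b",
    "nemotron_nano_9b_v2": "NVIDIA-Nemotron-Nano-12B-v2",
    "starcoder2_7b": "starcoder2-7b",
}

# Hypothetical models root; the real tests read their own configured location.
MODELS_ROOT = os.environ.get("LLM_MODELS_ROOT", "/models")


def resolve_model_path(label: str) -> str:
    """Turn a test label like 'gpt_oss_20b_fp4' into a checkpoint directory."""
    if label not in MODEL_PATH_DICT:
        raise KeyError(f"unknown perf-test model label: {label}")
    return os.path.join(MODELS_ROOT, MODEL_PATH_DICT[label])


# e.g. resolve_model_path("gpt_oss_20b_fp4") -> "/models/gpt_oss/gpt-oss-20b"
```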
Lines changed: 24 additions & 72 deletions (the llm_perf_sanity test-list YAML)
```diff
@@ -1,29 +1,23 @@
 version: 0.0.1
 llm_perf_sanity:
+# A100, L40S, L20, H20, H100, H200, Blackwell
 - condition:
     ranges:
       system_gpu_count:
         gte: 1
-    wildcards:
-      gpu:
-      - '*h100*'
-      - '*h200*'
-      - '*a100*'
-      - '*l40s*'
-      - '*l20*'
-      - '*h20*'
   tests:
   # E2E trtllm-bench

   #llama_v3.1_8b_instruct
   #trt backend
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:128,128]
   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-pytorch-bfloat16-input_output_len:512,32]
+  - perf/test_perf.py::test_perf[starcoder2_7b-bench-bfloat16-input_output_len:512,512]
+  #pytorch backend
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-input_output_len:500,2000]
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-input_output_len:512,32]
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-streaming-bfloat16-input_output_len:500,2000]
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-streaming-bfloat16-input_output_len:512,32]
-  #pytorch backend
   - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-pytorch-bfloat16-input_output_len:128,128]
   # Phi-4-multimodal-instruct
   - perf/test_perf.py::test_perf[phi_4_multimodal_instruct-bench-pytorch-bfloat16-input_output_len:128,128]
@@ -32,22 +26,17 @@ llm_perf_sanity:
   # Ministral-8B
   - perf/test_perf.py::test_perf[ministral_8b-bench-pytorch-bfloat16-input_output_len:500,2000-reqs:500-con:250]
   - perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-pytorch-bfloat16-input_output_len:500,2000]
+  - perf/test_perf.py::test_perf[nemotron_nano_9b_v2-bench-pytorch-bfloat16-input_output_len:512,512]

   # Test list validation
   - test_list_validation.py::test_list_validation


 # FP8 specific tests
+# A100, L40S, L20, H20, H100, H200, Blackwell
 - condition:
     terms:
       supports_fp8: true
-    wildcards:
-      gpu:
-      - '*h100*'
-      - '*h200*'
-      - '*l40s*'
-      - '*l20*'
-      - '*h20*'
   tests:
   #llama_v3.1_8b_instruct_fp8
   #pytorch backend
@@ -59,19 +48,12 @@ llm_perf_sanity:
   - perf/test_perf.py::test_perf[bielik_11b_v2.2_instruct_fp8-bench-pytorch-float8-input_output_len:1000,1000-con:250]
   - perf/test_perf.py::test_perf[ministral_8b_fp8-bench-pytorch-float8-input_output_len:500,2000-reqs:500-con:250]

-# Tests for systems with 2+ GPUs
+# Tests for ALL systems with 2+ GPUs
+# A100, L40S, L20, H20, H100, H200, Blackwell
 - condition:
     ranges:
       system_gpu_count:
         gte: 2
-    wildcards:
-      gpu:
-      - '*h100*'
-      - '*h200*'
-      - '*a100*'
-      - '*l40s*'
-      - '*l20*'
-      - '*h20*'
   tests:
   #llama_v3.1_8b_instruct
   #pytorch backend
@@ -81,53 +63,38 @@ llm_perf_sanity:
   - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-pytorch-bfloat16-input_output_len:128,128-gpus:2]

 # FP8 tests for systems with 2+ GPUs
+# A100, L40S, L20, H20, H100, H200, Blackwell
 - condition:
     terms:
       supports_fp8: true
     ranges:
       system_gpu_count:
         gte: 2
-    wildcards:
-      gpu:
-      - '*h100*'
-      - '*h200*'
-      - '*l40s*'
-      - '*l20*'
-      - '*h20*'
+
   tests:
   #mixtral_8x7b_v0.1_fp8 pytorch backend
   - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-gpus:2]

 # Tests for systems with 2+ GPUs and high memory
+# A100, L40S, H20, H100, H200, Blackwell
 - condition:
     ranges:
       system_gpu_count:
         gte: 2
       gpu_memory:
         gt: 80000
-    wildcards:
-      gpu:
-      - '*h100*'
-      - '*h200*'
-      - '*a100*'
-      - '*l40s*'
-      - '*h20*'
+
   tests:
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:128,128-reqs:10-gpus:2]
   - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-pytorch-float16-input_output_len:128,128-gpus:2]

 # Tests for systems with 4+ GPUs
+# A100, L40S, H20, H100, H200, Blackwell
 - condition:
     ranges:
       system_gpu_count:
         gte: 4
-    wildcards:
-      gpu:
-      - '*h100*'
-      - '*h200*'
-      - '*a100*'
-      - '*l40s*'
-      - '*h20*'
+
   tests:
   #llama_v3.1_70b
   #trt backend
@@ -136,37 +103,28 @@ llm_perf_sanity:
   - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-pytorch-bfloat16-maxbs:1-input_output_len:128,128-reqs:10-gpus:4]

 # FP8 specific tests
+# L40S, H20, H100, H200, Blackwell
 - condition:
     terms:
       supports_fp8: true
     ranges:
       system_gpu_count:
         gte: 4
-    wildcards:
-      gpu:
-      - '*h100*'
-      - '*h200*'
-      - '*l40s*'
-      - '*h20*'
+
   tests:
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:2000,500-gpus:4]
   - perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b-bench-pytorch-bfloat16-maxbs:256-input_output_len:500,2000-reqs:250-con:250-tp:4-gpus:4]
   - perf/test_perf.py::test_perf[llama_v3.3_nemotron_super_49b_fp8-bench-pytorch-float8-maxbs:256-input_output_len:500,2000-reqs:250-con:250-tp:4-gpus:4]

 # Tests for systems with 8+ GPUs
+# A100, L40S, H20, H100, H200, Blackwell
 - condition:
     ranges:
       system_gpu_count:
         gte: 8
       gpu_memory:
         gt: 46000
-    wildcards:
-      gpu:
-      - '*h100*'
-      - '*h200*'
-      - '*a100*'
-      - '*l40s*'
-      - '*h20*'
+
   tests:
   #llama_v3.1_70b
   #pytorch backend
@@ -176,18 +134,13 @@ llm_perf_sanity:
   - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct-bench-pytorch-bfloat16-input_output_len:2000,500-gpus:8]

 # FP8 tests for systems with 8+ GPUs
+# L40S, H20, H100, H200, Blackwell
 - condition:
     terms:
       supports_fp8: true
     ranges:
       system_gpu_count:
         gte: 8
-    wildcards:
-      gpu:
-      - '*h100*'
-      - '*h200*'
-      - '*l40s*'
-      - '*h20*'

   tests:
   #llama_v3.1_70b
@@ -199,17 +152,14 @@ llm_perf_sanity:
   - perf/test_perf.py::test_perf[mixtral_8x7b_v0.1_instruct-bench-pytorch-float16-input_output_len:128,128-reqs:64-gpus:8]


+# FP4, FP8 tests for systems with 8+ GPUs
+# H20, H100, H200, Blackwell
 - condition:
-    terms:
-      supports_fp8: true
     ranges:
       system_gpu_count:
         gte: 8
-    wildcards:
-      gpu:
-      - '*h100*'
-      - '*h200*'
-      - '*h20*'
+      compute_capability:
+        gte: 9.0

   tests:
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float8-input_output_len:128,128]
@@ -224,3 +174,5 @@ llm_perf_sanity:
   - perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8] TIMEOUT(60)
   - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-disagg_server-ctx_dp:4-gen_tp:4]
   - perf/test_perf.py::test_perf[llama_v3.1_8b-disagg_server-ctx_dp:4-gen_tp:4]
+  # gpt_oss_20b_fp4
+  - perf/test_perf.py::test_perf[gpt_oss_20b_fp4-bench-pytorch-float4-input_output_len:512,512]
```
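The recurring `condition:` blocks gate each test group on machine properties. This commit drops the per-GPU `wildcards` filters (widening the groups to any GPU meeting the count and memory ranges) and, for the last group, swaps `supports_fp8` for a `compute_capability` range. A minimal sketch of how such a block might be evaluated — the `terms`/`ranges`/`wildcards` schema comes from the YAML above, but this matcher is illustrative, not the harness's actual implementation:

```python
from fnmatch import fnmatch


def matches_condition(condition: dict, system: dict) -> bool:
    """Illustrative matcher for the condition blocks in the test list above."""
    # terms: exact equality, e.g. supports_fp8: true
    for key, expected in condition.get("terms", {}).items():
        if system.get(key) != expected:
            return False
    # ranges: numeric bounds, e.g. system_gpu_count: {gte: 8}
    for key, bounds in condition.get("ranges", {}).items():
        value = system.get(key)
        if value is None:
            return False
        if "gte" in bounds and not value >= bounds["gte"]:
            return False
        if "gt" in bounds and not value > bounds["gt"]:
            return False
    # wildcards: glob match on string fields (the part this commit removes,
    # which is what widens each group beyond the listed GPU names)
    for key, patterns in condition.get("wildcards", {}).items():
        name = str(system.get(key, "")).lower()
        if not any(fnmatch(name, p) for p in patterns):
            return False
    return True


# The new FP4/FP8 group keys on compute capability instead of GPU names:
cond = {"ranges": {"system_gpu_count": {"gte": 8},
                   "compute_capability": {"gte": 9.0}}}
h200_node = {"system_gpu_count": 8, "compute_capability": 9.0}
assert matches_condition(cond, h200_node)
```

Under this reading, `compute_capability: gte: 9.0` admits Hopper parts (H100/H200/H20, SM90) and Blackwell while excluding Ampere (A100, SM80) and Ada (L40S/L20, SM89), which matches the `# H20, H100, H200, Blackwell` comment on that group.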
