File tree Expand file tree Collapse file tree 2 files changed +220
-0
lines changed
lightllm/common/triton_utils/autotune_kernel_configs/triton_3.4.0/NVIDIA_H200/grouped_matmul:v1 Expand file tree Collapse file tree 2 files changed +220
-0
lines changed Original file line number Diff line number Diff line change 1+ {
2+ "1024" : {
3+ "BLOCK_SIZE_K" : 64 ,
4+ "BLOCK_SIZE_M" : 16 ,
5+ "BLOCK_SIZE_N" : 64 ,
6+ "GROUP_SIZE_M" : 1 ,
7+ "NEED_TRANS" : false ,
8+ "num_stages" : 3 ,
9+ "num_warps" : 4
10+ },
11+ "128" : {
12+ "BLOCK_SIZE_K" : 64 ,
13+ "BLOCK_SIZE_M" : 16 ,
14+ "BLOCK_SIZE_N" : 128 ,
15+ "GROUP_SIZE_M" : 1 ,
16+ "NEED_TRANS" : false ,
17+ "num_stages" : 3 ,
18+ "num_warps" : 4
19+ },
20+ "131072" : {
21+ "BLOCK_SIZE_K" : 64 ,
22+ "BLOCK_SIZE_M" : 128 ,
23+ "BLOCK_SIZE_N" : 128 ,
24+ "GROUP_SIZE_M" : 16 ,
25+ "NEED_TRANS" : false ,
26+ "num_stages" : 3 ,
27+ "num_warps" : 4
28+ },
29+ "16384" : {
30+ "BLOCK_SIZE_K" : 32 ,
31+ "BLOCK_SIZE_M" : 64 ,
32+ "BLOCK_SIZE_N" : 128 ,
33+ "GROUP_SIZE_M" : 1 ,
34+ "NEED_TRANS" : false ,
35+ "num_stages" : 4 ,
36+ "num_warps" : 4
37+ },
38+ "2048" : {
39+ "BLOCK_SIZE_K" : 64 ,
40+ "BLOCK_SIZE_M" : 32 ,
41+ "BLOCK_SIZE_N" : 128 ,
42+ "GROUP_SIZE_M" : 64 ,
43+ "NEED_TRANS" : false ,
44+ "num_stages" : 3 ,
45+ "num_warps" : 4
46+ },
47+ "256" : {
48+ "BLOCK_SIZE_K" : 64 ,
49+ "BLOCK_SIZE_M" : 16 ,
50+ "BLOCK_SIZE_N" : 128 ,
51+ "GROUP_SIZE_M" : 1 ,
52+ "NEED_TRANS" : false ,
53+ "num_stages" : 2 ,
54+ "num_warps" : 4
55+ },
56+ "32768" : {
57+ "BLOCK_SIZE_K" : 32 ,
58+ "BLOCK_SIZE_M" : 64 ,
59+ "BLOCK_SIZE_N" : 128 ,
60+ "GROUP_SIZE_M" : 32 ,
61+ "NEED_TRANS" : false ,
62+ "num_stages" : 4 ,
63+ "num_warps" : 4
64+ },
65+ "512" : {
66+ "BLOCK_SIZE_K" : 64 ,
67+ "BLOCK_SIZE_M" : 16 ,
68+ "BLOCK_SIZE_N" : 128 ,
69+ "GROUP_SIZE_M" : 1 ,
70+ "NEED_TRANS" : false ,
71+ "num_stages" : 3 ,
72+ "num_warps" : 4
73+ },
74+ "64" : {
75+ "BLOCK_SIZE_K" : 64 ,
76+ "BLOCK_SIZE_M" : 16 ,
77+ "BLOCK_SIZE_N" : 64 ,
78+ "GROUP_SIZE_M" : 64 ,
79+ "NEED_TRANS" : false ,
80+ "num_stages" : 3 ,
81+ "num_warps" : 4
82+ },
83+ "8" : {
84+ "BLOCK_SIZE_K" : 32 ,
85+ "BLOCK_SIZE_M" : 16 ,
86+ "BLOCK_SIZE_N" : 128 ,
87+ "GROUP_SIZE_M" : 1 ,
88+ "NEED_TRANS" : false ,
89+ "num_stages" : 3 ,
90+ "num_warps" : 4
91+ },
92+ "800" : {
93+ "BLOCK_SIZE_K" : 64 ,
94+ "BLOCK_SIZE_M" : 16 ,
95+ "BLOCK_SIZE_N" : 128 ,
96+ "GROUP_SIZE_M" : 1 ,
97+ "NEED_TRANS" : false ,
98+ "num_stages" : 3 ,
99+ "num_warps" : 4
100+ },
101+ "8192" : {
102+ "BLOCK_SIZE_K" : 64 ,
103+ "BLOCK_SIZE_M" : 64 ,
104+ "BLOCK_SIZE_N" : 128 ,
105+ "GROUP_SIZE_M" : 16 ,
106+ "NEED_TRANS" : false ,
107+ "num_stages" : 3 ,
108+ "num_warps" : 4
109+ }
110+ }
Original file line number Diff line number Diff line change 1+ {
2+ "1" : {
3+ "BLOCK_SIZE_K" : 128 ,
4+ "BLOCK_SIZE_M" : 16 ,
5+ "BLOCK_SIZE_N" : 64 ,
6+ "GROUP_SIZE_M" : 1 ,
7+ "NEED_TRANS" : false ,
8+ "num_stages" : 5 ,
9+ "num_warps" : 4
10+ },
11+ "100" : {
12+ "BLOCK_SIZE_K" : 128 ,
13+ "BLOCK_SIZE_M" : 16 ,
14+ "BLOCK_SIZE_N" : 128 ,
15+ "GROUP_SIZE_M" : 32 ,
16+ "NEED_TRANS" : false ,
17+ "num_stages" : 2 ,
18+ "num_warps" : 4
19+ },
20+ "1024" : {
21+ "BLOCK_SIZE_K" : 64 ,
22+ "BLOCK_SIZE_M" : 128 ,
23+ "BLOCK_SIZE_N" : 128 ,
24+ "GROUP_SIZE_M" : 16 ,
25+ "NEED_TRANS" : false ,
26+ "num_stages" : 5 ,
27+ "num_warps" : 8
28+ },
29+ "128" : {
30+ "BLOCK_SIZE_K" : 128 ,
31+ "BLOCK_SIZE_M" : 16 ,
32+ "BLOCK_SIZE_N" : 128 ,
33+ "GROUP_SIZE_M" : 1 ,
34+ "NEED_TRANS" : false ,
35+ "num_stages" : 2 ,
36+ "num_warps" : 8
37+ },
38+ "16" : {
39+ "BLOCK_SIZE_K" : 128 ,
40+ "BLOCK_SIZE_M" : 16 ,
41+ "BLOCK_SIZE_N" : 128 ,
42+ "GROUP_SIZE_M" : 64 ,
43+ "NEED_TRANS" : false ,
44+ "num_stages" : 4 ,
45+ "num_warps" : 4
46+ },
47+ "16384" : {
48+ "BLOCK_SIZE_K" : 64 ,
49+ "BLOCK_SIZE_M" : 128 ,
50+ "BLOCK_SIZE_N" : 128 ,
51+ "GROUP_SIZE_M" : 32 ,
52+ "NEED_TRANS" : false ,
53+ "num_stages" : 3 ,
54+ "num_warps" : 8
55+ },
56+ "2048" : {
57+ "BLOCK_SIZE_K" : 64 ,
58+ "BLOCK_SIZE_M" : 64 ,
59+ "BLOCK_SIZE_N" : 128 ,
60+ "GROUP_SIZE_M" : 16 ,
61+ "NEED_TRANS" : false ,
62+ "num_stages" : 3 ,
63+ "num_warps" : 8
64+ },
65+ "256" : {
66+ "BLOCK_SIZE_K" : 128 ,
67+ "BLOCK_SIZE_M" : 32 ,
68+ "BLOCK_SIZE_N" : 128 ,
69+ "GROUP_SIZE_M" : 32 ,
70+ "NEED_TRANS" : false ,
71+ "num_stages" : 2 ,
72+ "num_warps" : 4
73+ },
74+ "32" : {
75+ "BLOCK_SIZE_K" : 128 ,
76+ "BLOCK_SIZE_M" : 16 ,
77+ "BLOCK_SIZE_N" : 64 ,
78+ "GROUP_SIZE_M" : 64 ,
79+ "NEED_TRANS" : false ,
80+ "num_stages" : 3 ,
81+ "num_warps" : 4
82+ },
83+ "4096" : {
84+ "BLOCK_SIZE_K" : 64 ,
85+ "BLOCK_SIZE_M" : 128 ,
86+ "BLOCK_SIZE_N" : 128 ,
87+ "GROUP_SIZE_M" : 16 ,
88+ "NEED_TRANS" : false ,
89+ "num_stages" : 3 ,
90+ "num_warps" : 4
91+ },
92+ "64" : {
93+ "BLOCK_SIZE_K" : 128 ,
94+ "BLOCK_SIZE_M" : 16 ,
95+ "BLOCK_SIZE_N" : 128 ,
96+ "GROUP_SIZE_M" : 32 ,
97+ "NEED_TRANS" : false ,
98+ "num_stages" : 2 ,
99+ "num_warps" : 4
100+ },
101+ "8" : {
102+ "BLOCK_SIZE_K" : 64 ,
103+ "BLOCK_SIZE_M" : 16 ,
104+ "BLOCK_SIZE_N" : 128 ,
105+ "GROUP_SIZE_M" : 32 ,
106+ "NEED_TRANS" : false ,
107+ "num_stages" : 5 ,
108+ "num_warps" : 4
109+ }
110+ }
You can’t perform that action at this time.
0 commit comments