1+ #include " ggml.h"
2+
3+ namespace hardware {
4+
5+ float GetQuantBit (GGMLType gt) {
6+ switch (gt) {
7+ case GGML_TYPE_I32:
8+ case GGML_TYPE_F32:
9+ return 32 .0f ;
10+ case GGML_TYPE_I16:
11+ case GGML_TYPE_BF16:
12+ case GGML_TYPE_F16:
13+ return 16 .0f ;
14+ case GGML_TYPE_IQ2_S:
15+ case GGML_TYPE_IQ2_XXS:
16+ case GGML_TYPE_IQ2_XS:
17+ return 2 .31f ;
18+ case GGML_TYPE_Q2_K:
19+ return 2 .5625f ;
20+ case GGML_TYPE_IQ3_XXS:
21+ case GGML_TYPE_IQ3_S:
22+ case GGML_TYPE_Q3_K:
23+ return 3 .4375f ;
24+ case GGML_TYPE_Q4_0_4_4:
25+ case GGML_TYPE_Q4_0_4_8:
26+ case GGML_TYPE_Q4_0_8_8:
27+ case GGML_TYPE_IQ4_NL:
28+ case GGML_TYPE_IQ4_XS:
29+ case GGML_TYPE_Q4_0:
30+ case GGML_TYPE_Q4_1:
31+ case GGML_TYPE_Q4_K:
32+ return 4 .5f ;
33+ case GGML_TYPE_Q5_0:
34+ case GGML_TYPE_Q5_1:
35+ case GGML_TYPE_Q5_K:
36+ return 5 .5f ;
37+ case GGML_TYPE_Q6_K:
38+ return 6 .5625f ;
39+ case GGML_TYPE_I8:
40+ case GGML_TYPE_Q8_0:
41+ case GGML_TYPE_Q8_1:
42+ case GGML_TYPE_Q8_K:
43+ return 8 .0f ;
44+ case GGML_TYPE_I64:
45+ case GGML_TYPE_F64:
46+ return 64 .0f ;
47+ default :
48+ return 8 .0f ;
49+ }
50+ }
51+
52+ std::string to_string (GGMLType t) {
53+ switch (t) {
54+ case GGML_TYPE_F32:
55+ return " F32" ;
56+ case GGML_TYPE_F16:
57+ return " F16" ;
58+ case GGML_TYPE_Q4_0:
59+ return " Q4_0" ;
60+ case GGML_TYPE_Q4_1:
61+ return " Q4_1" ;
62+ case GGML_TYPE_Q5_0:
63+ return " Q5_0" ;
64+ case GGML_TYPE_Q5_1:
65+ return " Q5_1" ;
66+ case GGML_TYPE_Q8_0:
67+ return " Q8_0" ;
68+ case GGML_TYPE_Q8_1:
69+ return " Q8_1" ;
70+ case GGML_TYPE_Q2_K:
71+ return " Q2_K" ;
72+ case GGML_TYPE_Q3_K:
73+ return " Q3_K" ;
74+ case GGML_TYPE_Q4_K:
75+ return " Q4_K" ;
76+ case GGML_TYPE_Q5_K:
77+ return " Q5_K" ;
78+ case GGML_TYPE_Q6_K:
79+ return " Q6_K" ;
80+ case GGML_TYPE_Q8_K:
81+ return " Q8_K" ;
82+ case GGML_TYPE_IQ2_XXS:
83+ return " IQ2_XXS" ;
84+ case GGML_TYPE_IQ2_XS:
85+ return " IQ2_XS" ;
86+ case GGML_TYPE_IQ3_XXS:
87+ return " IQ3_XXS" ;
88+ case GGML_TYPE_IQ1_S:
89+ return " IQ1_S" ;
90+ case GGML_TYPE_IQ4_NL:
91+ return " IQ4_NL" ;
92+ case GGML_TYPE_IQ3_S:
93+ return " IQ3_S" ;
94+ case GGML_TYPE_IQ2_S:
95+ return " IQ2_S" ;
96+ case GGML_TYPE_IQ4_XS:
97+ return " IQ4_XS" ;
98+ case GGML_TYPE_I8:
99+ return " I8" ;
100+ case GGML_TYPE_I16:
101+ return " I16" ;
102+ case GGML_TYPE_I32:
103+ return " I32" ;
104+ case GGML_TYPE_I64:
105+ return " I64" ;
106+ case GGML_TYPE_F64:
107+ return " F64" ;
108+ case GGML_TYPE_IQ1_M:
109+ return " IQ1_M" ;
110+ case GGML_TYPE_BF16:
111+ return " BF16" ;
112+ case GGML_TYPE_Q4_0_4_4:
113+ return " Q4_0_4_4" ;
114+ case GGML_TYPE_Q4_0_4_8:
115+ return " Q4_0_4_8" ;
116+ case GGML_TYPE_Q4_0_8_8:
117+ return " Q4_0_8_8" ;
118+ case GGML_TYPE_TQ1_0:
119+ return " TQ1_0" ;
120+ case GGML_TYPE_TQ2_0:
121+ return " TQ2_0" ;
122+ default :
123+ return " Invalid" ;
124+ }
125+ }
126+
127+ const std::unordered_map<GGMLType, GGMLTypeTrait> kGGMLTypeTraits = {
128+ {GGML_TYPE_F32, {.block_size = 1 , .type_size = 4 }},
129+ {GGML_TYPE_F16, {.block_size = 1 , .type_size = 2 }},
130+ {GGML_TYPE_Q4_0, {.block_size = 32 , .type_size = 18 , .is_quantized = true }},
131+ {GGML_TYPE_Q4_1, {.block_size = 32 , .type_size = 20 , .is_quantized = true }},
132+ {GGML_TYPE_Q5_0, {.block_size = 32 , .type_size = 22 , .is_quantized = true }},
133+ {GGML_TYPE_Q5_1, {.block_size = 32 , .type_size = 24 , .is_quantized = true }},
134+ {GGML_TYPE_Q8_0, {.block_size = 32 , .type_size = 34 , .is_quantized = true }},
135+ {GGML_TYPE_Q8_1, {.block_size = 32 , .type_size = 36 , .is_quantized = true }},
136+ {GGML_TYPE_Q2_K,
137+ {.block_size = 256 , .type_size = 84 , .is_quantized = true }},
138+ {GGML_TYPE_Q3_K,
139+ {.block_size = 256 , .type_size = 110 , .is_quantized = true }},
140+ {GGML_TYPE_Q4_K,
141+ {.block_size = 256 , .type_size = 144 , .is_quantized = true }},
142+ {GGML_TYPE_Q5_K,
143+ {.block_size = 256 , .type_size = 176 , .is_quantized = true }},
144+ {GGML_TYPE_Q6_K,
145+ {.block_size = 256 , .type_size = 210 , .is_quantized = true }},
146+ {GGML_TYPE_Q8_K,
147+ {.block_size = 256 , .type_size = 292 , .is_quantized = true }},
148+ {GGML_TYPE_IQ2_XXS,
149+ {.block_size = 256 , .type_size = 66 , .is_quantized = true }},
150+ {GGML_TYPE_IQ2_XS,
151+ {.block_size = 256 , .type_size = 74 , .is_quantized = true }},
152+ {GGML_TYPE_IQ3_XXS,
153+ {.block_size = 256 , .type_size = 98 , .is_quantized = true }},
154+ {GGML_TYPE_IQ1_S,
155+ {.block_size = 256 , .type_size = 50 , .is_quantized = true }},
156+ {GGML_TYPE_IQ4_NL,
157+ {.block_size = 32 , .type_size = 18 , .is_quantized = true }},
158+ {GGML_TYPE_IQ3_S,
159+ {.block_size = 256 , .type_size = 110 , .is_quantized = true }},
160+ {GGML_TYPE_IQ2_S,
161+ {.block_size = 256 , .type_size = 82 , .is_quantized = true }},
162+ {GGML_TYPE_IQ4_XS,
163+ {.block_size = 256 , .type_size = 136 , .is_quantized = true }},
164+ {GGML_TYPE_I8, {.block_size = 1 , .type_size = 1 }},
165+ {GGML_TYPE_I16, {.block_size = 1 , .type_size = 2 }},
166+ {GGML_TYPE_I32, {.block_size = 1 , .type_size = 4 }},
167+ {GGML_TYPE_I64, {.block_size = 1 , .type_size = 8 }},
168+ {GGML_TYPE_F64, {.block_size = 1 , .type_size = 8 }},
169+ {GGML_TYPE_IQ1_M,
170+ {.block_size = 256 , .type_size = 56 , .is_quantized = true }},
171+ {GGML_TYPE_BF16, {.block_size = 1 , .type_size = 2 }},
172+ {GGML_TYPE_Q4_0_4_4,
173+ {.block_size = 32 , .type_size = 18 , .is_quantized = true }},
174+ {GGML_TYPE_Q4_0_4_8,
175+ {.block_size = 32 , .type_size = 18 , .is_quantized = true }},
176+ {GGML_TYPE_Q4_0_8_8,
177+ {.block_size = 32 , .type_size = 18 , .is_quantized = true }},
178+ {GGML_TYPE_TQ1_0,
179+ {.block_size = 256 , .type_size = 54 , .is_quantized = true }},
180+ {GGML_TYPE_TQ2_0,
181+ {.block_size = 256 , .type_size = 66 , .is_quantized = true }},
182+ };
183+
184+ } // namespace hardware
0 commit comments