1
+ #include " ggml.h"
2
+
3
+ namespace hardware {
4
+
5
+ float GetQuantBit (GGMLType gt) {
6
+ switch (gt) {
7
+ case GGML_TYPE_I32:
8
+ case GGML_TYPE_F32:
9
+ return 32 .0f ;
10
+ case GGML_TYPE_I16:
11
+ case GGML_TYPE_BF16:
12
+ case GGML_TYPE_F16:
13
+ return 16 .0f ;
14
+ case GGML_TYPE_IQ2_S:
15
+ case GGML_TYPE_IQ2_XXS:
16
+ case GGML_TYPE_IQ2_XS:
17
+ return 2 .31f ;
18
+ case GGML_TYPE_Q2_K:
19
+ return 2 .5625f ;
20
+ case GGML_TYPE_IQ3_XXS:
21
+ case GGML_TYPE_IQ3_S:
22
+ case GGML_TYPE_Q3_K:
23
+ return 3 .4375f ;
24
+ case GGML_TYPE_Q4_0_4_4:
25
+ case GGML_TYPE_Q4_0_4_8:
26
+ case GGML_TYPE_Q4_0_8_8:
27
+ case GGML_TYPE_IQ4_NL:
28
+ case GGML_TYPE_IQ4_XS:
29
+ case GGML_TYPE_Q4_0:
30
+ case GGML_TYPE_Q4_1:
31
+ case GGML_TYPE_Q4_K:
32
+ return 4 .5f ;
33
+ case GGML_TYPE_Q5_0:
34
+ case GGML_TYPE_Q5_1:
35
+ case GGML_TYPE_Q5_K:
36
+ return 5 .5f ;
37
+ case GGML_TYPE_Q6_K:
38
+ return 6 .5625f ;
39
+ case GGML_TYPE_I8:
40
+ case GGML_TYPE_Q8_0:
41
+ case GGML_TYPE_Q8_1:
42
+ case GGML_TYPE_Q8_K:
43
+ return 8 .0f ;
44
+ case GGML_TYPE_I64:
45
+ case GGML_TYPE_F64:
46
+ return 64 .0f ;
47
+ default :
48
+ return 8 .0f ;
49
+ }
50
+ }
51
+
52
+ std::string to_string (GGMLType t) {
53
+ switch (t) {
54
+ case GGML_TYPE_F32:
55
+ return " F32" ;
56
+ case GGML_TYPE_F16:
57
+ return " F16" ;
58
+ case GGML_TYPE_Q4_0:
59
+ return " Q4_0" ;
60
+ case GGML_TYPE_Q4_1:
61
+ return " Q4_1" ;
62
+ case GGML_TYPE_Q5_0:
63
+ return " Q5_0" ;
64
+ case GGML_TYPE_Q5_1:
65
+ return " Q5_1" ;
66
+ case GGML_TYPE_Q8_0:
67
+ return " Q8_0" ;
68
+ case GGML_TYPE_Q8_1:
69
+ return " Q8_1" ;
70
+ case GGML_TYPE_Q2_K:
71
+ return " Q2_K" ;
72
+ case GGML_TYPE_Q3_K:
73
+ return " Q3_K" ;
74
+ case GGML_TYPE_Q4_K:
75
+ return " Q4_K" ;
76
+ case GGML_TYPE_Q5_K:
77
+ return " Q5_K" ;
78
+ case GGML_TYPE_Q6_K:
79
+ return " Q6_K" ;
80
+ case GGML_TYPE_Q8_K:
81
+ return " Q8_K" ;
82
+ case GGML_TYPE_IQ2_XXS:
83
+ return " IQ2_XXS" ;
84
+ case GGML_TYPE_IQ2_XS:
85
+ return " IQ2_XS" ;
86
+ case GGML_TYPE_IQ3_XXS:
87
+ return " IQ3_XXS" ;
88
+ case GGML_TYPE_IQ1_S:
89
+ return " IQ1_S" ;
90
+ case GGML_TYPE_IQ4_NL:
91
+ return " IQ4_NL" ;
92
+ case GGML_TYPE_IQ3_S:
93
+ return " IQ3_S" ;
94
+ case GGML_TYPE_IQ2_S:
95
+ return " IQ2_S" ;
96
+ case GGML_TYPE_IQ4_XS:
97
+ return " IQ4_XS" ;
98
+ case GGML_TYPE_I8:
99
+ return " I8" ;
100
+ case GGML_TYPE_I16:
101
+ return " I16" ;
102
+ case GGML_TYPE_I32:
103
+ return " I32" ;
104
+ case GGML_TYPE_I64:
105
+ return " I64" ;
106
+ case GGML_TYPE_F64:
107
+ return " F64" ;
108
+ case GGML_TYPE_IQ1_M:
109
+ return " IQ1_M" ;
110
+ case GGML_TYPE_BF16:
111
+ return " BF16" ;
112
+ case GGML_TYPE_Q4_0_4_4:
113
+ return " Q4_0_4_4" ;
114
+ case GGML_TYPE_Q4_0_4_8:
115
+ return " Q4_0_4_8" ;
116
+ case GGML_TYPE_Q4_0_8_8:
117
+ return " Q4_0_8_8" ;
118
+ case GGML_TYPE_TQ1_0:
119
+ return " TQ1_0" ;
120
+ case GGML_TYPE_TQ2_0:
121
+ return " TQ2_0" ;
122
+ default :
123
+ return " Invalid" ;
124
+ }
125
+ }
126
+
127
+ const std::unordered_map<GGMLType, GGMLTypeTrait> kGGMLTypeTraits = {
128
+ {GGML_TYPE_F32, {.block_size = 1 , .type_size = 4 }},
129
+ {GGML_TYPE_F16, {.block_size = 1 , .type_size = 2 }},
130
+ {GGML_TYPE_Q4_0, {.block_size = 32 , .type_size = 18 , .is_quantized = true }},
131
+ {GGML_TYPE_Q4_1, {.block_size = 32 , .type_size = 20 , .is_quantized = true }},
132
+ {GGML_TYPE_Q5_0, {.block_size = 32 , .type_size = 22 , .is_quantized = true }},
133
+ {GGML_TYPE_Q5_1, {.block_size = 32 , .type_size = 24 , .is_quantized = true }},
134
+ {GGML_TYPE_Q8_0, {.block_size = 32 , .type_size = 34 , .is_quantized = true }},
135
+ {GGML_TYPE_Q8_1, {.block_size = 32 , .type_size = 36 , .is_quantized = true }},
136
+ {GGML_TYPE_Q2_K,
137
+ {.block_size = 256 , .type_size = 84 , .is_quantized = true }},
138
+ {GGML_TYPE_Q3_K,
139
+ {.block_size = 256 , .type_size = 110 , .is_quantized = true }},
140
+ {GGML_TYPE_Q4_K,
141
+ {.block_size = 256 , .type_size = 144 , .is_quantized = true }},
142
+ {GGML_TYPE_Q5_K,
143
+ {.block_size = 256 , .type_size = 176 , .is_quantized = true }},
144
+ {GGML_TYPE_Q6_K,
145
+ {.block_size = 256 , .type_size = 210 , .is_quantized = true }},
146
+ {GGML_TYPE_Q8_K,
147
+ {.block_size = 256 , .type_size = 292 , .is_quantized = true }},
148
+ {GGML_TYPE_IQ2_XXS,
149
+ {.block_size = 256 , .type_size = 66 , .is_quantized = true }},
150
+ {GGML_TYPE_IQ2_XS,
151
+ {.block_size = 256 , .type_size = 74 , .is_quantized = true }},
152
+ {GGML_TYPE_IQ3_XXS,
153
+ {.block_size = 256 , .type_size = 98 , .is_quantized = true }},
154
+ {GGML_TYPE_IQ1_S,
155
+ {.block_size = 256 , .type_size = 50 , .is_quantized = true }},
156
+ {GGML_TYPE_IQ4_NL,
157
+ {.block_size = 32 , .type_size = 18 , .is_quantized = true }},
158
+ {GGML_TYPE_IQ3_S,
159
+ {.block_size = 256 , .type_size = 110 , .is_quantized = true }},
160
+ {GGML_TYPE_IQ2_S,
161
+ {.block_size = 256 , .type_size = 82 , .is_quantized = true }},
162
+ {GGML_TYPE_IQ4_XS,
163
+ {.block_size = 256 , .type_size = 136 , .is_quantized = true }},
164
+ {GGML_TYPE_I8, {.block_size = 1 , .type_size = 1 }},
165
+ {GGML_TYPE_I16, {.block_size = 1 , .type_size = 2 }},
166
+ {GGML_TYPE_I32, {.block_size = 1 , .type_size = 4 }},
167
+ {GGML_TYPE_I64, {.block_size = 1 , .type_size = 8 }},
168
+ {GGML_TYPE_F64, {.block_size = 1 , .type_size = 8 }},
169
+ {GGML_TYPE_IQ1_M,
170
+ {.block_size = 256 , .type_size = 56 , .is_quantized = true }},
171
+ {GGML_TYPE_BF16, {.block_size = 1 , .type_size = 2 }},
172
+ {GGML_TYPE_Q4_0_4_4,
173
+ {.block_size = 32 , .type_size = 18 , .is_quantized = true }},
174
+ {GGML_TYPE_Q4_0_4_8,
175
+ {.block_size = 32 , .type_size = 18 , .is_quantized = true }},
176
+ {GGML_TYPE_Q4_0_8_8,
177
+ {.block_size = 32 , .type_size = 18 , .is_quantized = true }},
178
+ {GGML_TYPE_TQ1_0,
179
+ {.block_size = 256 , .type_size = 54 , .is_quantized = true }},
180
+ {GGML_TYPE_TQ2_0,
181
+ {.block_size = 256 , .type_size = 66 , .is_quantized = true }},
182
+ };
183
+
184
+ } // namespace hardware
0 commit comments