Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 9685dcb

Browse files
chore: refactor ggml.h
1 parent c27bb18 commit 9685dcb

File tree

4 files changed

+193
-179
lines changed

4 files changed

+193
-179
lines changed

engine/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ file(APPEND "${CMAKE_CURRENT_BINARY_DIR}/cortex_openapi.h"
169169

170170
add_executable(${TARGET_NAME} main.cc
171171
${CMAKE_CURRENT_SOURCE_DIR}/utils/cpuid/cpu_info.cc
172+
${CMAKE_CURRENT_SOURCE_DIR}/utils/hardware/gguf/ggml.cc
172173
${CMAKE_CURRENT_SOURCE_DIR}/utils/file_logger.cc
173174

174175
${CMAKE_CURRENT_SOURCE_DIR}/extensions/template_renderer.cc

engine/cli/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ find_package(lfreist-hwinfo CONFIG REQUIRED)
7474

7575
add_executable(${TARGET_NAME} main.cc
7676
${CMAKE_CURRENT_SOURCE_DIR}/../utils/cpuid/cpu_info.cc
77+
${CMAKE_CURRENT_SOURCE_DIR}/../utils/hardware/gguf/ggml.cc
7778
${CMAKE_CURRENT_SOURCE_DIR}/../utils/normalize_engine.cc
7879
${CMAKE_CURRENT_SOURCE_DIR}/../utils/file_logger.cc
7980
${CMAKE_CURRENT_SOURCE_DIR}/../utils/dylib_path_manager.cc

engine/utils/hardware/gguf/ggml.cc

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
#include "ggml.h"
2+
3+
namespace hardware {
4+
5+
/// Returns the approximate number of bits used to store one element of a
/// tensor of the given GGML type.
///
/// Most values are rounded figures shared by a whole family of types rather
/// than exact per-type numbers. Any type without an explicit case falls back
/// to 8 bits.
///
/// @param gt GGML tensor type to look up.
/// @return Approximate bits per element for `gt`.
float GetQuantBit(GGMLType gt) {
  switch (gt) {
    case GGML_TYPE_I32:
    case GGML_TYPE_F32:
      return 32.0f;
    case GGML_TYPE_I16:
    case GGML_TYPE_BF16:
    case GGML_TYPE_F16:
      return 16.0f;
    // The 1-bit and ternary types need their own cases: without them they
    // would hit the 8-bit default and be overestimated several times over.
    // Values are type_size * 8 / block_size from kGGMLTypeTraits.
    case GGML_TYPE_IQ1_S:
      return 1.5625f;  // 50 * 8 / 256
    case GGML_TYPE_TQ1_0:
      return 1.6875f;  // 54 * 8 / 256
    case GGML_TYPE_IQ1_M:
      return 1.75f;  // 56 * 8 / 256
    case GGML_TYPE_TQ2_0:
      return 2.0625f;  // 66 * 8 / 256
    case GGML_TYPE_IQ2_S:
    case GGML_TYPE_IQ2_XXS:
    case GGML_TYPE_IQ2_XS:
      return 2.31f;
    case GGML_TYPE_Q2_K:
      return 2.5625f;
    case GGML_TYPE_IQ3_XXS:
    case GGML_TYPE_IQ3_S:
    case GGML_TYPE_Q3_K:
      return 3.4375f;
    case GGML_TYPE_Q4_0_4_4:
    case GGML_TYPE_Q4_0_4_8:
    case GGML_TYPE_Q4_0_8_8:
    case GGML_TYPE_IQ4_NL:
    case GGML_TYPE_IQ4_XS:
    case GGML_TYPE_Q4_0:
    case GGML_TYPE_Q4_1:
    case GGML_TYPE_Q4_K:
      return 4.5f;
    case GGML_TYPE_Q5_0:
    case GGML_TYPE_Q5_1:
    case GGML_TYPE_Q5_K:
      return 5.5f;
    case GGML_TYPE_Q6_K:
      return 6.5625f;
    case GGML_TYPE_I8:
    case GGML_TYPE_Q8_0:
    case GGML_TYPE_Q8_1:
    case GGML_TYPE_Q8_K:
      return 8.0f;
    case GGML_TYPE_I64:
    case GGML_TYPE_F64:
      return 64.0f;
    default:
      // Unknown or future type: fall back to 8 bits per element.
      return 8.0f;
  }
}
51+
52+
/// Returns the short textual name of a GGML tensor type (for example "Q4_K").
///
/// @param t GGML tensor type to name.
/// @return The name registered for `t`, or "Invalid" when `t` has no entry in
///         the table below.
std::string to_string(GGMLType t) {
  struct TypeName {
    GGMLType type;
    const char* name;
  };

  // One row per known type; searched linearly because the table is tiny.
  static constexpr TypeName kTypeNames[] = {
      {GGML_TYPE_F32, "F32"},
      {GGML_TYPE_F16, "F16"},
      {GGML_TYPE_Q4_0, "Q4_0"},
      {GGML_TYPE_Q4_1, "Q4_1"},
      {GGML_TYPE_Q5_0, "Q5_0"},
      {GGML_TYPE_Q5_1, "Q5_1"},
      {GGML_TYPE_Q8_0, "Q8_0"},
      {GGML_TYPE_Q8_1, "Q8_1"},
      {GGML_TYPE_Q2_K, "Q2_K"},
      {GGML_TYPE_Q3_K, "Q3_K"},
      {GGML_TYPE_Q4_K, "Q4_K"},
      {GGML_TYPE_Q5_K, "Q5_K"},
      {GGML_TYPE_Q6_K, "Q6_K"},
      {GGML_TYPE_Q8_K, "Q8_K"},
      {GGML_TYPE_IQ2_XXS, "IQ2_XXS"},
      {GGML_TYPE_IQ2_XS, "IQ2_XS"},
      {GGML_TYPE_IQ3_XXS, "IQ3_XXS"},
      {GGML_TYPE_IQ1_S, "IQ1_S"},
      {GGML_TYPE_IQ4_NL, "IQ4_NL"},
      {GGML_TYPE_IQ3_S, "IQ3_S"},
      {GGML_TYPE_IQ2_S, "IQ2_S"},
      {GGML_TYPE_IQ4_XS, "IQ4_XS"},
      {GGML_TYPE_I8, "I8"},
      {GGML_TYPE_I16, "I16"},
      {GGML_TYPE_I32, "I32"},
      {GGML_TYPE_I64, "I64"},
      {GGML_TYPE_F64, "F64"},
      {GGML_TYPE_IQ1_M, "IQ1_M"},
      {GGML_TYPE_BF16, "BF16"},
      {GGML_TYPE_Q4_0_4_4, "Q4_0_4_4"},
      {GGML_TYPE_Q4_0_4_8, "Q4_0_4_8"},
      {GGML_TYPE_Q4_0_8_8, "Q4_0_8_8"},
      {GGML_TYPE_TQ1_0, "TQ1_0"},
      {GGML_TYPE_TQ2_0, "TQ2_0"},
  };

  for (const TypeName& entry : kTypeNames) {
    if (entry.type == t) {
      return entry.name;
    }
  }
  return "Invalid";
}
126+
127+
const std::unordered_map<GGMLType, GGMLTypeTrait> kGGMLTypeTraits = {
128+
{GGML_TYPE_F32, {.block_size = 1, .type_size = 4}},
129+
{GGML_TYPE_F16, {.block_size = 1, .type_size = 2}},
130+
{GGML_TYPE_Q4_0, {.block_size = 32, .type_size = 18, .is_quantized = true}},
131+
{GGML_TYPE_Q4_1, {.block_size = 32, .type_size = 20, .is_quantized = true}},
132+
{GGML_TYPE_Q5_0, {.block_size = 32, .type_size = 22, .is_quantized = true}},
133+
{GGML_TYPE_Q5_1, {.block_size = 32, .type_size = 24, .is_quantized = true}},
134+
{GGML_TYPE_Q8_0, {.block_size = 32, .type_size = 34, .is_quantized = true}},
135+
{GGML_TYPE_Q8_1, {.block_size = 32, .type_size = 36, .is_quantized = true}},
136+
{GGML_TYPE_Q2_K,
137+
{.block_size = 256, .type_size = 84, .is_quantized = true}},
138+
{GGML_TYPE_Q3_K,
139+
{.block_size = 256, .type_size = 110, .is_quantized = true}},
140+
{GGML_TYPE_Q4_K,
141+
{.block_size = 256, .type_size = 144, .is_quantized = true}},
142+
{GGML_TYPE_Q5_K,
143+
{.block_size = 256, .type_size = 176, .is_quantized = true}},
144+
{GGML_TYPE_Q6_K,
145+
{.block_size = 256, .type_size = 210, .is_quantized = true}},
146+
{GGML_TYPE_Q8_K,
147+
{.block_size = 256, .type_size = 292, .is_quantized = true}},
148+
{GGML_TYPE_IQ2_XXS,
149+
{.block_size = 256, .type_size = 66, .is_quantized = true}},
150+
{GGML_TYPE_IQ2_XS,
151+
{.block_size = 256, .type_size = 74, .is_quantized = true}},
152+
{GGML_TYPE_IQ3_XXS,
153+
{.block_size = 256, .type_size = 98, .is_quantized = true}},
154+
{GGML_TYPE_IQ1_S,
155+
{.block_size = 256, .type_size = 50, .is_quantized = true}},
156+
{GGML_TYPE_IQ4_NL,
157+
{.block_size = 32, .type_size = 18, .is_quantized = true}},
158+
{GGML_TYPE_IQ3_S,
159+
{.block_size = 256, .type_size = 110, .is_quantized = true}},
160+
{GGML_TYPE_IQ2_S,
161+
{.block_size = 256, .type_size = 82, .is_quantized = true}},
162+
{GGML_TYPE_IQ4_XS,
163+
{.block_size = 256, .type_size = 136, .is_quantized = true}},
164+
{GGML_TYPE_I8, {.block_size = 1, .type_size = 1}},
165+
{GGML_TYPE_I16, {.block_size = 1, .type_size = 2}},
166+
{GGML_TYPE_I32, {.block_size = 1, .type_size = 4}},
167+
{GGML_TYPE_I64, {.block_size = 1, .type_size = 8}},
168+
{GGML_TYPE_F64, {.block_size = 1, .type_size = 8}},
169+
{GGML_TYPE_IQ1_M,
170+
{.block_size = 256, .type_size = 56, .is_quantized = true}},
171+
{GGML_TYPE_BF16, {.block_size = 1, .type_size = 2}},
172+
{GGML_TYPE_Q4_0_4_4,
173+
{.block_size = 32, .type_size = 18, .is_quantized = true}},
174+
{GGML_TYPE_Q4_0_4_8,
175+
{.block_size = 32, .type_size = 18, .is_quantized = true}},
176+
{GGML_TYPE_Q4_0_8_8,
177+
{.block_size = 32, .type_size = 18, .is_quantized = true}},
178+
{GGML_TYPE_TQ1_0,
179+
{.block_size = 256, .type_size = 54, .is_quantized = true}},
180+
{GGML_TYPE_TQ2_0,
181+
{.block_size = 256, .type_size = 66, .is_quantized = true}},
182+
};
183+
184+
} // namespace hardware

0 commit comments

Comments
 (0)