Skip to content

Commit 2016284

Browse files
FanShupei authored and arthw committed
ggml-cpu: support IQ4_NL_4_4 by runtime repack (ggml-org#10541)
* ggml-cpu: support IQ4_NL_4_4 by runtime repack
* ggml-cpu: add __ARM_FEATURE_DOTPROD guard
1 parent e6f1bc2 commit 2016284

File tree

8 files changed

+352
-19
lines changed

8 files changed

+352
-19
lines changed

ggml/include/ggml-cpu.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -91,6 +91,7 @@ extern "C" {
9191
GGML_BACKEND_API int ggml_cpu_has_neon (void);
9292
GGML_BACKEND_API int ggml_cpu_has_arm_fma (void);
9393
GGML_BACKEND_API int ggml_cpu_has_fp16_va (void);
94+
GGML_BACKEND_API int ggml_cpu_has_dotprod (void);
9495
GGML_BACKEND_API int ggml_cpu_has_matmul_int8(void);
9596
GGML_BACKEND_API int ggml_cpu_has_sve (void);
9697
GGML_BACKEND_API int ggml_cpu_get_sve_cnt (void); // sve vector length in bytes

ggml/include/ggml.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -389,6 +389,9 @@ extern "C" {
389389
GGML_TYPE_Q4_0_8_8 = 33,
390390
GGML_TYPE_TQ1_0 = 34,
391391
GGML_TYPE_TQ2_0 = 35,
392+
GGML_TYPE_IQ4_NL_4_4 = 36,
393+
// GGML_TYPE_IQ4_NL_4_8 = 37,
394+
// GGML_TYPE_IQ4_NL_8_8 = 38,
392395
GGML_TYPE_COUNT,
393396
};
394397

ggml/src/ggml-common.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -418,6 +418,12 @@ typedef struct {
418418
} block_iq4_xs;
419419
static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
420420

421+
typedef struct {
422+
ggml_half d[4]; // deltas for 4 iq4_nl blocks
423+
uint8_t qs[QK4_NL * 2];// nibbles / quants for 4 iq4_nl blocks
424+
} block_iq4_nlx4;
425+
static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(ggml_half) + QK4_NL * 2, "wrong iq4_nlx4 block size/padding");
426+
421427
#endif // GGML_COMMON_DECL
422428
#endif // GGML_COMMON_DECL
423429

0 commit comments

Comments
 (0)