
Commit 0dc2504

add back dummy imatrix & run clang-format

1 parent 1c869e5 commit 0dc2504

File tree

1 file changed: +16 -12 lines changed

model.cpp

Lines changed: 16 additions & 12 deletions
@@ -111,11 +111,11 @@ const char* unused_tensors[] = {
     "embedding_manager",
     "denoiser.sigmas",
     "text_encoders.t5xxl.transformer.encoder.embed_tokens.weight",  // only used during training
-    "text_encoders.t5xxl.logit_scale", // only used during training
+    "text_encoders.t5xxl.logit_scale",  // only used during training
     "text_encoders.t5xxl.transformer.scaled_fp8",
     "text_encoders.qwen2vl.output.weight",
     "text_encoders.qwen2vl.lm_head.",
-    "text_encoders.qwen2vl.logit_scale", // only used during training
+    "text_encoders.qwen2vl.logit_scale",  // only used during training
     "text_encoders.qwen2vl.transformer.scaled_fp8",
 };

@@ -819,8 +819,8 @@ float f8_e4m3fn_to_f32(uint8_t f8) {
     }

     const uint32_t exponent_bias_delta = 127 - 7;
-    uint32_t exponent = ((f8 >> 3) & 15) + exponent_bias_delta;
-    uint32_t mantissa = f8 & 7;
+    uint32_t exponent                  = ((f8 >> 3) & 15) + exponent_bias_delta;
+    uint32_t mantissa                  = f8 & 7;

     // subnormal
     if (exponent == exponent_bias_delta) {
@@ -840,7 +840,7 @@ float f8_e4m3fn_to_f32(uint8_t f8) {
         mantissa <<= 1;
     }

-    const uint32_t sign = f8 >> 7;
+    const uint32_t sign   = f8 >> 7;
     const uint32_t result = (sign << 31) | (exponent << 23) | (mantissa << 20);
     return *reinterpret_cast<const float*>(&result);
 }
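
The two hunks above are whitespace-only: clang-format aligning consecutive assignments inside the FP8 E4M3FN to F32 decoder. Since the page shows only fragments of that function, here is a self-contained sketch of an E4M3FN decoder in the same spirit. Variable names mirror the diff; the NaN check and the exact subnormal renormalization fall between the visible hunks, so those details are assumptions based on the standard E4M3FN layout (1 sign bit, 4 exponent bits with bias 7, 3 mantissa bits, no infinities).

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Sketch of an FP8 E4M3FN -> F32 decoder; names follow the diff, the
// parts outside the visible hunks are assumptions.
static float f8_e4m3fn_to_f32_sketch(uint8_t f8) {
    if ((f8 & 0x7f) == 0x7f) {
        return NAN;  // E4M3FN reserves exponent = 15, mantissa = 7 for NaN
    }

    const uint32_t exponent_bias_delta = 127 - 7;  // rebias FP8 (7) to FP32 (127)
    uint32_t exponent                  = ((f8 >> 3) & 15) + exponent_bias_delta;
    uint32_t mantissa                  = f8 & 7;

    // subnormal: FP8 exponent field is zero
    if (exponent == exponent_bias_delta) {
        if (mantissa == 0) {
            exponent = 0;  // +/- 0
        } else {
            // renormalize: shift until the implicit leading bit (bit 3) appears
            exponent++;
            while ((mantissa & 8) == 0) {
                exponent--;
                mantissa <<= 1;
            }
            mantissa &= 7;  // drop the now-implicit leading bit
        }
    }

    const uint32_t sign   = f8 >> 7;
    // FP32 carries 23 mantissa bits; the 3 FP8 bits land in the top 3 (<< 20)
    const uint32_t result = (sign << 31) | (exponent << 23) | (mantissa << 20);
    float out;
    std::memcpy(&out, &result, sizeof(out));  // well-defined type punning
    return out;
}

int main() {
    assert(f8_e4m3fn_to_f32_sketch(0x00) == 0.0f);
    assert(f8_e4m3fn_to_f32_sketch(0x38) == 1.0f);     // exp field 7, mantissa 0
    assert(f8_e4m3fn_to_f32_sketch(0x7e) == 448.0f);   // largest finite value
    assert(f8_e4m3fn_to_f32_sketch(0x01) == 0x1p-9f);  // smallest subnormal
    return 0;
}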
@@ -890,7 +890,7 @@ void convert_tensor(void* src,
         ptr = static_cast<float*>(dst);
     }

-    switch(src_type) {
+    switch (src_type) {
         case GGML_TYPE_F64:
             for (int i = 0; i < n; i++) {
                 ptr[i] = static_cast<double*>(src)[i];
@@ -928,17 +928,17 @@ void convert_tensor(void* src,
                                             ggml_type_name(src_type)));
             }
             if (ptr == src) {
-                 buffer.resize(n);
-                 ptr = buffer.data();
+                buffer.resize(n);
+                ptr = buffer.data();
             }
             qtype->to_float(src, ptr, n);
             break;
-         }
+        }
     }

     // convert f32 to dst_type
     // int dst types are forbidden as they can overflow
-    switch(dst_type) {
+    switch (dst_type) {
         case GGML_TYPE_F64:
             for (int i = n - 1; i >= 0; i--) {
                 static_cast<double*>(dst)[i] = ptr[i];
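
The hunks above are again formatting-only (switch spacing, indentation), but they trace convert_tensor's two-pass shape: the source tensor is first widened into an f32 staging buffer (quantized source types go through their to_float), then the f32 data is converted to the destination type. Below is a minimal sketch of that pattern using only public ggml calls; the function name and the fixed f16-to-Q8_0 type pair are hypothetical, not taken from model.cpp.

#include <cstdint>
#include <vector>

#include "ggml.h"

// Two-pass conversion sketch: src_type -> f32 staging buffer -> dst_type.
static void convert_f16_to_q8_0_sketch(const ggml_fp16_t* src, void* dst,
                                       int64_t nrows, int64_t n_per_row) {
    const int64_t n = nrows * n_per_row;

    // pass 1: widen the f16 source to f32
    std::vector<float> f32(n);
    ggml_fp16_to_fp32_row(src, f32.data(), n);

    // pass 2: quantize the f32 rows into the destination type;
    // Q8_0 takes no importance matrix, hence the trailing nullptr
    ggml_quantize_chunk(GGML_TYPE_Q8_0, f32.data(), dst, 0, nrows, n_per_row, nullptr);
}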
@@ -949,6 +949,10 @@ void convert_tensor(void* src,
             break;
         default:
             GGML_ASSERT(ptr != dst);
+            std::vector<float> imatrix(n_per_row, 1.0f);  // dummy importance matrix
+            const float* im = imatrix.data();
+            ggml_quantize_chunk(dst_type, (float*)src, dst, 0, nrows, n_per_row, im);
+
             ggml_quantize_chunk(dst_type, ptr, dst, 0, nrows, n_per_row, nullptr);
             break;
     }
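
This hunk is the substantive change named in the commit title: an all-ones dummy importance matrix is built and passed to ggml_quantize_chunk. An imatrix weights columns by importance during quantization, so all-ones treats every column equally; the usual motivation for a dummy one (an assumption here, not stated in the diff) is that some low-bit ggml quant types refuse a null imatrix. A minimal isolated sketch of the idea:

#include <cstdint>
#include <vector>

#include "ggml.h"

// Quantize f32 rows with a neutral, all-ones importance matrix.
// ggml_quantize_chunk expects one importance weight per column (n_per_row).
static void quantize_with_dummy_imatrix(enum ggml_type dst_type, const float* src,
                                        void* dst, int64_t nrows, int64_t n_per_row) {
    std::vector<float> imatrix(n_per_row, 1.0f);  // dummy importance matrix
    const float* im = imatrix.data();
    ggml_quantize_chunk(dst_type, src, dst, /*start=*/0, nrows, n_per_row, im);
}

Note that in the hunk the pre-existing ggml_quantize_chunk call with a nullptr imatrix still runs immediately after the new one, so the second write to dst is the one that survives.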
@@ -2112,7 +2116,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
             scale_count[tensor->name]--;
         }
     }
-    for(auto& x : scale_count) {
+    for (auto& x : scale_count) {
         if (x.second > 0) {
             LOG_ERROR("f8 weight not found for scale_weight: '%s'", x.first.c_str());
             return false;
@@ -2187,7 +2191,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
             continue;
         }

-        ggml_tensor* dst_tensor    = nullptr;
+        ggml_tensor* dst_tensor = nullptr;

         t0 = ggml_time_ms();
