@@ -111,11 +111,11 @@ const char* unused_tensors[] = {
     "embedding_manager",
     "denoiser.sigmas",
     "text_encoders.t5xxl.transformer.encoder.embed_tokens.weight",  // only used during training
-    "text_encoders.t5xxl.logit_scale",  // only used during training
+    "text_encoders.t5xxl.logit_scale",  // only used during training
     "text_encoders.t5xxl.transformer.scaled_fp8",
     "text_encoders.qwen2vl.output.weight",
     "text_encoders.qwen2vl.lm_head.",
-    "text_encoders.qwen2vl.logit_scale",  // only used during training
+    "text_encoders.qwen2vl.logit_scale",  // only used during training
     "text_encoders.qwen2vl.transformer.scaled_fp8",
 };

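For context, `unused_tensors` entries behave as name prefixes for weights the loader can skip (`lm_head.` keeps its trailing dot precisely so it matches as a prefix). A minimal sketch of a prefix check against this array; the helper name here is hypothetical:

```cpp
#include <cstring>
#include <string>

// Hypothetical helper: skip any tensor whose name starts with one of the
// unused_tensors[] prefixes (training-only weights, scaled_fp8 markers, ...).
static bool is_unused_tensor(const std::string& name) {
    for (const char* prefix : unused_tensors) {
        if (name.compare(0, std::strlen(prefix), prefix) == 0) {
            return true;
        }
    }
    return false;
}
```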
@@ -819,8 +819,8 @@ float f8_e4m3fn_to_f32(uint8_t f8) {
     }

     const uint32_t exponent_bias_delta = 127 - 7;
-    uint32_t exponent = ((f8 >> 3) & 15) + exponent_bias_delta;
-    uint32_t mantissa = f8 & 7;
+    uint32_t exponent = ((f8 >> 3) & 15) + exponent_bias_delta;
+    uint32_t mantissa = f8 & 7;

     // subnormal
     if (exponent == exponent_bias_delta) {
@@ -840,7 +840,7 @@ float f8_e4m3fn_to_f32(uint8_t f8) {
         mantissa <<= 1;
     }

-    const uint32_t sign = f8 >> 7;
+    const uint32_t sign = f8 >> 7;
     const uint32_t result = (sign << 31) | (exponent << 23) | (mantissa << 20);
     return *reinterpret_cast<const float*>(&result);
 }
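For reference, E4M3FN packs a float into sign:1, exponent:4 (bias 7), mantissa:3, with no infinities. The conversion above rebases the exponent to f32's bias of 127 (hence `exponent_bias_delta = 127 - 7`) and shifts the 3-bit mantissa into f32's 23-bit field (hence `<< 20`). A minimal spot-check of that layout, assuming `f8_e4m3fn_to_f32` from this file is in scope:

```cpp
#include <cassert>
#include <cstdint>

int main() {
    // 0b0'0111'000: exponent field == bias (7), mantissa 0  ->  1.0
    assert(f8_e4m3fn_to_f32(0x38) == 1.0f);
    // 0b1'1000'000: sign set, unbiased exponent +1          -> -2.0
    assert(f8_e4m3fn_to_f32(0xC0) == -2.0f);
    // 0b0'0110'000: unbiased exponent -1                    ->  0.5
    assert(f8_e4m3fn_to_f32(0x30) == 0.5f);
    return 0;
}
```

The final `*reinterpret_cast<const float*>(&result)` is a type pun; copying the bits with `std::memcpy` (or C++20 `std::bit_cast`) expresses the same thing without relying on strict-aliasing leniency.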
@@ -890,7 +890,7 @@ void convert_tensor(void* src,
         ptr = static_cast<float*>(dst);
     }

-    switch (src_type) {
+    switch (src_type) {
         case GGML_TYPE_F64:
             for (int i = 0; i < n; i++) {
                 ptr[i] = static_cast<double*>(src)[i];
@@ -928,17 +928,17 @@ void convert_tensor(void* src,
                                                 ggml_type_name(src_type)));
             }
             if (ptr == src) {
-                buffer.resize(n);
-                ptr = buffer.data();
+                buffer.resize(n);
+                ptr = buffer.data();
             }
             qtype->to_float(src, ptr, n);
             break;
-        }
+        }
     }

     // convert f32 to dst_type
     // int dst types are forbidden as they can overflow
-    switch (dst_type) {
+    switch (dst_type) {
         case GGML_TYPE_F64:
             for (int i = n - 1; i >= 0; i--) {
                 static_cast<double*>(dst)[i] = ptr[i];
@@ -949,6 +949,10 @@ void convert_tensor(void* src,
             break;
         default:
             GGML_ASSERT(ptr != dst);
+            std::vector<float> imatrix(n_per_row, 1.0f);  // dummy importance matrix
+            const float* im = imatrix.data();
+            ggml_quantize_chunk(dst_type, (float*)src, dst, 0, nrows, n_per_row, im);
+
             ggml_quantize_chunk(dst_type, ptr, dst, 0, nrows, n_per_row, nullptr);
             break;
     }
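`convert_tensor` works in two stages: it first widens or dequantizes `src` into an f32 buffer (`ptr`), then narrows or quantizes into `dst`. The added lines build an all-ones importance matrix and pass it to `ggml_quantize_chunk`; an imatrix weights each column's contribution during quantization, a constant one being a neutral stand-in, and some IQ types cannot be quantized without one (cf. `ggml_quantize_requires_imatrix` in ggml). Note that, as the hunk reads, the pre-existing `nullptr` call still executes after the new one and overwrites `dst`, and the new call reads `src`, which matches the dequantized f32 buffer `ptr` only when the source type was already f32. A self-contained sketch of the call pattern, with a made-up tensor shape:

```cpp
#include <cstdint>
#include <vector>
#include "ggml.h"

int main() {
    // 256 floats per row: a multiple of the Q4_K super-block size
    const int64_t nrows = 4, n_per_row = 256;
    std::vector<float> src(nrows * n_per_row, 0.5f);

    // size the destination for the quantized row layout
    std::vector<uint8_t> dst(nrows * ggml_row_size(GGML_TYPE_Q4_K, n_per_row));

    std::vector<float> imatrix(n_per_row, 1.0f);  // dummy importance matrix, one weight per column
    ggml_quantize_chunk(GGML_TYPE_Q4_K, src.data(), dst.data(),
                        /*start=*/0, nrows, n_per_row, imatrix.data());
    return 0;
}
```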
@@ -2112,7 +2116,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
             scale_count[tensor->name]--;
         }
     }
-    for (auto& x : scale_count) {
+    for (auto& x : scale_count) {
         if (x.second > 0) {
             LOG_ERROR("f8 weight not found for scale_weight: '%s'", x.first.c_str());
             return false;
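For context: `scale_count` maps a tensor name to a counter, presumably incremented when a `scale_weight` tensor is seen and, as the visible decrement shows, decremented when its matching f8 weight turns up; any positive leftover means an orphaned scale. A toy illustration of that pairing check (the key below is invented):

```cpp
#include <map>
#include <string>

int main() {
    std::map<std::string, int> scale_count;
    scale_count["model.layers.0.w.scale_weight"]++;  // scale tensor seen
    scale_count["model.layers.0.w.scale_weight"]--;  // matching f8 weight found

    for (auto& x : scale_count) {
        if (x.second > 0) {
            // an f8 scale without its weight: the load must fail, as above
            return 1;
        }
    }
    return 0;
}
```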
@@ -2187,7 +2191,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb, int n_thread
                 continue;
             }

-            ggml_tensor* dst_tensor = nullptr;
+            ggml_tensor* dst_tensor = nullptr;

             t0 = ggml_time_ms();