@@ -136,19 +136,11 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         self.remote_hf_model_id = remote_hf_model_id
         self.sentence_transformers_dense_modules = sentence_transformers_dense_modules
         self.hparams = ModelBase.load_hparams(self.dir_model, self.is_mistral_format) if hparams is None else hparams
-        self.rope_parameters = self.hparams.get("rope_parameters", self.hparams.get("rope_scaling")) or {}
         self.model_tensors = self.index_tensors(remote_hf_model_id=remote_hf_model_id)
         self.metadata_override = metadata_override
         self.model_name = model_name
         self.dir_model_card = dir_model  # overridden in convert_lora_to_gguf.py

-        # Ensure "rope_theta" and "rope_type" is mirrored in rope_parameters
-        if "full_attention" not in self.rope_parameters and "sliding_attention" not in self.rope_parameters:
-            if "rope_theta" not in self.rope_parameters and (rope_theta := self.find_hparam(["rope_theta", "global_rope_theta", "rotary_emb_base"], optional=True)) is not None:
-                self.rope_parameters["rope_theta"] = rope_theta
-            if "rope_type" not in self.rope_parameters and (rope_type := self.rope_parameters.get("type")) is not None:
-                self.rope_parameters["rope_type"] = rope_type
-
         # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
         if self.ftype == gguf.LlamaFileType.GUESSED:
             # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
@@ -765,6 +757,15 @@ def __init__(self, *args, **kwargs):
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)

+        self.rope_parameters = self.hparams.get("rope_parameters", self.hparams.get("rope_scaling")) or {}
+
+        # Ensure "rope_theta" and "rope_type" is mirrored in rope_parameters
+        if "full_attention" not in self.rope_parameters and "sliding_attention" not in self.rope_parameters:
+            if "rope_theta" not in self.rope_parameters and (rope_theta := self.find_hparam(["rope_theta", "global_rope_theta", "rotary_emb_base"], optional=True)) is not None:
+                self.rope_parameters["rope_theta"] = rope_theta
+            if "rope_type" not in self.rope_parameters and (rope_type := self.rope_parameters.get("type")) is not None:
+                self.rope_parameters["rope_type"] = rope_type
+
     @classmethod
     def __init_subclass__(cls):
         # can't use an abstract property, because overriding it without type errors
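
For context, here is a minimal standalone sketch of what the relocated block does. The sample `hparams` dict below is hypothetical; in the converter these values come from the model's config.json via `ModelBase.load_hparams` and `find_hparam`:

```python
# Hypothetical hparams, standing in for a model's parsed config.json
hparams = {
    "rope_theta": 10000.0,
    "rope_scaling": {"type": "linear", "factor": 2.0},
}

# Prefer "rope_parameters", falling back to the older "rope_scaling" key
rope_parameters = hparams.get("rope_parameters", hparams.get("rope_scaling")) or {}

# Mirror top-level values into rope_parameters, unless the config uses
# per-attention-kind sections ("full_attention"/"sliding_attention")
if "full_attention" not in rope_parameters and "sliding_attention" not in rope_parameters:
    if "rope_theta" not in rope_parameters and (rope_theta := hparams.get("rope_theta")) is not None:
        rope_parameters["rope_theta"] = rope_theta
    if "rope_type" not in rope_parameters and (rope_type := rope_parameters.get("type")) is not None:
        rope_parameters["rope_type"] = rope_type

print(rope_parameters)
# {'type': 'linear', 'factor': 2.0, 'rope_theta': 10000.0, 'rope_type': 'linear'}
```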