Commit d6a1e18

convert : move rope_parameters to TextModel class (ggml-org#18061)
* make sure to search text_config for rope parameters
* move rope_parameters to TextModel class
1 parent c45f89d commit d6a1e18
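
The first bullet refers to multimodal checkpoints, where the language model's hyperparameters (including rope_theta and rope_scaling) are nested under a text_config key in config.json rather than at the top level, so a converter that only inspects the top level misses them; per the commit title, the rope setup now runs in TextModel.__init__ instead of ModelBase.__init__. A toy illustration of the two config shapes (the field values and model_type are made up for illustration):

```python
# Text-only config.json: rope fields sit at the top level.
text_only = {
    "num_hidden_layers": 32,
    "rope_theta": 10000.0,
    "rope_scaling": {"type": "linear", "factor": 2.0},
}

# Multimodal config.json: the same fields are nested under "text_config",
# so they must be searched for there as well.
multimodal = {
    "model_type": "llava",  # hypothetical example
    "vision_config": {"num_hidden_layers": 24},
    "text_config": {
        "num_hidden_layers": 32,
        "rope_theta": 10000.0,
        "rope_scaling": {"type": "linear", "factor": 2.0},
    },
}
```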

1 file changed: +9 -8 lines changed


convert_hf_to_gguf.py

Lines changed: 9 additions & 8 deletions
```diff
@@ -136,19 +136,11 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path,
         self.remote_hf_model_id = remote_hf_model_id
         self.sentence_transformers_dense_modules = sentence_transformers_dense_modules
         self.hparams = ModelBase.load_hparams(self.dir_model, self.is_mistral_format) if hparams is None else hparams
-        self.rope_parameters = self.hparams.get("rope_parameters", self.hparams.get("rope_scaling")) or {}
         self.model_tensors = self.index_tensors(remote_hf_model_id=remote_hf_model_id)
         self.metadata_override = metadata_override
         self.model_name = model_name
         self.dir_model_card = dir_model  # overridden in convert_lora_to_gguf.py

-        # Ensure "rope_theta" and "rope_type" is mirrored in rope_parameters
-        if "full_attention" not in self.rope_parameters and "sliding_attention" not in self.rope_parameters:
-            if "rope_theta" not in self.rope_parameters and (rope_theta := self.find_hparam(["rope_theta", "global_rope_theta", "rotary_emb_base"], optional=True)) is not None:
-                self.rope_parameters["rope_theta"] = rope_theta
-            if "rope_type" not in self.rope_parameters and (rope_type := self.rope_parameters.get("type")) is not None:
-                self.rope_parameters["rope_type"] = rope_type
-
         # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
         if self.ftype == gguf.LlamaFileType.GUESSED:
             # NOTE: can't use field "torch_dtype" in config.json, because some finetunes lie.
@@ -765,6 +757,15 @@ def __init__(self, *args, **kwargs):
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)

+        self.rope_parameters = self.hparams.get("rope_parameters", self.hparams.get("rope_scaling")) or {}
+
+        # Ensure "rope_theta" and "rope_type" is mirrored in rope_parameters
+        if "full_attention" not in self.rope_parameters and "sliding_attention" not in self.rope_parameters:
+            if "rope_theta" not in self.rope_parameters and (rope_theta := self.find_hparam(["rope_theta", "global_rope_theta", "rotary_emb_base"], optional=True)) is not None:
+                self.rope_parameters["rope_theta"] = rope_theta
+            if "rope_type" not in self.rope_parameters and (rope_type := self.rope_parameters.get("type")) is not None:
+                self.rope_parameters["rope_type"] = rope_type
+
     @classmethod
     def __init_subclass__(cls):
         # can't use an abstract property, because overriding it without type errors
```
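
Read as a standalone function, the moved block normalizes legacy rope fields into a single rope_parameters dict. Below is a minimal sketch of that behavior under stated assumptions: build_rope_parameters and its inlined text_config fallback are illustrative stand-ins, not the converter's actual find_hparam helper.

```python
from typing import Any

def build_rope_parameters(hparams: dict[str, Any]) -> dict[str, Any]:
    """Sketch of the rope-parameter normalization shown in the diff above."""
    # Per the commit message, rope parameters may live under "text_config"
    # (multimodal checkpoints); fall back to the top level otherwise.
    cfg = hparams.get("text_config", hparams)

    # Prefer the newer "rope_parameters" key, fall back to legacy "rope_scaling".
    rope = dict(cfg.get("rope_parameters", cfg.get("rope_scaling")) or {})

    # Skip mirroring when per-attention-type sub-configs are present.
    if "full_attention" not in rope and "sliding_attention" not in rope:
        if "rope_theta" not in rope:
            # Stand-in for find_hparam(["rope_theta", "global_rope_theta", "rotary_emb_base"]).
            for key in ("rope_theta", "global_rope_theta", "rotary_emb_base"):
                if key in cfg:
                    rope["rope_theta"] = cfg[key]
                    break
        if "rope_type" not in rope and "type" in rope:
            # Legacy configs spell the scaling type as "type"; mirror it.
            rope["rope_type"] = rope["type"]
    return rope

# Example: a legacy-style config is normalized into one dict.
cfg = {"rope_theta": 10000.0, "rope_scaling": {"type": "linear", "factor": 2.0}}
print(build_rope_parameters(cfg))
# -> {'type': 'linear', 'factor': 2.0, 'rope_theta': 10000.0, 'rope_type': 'linear'}
```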
