Skip to content

Commit dd66aba

Browse files
committed
Process mrope_section on the TextModel base class
1 parent b81c03c commit dd66aba

File tree

1 file changed: +10 −53 lines

convert_hf_to_gguf.py

Lines changed: 10 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -861,6 +861,14 @@ def set_gguf_parameters(self):
861861
logger.warning(f"Unknown RoPE type: {rope_type}")
862862
logger.info(f"gguf: rope scaling type = {rope_gguf_type.name}")
863863

864+
if "mrope_section" in self.rope_parameters:
865+
mrope_section = self.rope_parameters["mrope_section"]
866+
# Pad to 4 dimensions [time, height, width, extra]
867+
while len(mrope_section) < 4:
868+
mrope_section.append(0)
869+
self.gguf_writer.add_rope_dimension_sections(mrope_section[:4])
870+
logger.info(f"gguf: mrope sections: {mrope_section[:4]}")
871+
864872
if (rope_theta := rope_params.get("rope_theta")) is not None:
865873
self.gguf_writer.add_rope_freq_base(rope_theta)
866874
logger.info(f"gguf: rope theta = {rope_theta}")
@@ -3738,9 +3746,6 @@ class Qwen2VLModel(TextModel):
37383746

37393747
def set_gguf_parameters(self):
37403748
super().set_gguf_parameters()
3741-
mrope_section = self.hparams["rope_scaling"]["mrope_section"]
3742-
mrope_section += [0] * max(0, 4 - len(mrope_section))
3743-
self.gguf_writer.add_rope_dimension_sections(mrope_section)
37443749

37453750
def set_vocab(self):
37463751
try:
@@ -4408,20 +4413,6 @@ def set_gguf_parameters(self):
44084413
super().set_gguf_parameters()
44094414

44104415
# Handle MRoPE (Multi-axis Rotary Position Embedding) for Qwen3-VL
4411-
text_config = self.hparams.get("text_config", {})
4412-
# rope_scaling is deprecated in V5, use rope_parameters instead
4413-
rope_scaling = text_config.get("rope_scaling") or text_config.get("rope_parameters") or {}
4414-
4415-
if rope_scaling.get("mrope_section"):
4416-
# mrope_section contains [time, height, width] dimensions
4417-
mrope_section = rope_scaling["mrope_section"]
4418-
# Pad to 4 dimensions [time, height, width, extra]
4419-
while len(mrope_section) < 4:
4420-
mrope_section.append(0)
4421-
self.gguf_writer.add_rope_dimension_sections(mrope_section[:4])
4422-
4423-
logger.info(f"MRoPE sections: {mrope_section[:4]}")
4424-
44254416
vision_config = self.hparams.get("vision_config", {})
44264417
deepstack_layer_num = len(vision_config.get("deepstack_visual_indexes", []))
44274418
self.gguf_writer.add_num_deepstack_layers(deepstack_layer_num)
@@ -4440,22 +4431,6 @@ class Qwen3VLMoeTextModel(Qwen3MoeModel):
44404431

44414432
def set_gguf_parameters(self):
44424433
super().set_gguf_parameters()
4443-
4444-
# Handle MRoPE (Multi-axis Rotary Position Embedding) for Qwen3-VL
4445-
text_config = self.hparams.get("text_config", {})
4446-
# rope_scaling is deprecated in V5, use rope_parameters instead
4447-
rope_scaling = text_config.get("rope_scaling") or text_config.get("rope_parameters") or {}
4448-
4449-
if rope_scaling.get("mrope_section"):
4450-
# mrope_section contains [time, height, width] dimensions
4451-
mrope_section = rope_scaling["mrope_section"]
4452-
# Pad to 4 dimensions [time, height, width, extra]
4453-
while len(mrope_section) < 4:
4454-
mrope_section.append(0)
4455-
self.gguf_writer.add_rope_dimension_sections(mrope_section[:4])
4456-
4457-
logger.info(f"MRoPE sections: {mrope_section[:4]}")
4458-
44594434
vision_config = self.hparams.get("vision_config", {})
44604435
deepstack_layer_num = len(vision_config.get("deepstack_visual_indexes", []))
44614436
self.gguf_writer.add_num_deepstack_layers(deepstack_layer_num)
@@ -7826,7 +7801,7 @@ def __init__(self, *args, **kwargs):
78267801
self.partial_rotary_factor = self.rope_parameters.get("partial_rotary_factor", 0.5)
78277802
if "mrope_section" in self.rope_parameters:
78287803
self.use_mrope = True
7829-
logger.info("Using M-RoPE")
7804+
logger.info("Q/K weight will need to be permuted for M-RoPE")
78307805

78317806
def set_vocab(self):
78327807
from transformers import AutoTokenizer
@@ -7849,14 +7824,6 @@ def set_gguf_parameters(self):
78497824
if (rope_dim := self.hparams.get("head_dim")) is None:
78507825
rope_dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
78517826
self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.partial_rotary_factor))
7852-
# handle M-RoPE, the same as Qwen-VL
7853-
if self.use_mrope:
7854-
mrope_section = self.rope_parameters["mrope_section"]
7855-
# Pad to 4 dimensions [time, height, width, extra]
7856-
while len(mrope_section) < 4:
7857-
mrope_section.append(0)
7858-
self.gguf_writer.add_rope_dimension_sections(mrope_section[:4])
7859-
logger.info(f"MRoPE sections: {mrope_section[:4]}")
78607827

78617828
@staticmethod
78627829
def normal_to_neox(weights: Tensor, n_head: int, n_head_kv: int, head_dim: int, partial_rotary_factor: float) -> Tensor:
@@ -7963,19 +7930,9 @@ def set_gguf_parameters(self):
79637930
if (num_nextn_predict_layers := self.hparams.get("num_nextn_predict_layers")) is not None:
79647931
self.gguf_writer.add_nextn_predict_layers(num_nextn_predict_layers)
79657932

7966-
# handle M-RoPE, the same as Qwen-VL
7967-
# note: unlike GLM4 non-MoE, we don't need to permute the weights here since GLM4_MOE uses Neox ordering already
7968-
rope_scaling = self.hparams.get("rope_scaling") or self.hparams.get("rope_parameters") or {}
7969-
if "mrope_section" in rope_scaling:
7970-
mrope_section = rope_scaling["mrope_section"]
7971-
# Pad to 4 dimensions [time, height, width, extra]
7972-
while len(mrope_section) < 4:
7973-
mrope_section.append(0)
7974-
self.gguf_writer.add_rope_dimension_sections(mrope_section[:4])
7975-
logger.info(f"MRoPE sections: {mrope_section[:4]}")
7976-
79777933
_experts: list[dict[str, Tensor]] | None = None
79787934

7935+
# note: unlike GLM4V non-MoE, we don't need to permute Q/K here since GLM4V_MOE uses Neox ordering already
79797936
def modify_tensors(
79807937
self, data_torch: Tensor, name: str, bid: int | None
79817938
) -> Iterable[tuple[str, Tensor]]:

0 commit comments

Comments
 (0)