@@ -5,11 +5,20 @@ llm_build_glm4_moe::llm_build_glm4_moe(const llama_model & model, const llm_grap
55
66 GGML_ASSERT (n_embd_head == hparams.n_embd_head_k );
77
8+ int sections[4 ];
9+ std::copy (std::begin (hparams.rope_sections ), std::begin (hparams.rope_sections ) + 4 , sections);
10+
811 ggml_tensor * cur;
912 ggml_tensor * inpL;
1013
1114 inpL = build_inp_embd (model.tok_embd );
1215
16+ bool use_mrope = hparams.use_mrope ();
17+ if (ubatch.embd && !use_mrope) {
18+ // unfortunately, we need to forcefully stop here, to avoid users complaining about wrong results
19+ GGML_ABORT (" This GGUF does not support multimodal. Please reconvert it." );
20+ }
21+
1322 // inp_pos - contains the positions
1423 ggml_tensor * inp_pos = build_inp_pos ();
1524
@@ -60,17 +69,25 @@ llm_build_glm4_moe::llm_build_glm4_moe(const llama_model & model, const llm_grap
6069 Kcur = build_norm (Kcur, model.layers [il].attn_k_norm , NULL , LLM_NORM_RMS, il);
6170 cb (Kcur, " Kcur_normed" , il);
6271 }
63- Qcur = ggml_rope_ext (
64- ctx0, Qcur, inp_pos, nullptr ,
65- n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
66- ext_factor, attn_factor, beta_fast, beta_slow
67- );
68-
69- Kcur = ggml_rope_ext (
70- ctx0, Kcur, inp_pos, nullptr ,
71- n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
72- ext_factor, attn_factor, beta_fast, beta_slow
73- );
72+
73+ if (use_mrope) {
74+ Qcur = ggml_rope_multi (ctx0, Qcur, inp_pos, nullptr ,
75+ n_rot, sections, rope_type, n_ctx_orig, freq_base, freq_scale,
76+ ext_factor, attn_factor, beta_fast, beta_slow);
77+
78+ Kcur = ggml_rope_multi (ctx0, Kcur, inp_pos, nullptr ,
79+ n_rot, sections, rope_type, n_ctx_orig, freq_base, freq_scale,
80+ ext_factor, attn_factor, beta_fast, beta_slow);
81+ } else {
82+ // Normal RoPE
83+ Qcur = ggml_rope_ext (ctx0, Qcur, inp_pos, nullptr , n_rot,
84+ rope_type, n_ctx_orig, freq_base, freq_scale,
85+ ext_factor, attn_factor, beta_fast, beta_slow);
86+
87+ Kcur = ggml_rope_ext (ctx0, Kcur, inp_pos, nullptr , n_rot,
88+ rope_type, n_ctx_orig, freq_base, freq_scale,
89+ ext_factor, attn_factor, beta_fast, beta_slow);
90+ }
7491
7592 cb (Qcur, " Qcur" , il);
7693 cb (Kcur, " Kcur" , il);
0 commit comments