Skip to content

Commit 9453202

Browse files
committed
Add first pass at GraniteMoeHybridForCausalLM support
1 parent 0027c5c commit 9453202

File tree

2 files changed

+100
-0
lines changed

2 files changed

+100
-0
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
{
2+
"model_type": "granitemoehybrid",
3+
"architectures": [
4+
"GraniteMoeHybridForCausalLM"
5+
],
6+
"pre_weights": [
7+
{
8+
"name": "model.embed_tokens.weight",
9+
"is_embed": true
10+
}
11+
],
12+
"post_weights": [
13+
{
14+
"name": "model.norm.weight"
15+
},
16+
{
17+
"name": "lm_head.weight",
18+
"is_embed": true,
19+
"optional": true,
20+
"tied_names": [
21+
"model.embed_tokens.weight"
22+
]
23+
}
24+
],
25+
"num_layers_config_key": "num_hidden_layers",
26+
"layer_templates": {
27+
"weights": [
28+
{
29+
"name": "model.layers.${layer_index}.input_layernorm.weight"
30+
},
31+
{
32+
"name": "model.layers.${layer_index}.shared_mlp.input_linear.weight"
33+
},
34+
{
35+
"name": "model.layers.${layer_index}.shared_mlp.output_linear.weight"
36+
},
37+
{
38+
"name": "model.layers.${layer_index}.block_sparse_moe.input_linear.weight"
39+
},
40+
{
41+
"name": "model.layers.${layer_index}.block_sparse_moe.router.layer.weight"
42+
},
43+
{
44+
"name": "model.layers.${layer_index}.block_sparse_moe.output_linear.weight"
45+
},
46+
{
47+
"name": "model.layers.${layer_index}.post_attention_layernorm.weight"
48+
},
49+
{
50+
"name": "model.layers.${layer_index}.self_attn.k_proj.weight",
51+
"optional": true
52+
},
53+
{
54+
"name": "model.layers.${layer_index}.self_attn.o_proj.weight",
55+
"optional": true
56+
},
57+
{
58+
"name": "model.layers.${layer_index}.self_attn.q_proj.weight",
59+
"optional": true
60+
},
61+
{
62+
"name": "model.layers.${layer_index}.self_attn.v_proj.weight",
63+
"optional": true
64+
},
65+
{
66+
"name": "model.layers.${layer_index}.mamba.A_log",
67+
"optional": true
68+
},
69+
{
70+
"name": "model.layers.${layer_index}.mamba.conv1d.bias",
71+
"optional": true
72+
},
73+
{
74+
"name": "model.layers.${layer_index}.mamba.conv1d.weight",
75+
"optional": true
76+
},
77+
{
78+
"name": "model.layers.${layer_index}.mamba.D",
79+
"optional": true
80+
},
81+
{
82+
"name": "model.layers.${layer_index}.mamba.dt_bias",
83+
"optional": true
84+
},
85+
{
86+
"name": "model.layers.${layer_index}.mamba.in_proj.weight",
87+
"optional": true
88+
},
89+
{
90+
"name": "model.layers.${layer_index}.mamba.norm.weight",
91+
"optional": true
92+
},
93+
{
94+
"name": "model.layers.${layer_index}.mamba.out_proj.weight",
95+
"optional": true
96+
}
97+
]
98+
}
99+
}

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ packages = [
6060
"mergekit.scripts",
6161
"mergekit.evo",
6262
"mergekit.tokenizer",
63+
"mergekit.tokensurgeon",
6364
"mergekit.architecture",
6465
"mergekit._data",
6566
"mergekit._data.architectures",

0 commit comments

Comments
 (0)