Skip to content

Commit 5b67d01

Browse files
authored
Better handle tied weights when MoE-merging (#617)
Should resolve #615.
1 parent bfc409a commit 5b67d01

File tree

1 file changed

+10
-2
lines changed

1 file changed

+10
-2
lines changed

mergekit/moe/common.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,11 +76,19 @@ def copy_tensor_out(
7676
clone: bool = False,
7777
):
7878
out_tensor_name = output_name or weight_info.name
79+
aliases = weight_info.aliases or []
80+
if not weight_info.optional:
81+
aliases += weight_info.tied_names or []
7982
try:
80-
tensor = loader.get_tensor(weight_info.name, aliases=weight_info.aliases)
83+
tensor = loader.get_tensor(
84+
weight_info.name,
85+
aliases=aliases,
86+
)
8187
except KeyError:
8288
tensor = None
83-
if tensor is None and not weight_info.optional:
89+
if tensor is None:
90+
if weight_info.optional:
91+
return
8492
logging.error(f"Missing weight: {weight_info.name} / {out_tensor_name}")
8593
raise KeyError(out_tensor_name)
8694

0 commit comments

Comments
 (0)