diff --git a/toolkits/model_checkpoints_convertor/deepseek/hf2mcore_deepseek_v3_moe.py b/toolkits/model_checkpoints_convertor/deepseek/hf2mcore_deepseek_v3_moe.py index e47963df..19b7569e 100644 --- a/toolkits/model_checkpoints_convertor/deepseek/hf2mcore_deepseek_v3_moe.py +++ b/toolkits/model_checkpoints_convertor/deepseek/hf2mcore_deepseek_v3_moe.py @@ -501,7 +501,10 @@ def save_mgmodel(mgmodel, args): else: target_v = v - if "embedding.word_embeddings" in k and "mtp_embedding" not in k: + if 'experts' in k and 'shared_experts' not in k: + if tp_rank == 0: + model_split[k] = target_v + elif "embedding.word_embeddings" in k and "mtp_embedding" not in k: if pp_rank == 0: model_split[k] = target_v elif "mtp_embedding.word_embeddings" in k: @@ -544,4 +547,4 @@ def main(): save_mgmodel(mg_model, args) if __name__ == "__main__": - main() \ No newline at end of file + main()