From 2bab90ee45c193a6179d7108ceeab4952b40b3c7 Mon Sep 17 00:00:00 2001 From: ShareLer <48175490+ShareLer@users.noreply.github.com> Date: Wed, 30 Apr 2025 19:19:11 +0800 Subject: [PATCH] fix typo for hidden_size --- .../model_checkpoints_convertor/qwen/hf2mcore_qwen2_moe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_moe.py b/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_moe.py index 80d8ecd7..eedd0d91 100644 --- a/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_moe.py +++ b/toolkits/model_checkpoints_convertor/qwen/hf2mcore_qwen2_moe.py @@ -460,7 +460,7 @@ def convert_checkpoint_from_transformers_to_megatron(hfmodel: Qwen2MoeForCausalL elif args.bf16: mgmodel = mgmodel.bfloat16() - head_dim = hidden_size // args.num_attention_heads if args.kv_channels is None else args.kv_channels + head_dim = args.hidden_size // args.num_attention_heads if args.kv_channels is None else args.kv_channels group_per_split = args.num_query_groups // args.target_tensor_model_parallel_size with torch.no_grad(): @@ -690,4 +690,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main()