Commit 6e30e1e: reformat

Parent: f9fd201

3 files changed: +17 additions, -18 deletions

examples/transformers/qwen3_omni_moe/README.md

Lines changed: 3 additions & 4 deletions
@@ -16,8 +16,7 @@ The abstract from the technical report is the following:
 
 ### Installation:
 ```
-# TODO modify path before mergeing
-git clone https://github.com/wcrzlh/mindone.git -b vllm_patch
+git clone https://github.com/mindspore-lab/mindone.git
 cd mindone
 pip install -e .
@@ -122,7 +121,7 @@ text_ids, audio = model.generate(
     thinker_return_dict_in_generate=True,
     use_audio_in_video=USE_AUDIO_IN_VIDEO,
     return_audio=False,
-    talker_do_sampe=False,
+    talker_do_sample=False,
 )
 
 text = processor.batch_decode(
@@ -148,5 +147,5 @@ If `return_audio=True` is set, besides that above text generation results, a pie
 ## Inference Speed
 | model name | mindspore version | precision* | cards | Model part | attention type | tokens/s |
 |:------------------------------:|:-----------------:|:----------:|:-----:|:----------:|:--------------:|:----------:|
-| Qwen3-Omni-30B-A3B-Instruct | 2.7.0 | bf16 | 2 | Thinker | flash_attn | 0.36 |
+| Qwen3-Omni-30B-A3B-Instruct | 2.7.0 | bf16 | 2 | Thinker | flash_attn | 0.73 |
 | Qwen3-Omni-30B-A3B-Instruct | 2.7.0 | bf16 | 2 | Talker | flash_attn | 0.88 |
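For context, the renamed argument belongs to the README's `model.generate(...)` example. Below is a trimmed sketch of the corrected call; `model` and `USE_AUDIO_IN_VIDEO` are assumed to come from the README's setup code (not part of this diff), and the input arguments are omitted.

```python
# Trimmed sketch of the corrected generate() call from the README example.
# `model` and USE_AUDIO_IN_VIDEO are assumed to be defined by the README's
# setup code; the model/processor inputs are omitted here.
text_ids, audio = model.generate(
    thinker_return_dict_in_generate=True,
    use_audio_in_video=USE_AUDIO_IN_VIDEO,
    return_audio=False,
    talker_do_sample=False,  # keyword previously misspelled as "talker_do_sampe"
)
```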

mindone/transformers/modeling_utils.py

Lines changed: 13 additions & 13 deletions
@@ -170,23 +170,23 @@ def _convert_state_dict(m, state_dict_pt, prefix=""):
     state_dict_ms = {}
     while state_dict_pt:
         name_pt, data_pt = state_dict_pt.popitem()
-        # TODO For models contains a lot of paramters, going through state_dict and model at the same time
-        # would cause performance decrease significantly. This part for aligning prefix would need to be optimized.
-        # for name, param in m.parameters_and_names():
-        #     name_ms = param.name
-        #     length = len(prefix) + 1
-        #     if name_pt.startswith(prefix):
-        #         # When name_ms and name_pt match and name_pt has prefix, name_pt would be sliced
-        #         if name_ms.rsplit(".", 1)[0] == name_pt.rsplit(".", 1)[0][length:] or name_ms == name_pt[length:]:
-        #             name_pt = name_pt[length:]
-        #     elif not name_pt.startswith(prefix):
-        #         # When name_ms and name_pt match and name_ms has prefix, prefix would be added to name_pt
-        #         if name_pt.rsplit(".", 1)[0] == name_ms.rsplit(".", 1)[0][length:] or name_pt == name_ms[length:]:
-        #             name_pt = ".".join([prefix, name_pt])
         name_ms, data_mapping = pt2ms_mappings.get(name_pt, (name_pt, lambda x: x))
         data_ms = data_mapping(data_pt)
         if name_ms is not None:
             state_dict_ms[name_ms] = data_ms
+
+    length = len(prefix) + 1
+    model_ckpt_key = m.state_dict().keys()
+    for key in state_dict_ms.keys():
+        # When model name and state dict name match and state dict name has prefix, state dict name would be sliced
+        if key[length:] in model_ckpt_key:
+            data_ms = state_dict_ms.pop(key)
+            state_dict_ms[key[length:]] = data_ms
+        # When model name and state dict name match and model name has prefix, prefix would be added to state dict name
+        elif ".".join([prefix, key]) in model_ckpt_key:
+            data_ms = state_dict_ms.pop(key)
+            state_dict_ms[".".join([prefix, key])] = data_ms
+
     return state_dict_ms
 
 
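To illustrate what the new post-conversion pass does, here is a minimal standalone sketch of the prefix alignment on toy dictionaries. The prefix, key names, and values are invented for the example, and the keys are wrapped in `list()` here so the dict can be modified while iterating.

```python
# Minimal sketch of the prefix-alignment pass on toy data. The prefix, keys,
# and values are illustrative only; in modeling_utils the model keys come from
# m.state_dict().keys() and state_dict_ms from the PT->MS conversion above.
prefix = "model"
model_ckpt_key = {"layer.weight", "model.head.bias"}        # keys the model expects
state_dict_ms = {"model.layer.weight": 1, "head.bias": 2}   # converted checkpoint keys

length = len(prefix) + 1
for key in list(state_dict_ms.keys()):  # list() so the dict can be mutated safely
    # Checkpoint key carries the prefix but the model key does not: slice it off.
    if key[length:] in model_ckpt_key:
        state_dict_ms[key[length:]] = state_dict_ms.pop(key)
    # Model key carries the prefix but the checkpoint key does not: prepend it.
    elif ".".join([prefix, key]) in model_ckpt_key:
        state_dict_ms[".".join([prefix, key])] = state_dict_ms.pop(key)

print(state_dict_ms)  # {'layer.weight': 1, 'model.head.bias': 2}
```

The commit moves this alignment out of the per-parameter conversion loop (the deleted commented-out block) into a single pass over the already-converted state dict.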
tests/transformers_tests/models/qwen3_omni_moe/test_modeling_qwen3_omni_moe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
@@ -34,7 +34,7 @@
 MODES = [1]
 
 if transformers.__version__ >= "4.57.0":
-    from transformers.models.qwen3_omni_moe import Qwen3OmniMoeTalkerConfig, Qwen3OmniMoeThinkerConfig
+    from transformers.models.qwen3_omni_moe import Qwen3OmniMoeThinkerConfig
 
 class Qwen3OmniModelTester:
     def __init__(
