Fix api_cli tool_call_parser choices and qwen25 tool-call parser tokens (#1085)

shihaobai · web-flow · commit e308e329426c · 2025-10-23T15:11:41.000+08:00
diff --git a/lightllm/common/fused_moe/grouped_fused_moe.py b/lightllm/common/fused_moe/grouped_fused_moe.py
@@ -1000,13 +1000,13 @@ def outplace_fused_experts_impl_fake(
     hidden_states: torch.Tensor,
     w1: torch.Tensor,
     w2: torch.Tensor,
-    # optional bias for w1 and w2
-    w1_bias: Optional[torch.Tensor],
-    w2_bias: Optional[torch.Tensor],
     topk_weights: torch.Tensor,
     topk_ids: torch.Tensor,
     use_fp8_w8a8: bool = False,
     use_int8_w8a16: bool = False,
+    # optional bias for w1 and w2
+    w1_bias: Optional[torch.Tensor] = None,
+    w2_bias: Optional[torch.Tensor] = None,
     w1_scale: Optional[torch.Tensor] = None,
     w2_scale: Optional[torch.Tensor] = None,
     a1_scale: Optional[torch.Tensor] = None,
diff --git a/lightllm/server/api_cli.py b/lightllm/server/api_cli.py
@@ -128,7 +128,7 @@ def make_argument_parser() -> argparse.ArgumentParser:
     parser.add_argument(
         "--tool_call_parser",
         type=str,
-        choices=["qwen25", "llama3", "mistral"],
+        choices=["qwen25", "llama3", "mistral", "deepseekv3", "qwen"],
         default=None,
         help="tool call parser type",
     )
diff --git a/lightllm/server/api_models.py b/lightllm/server/api_models.py
@@ -168,7 +168,7 @@ class ChatMessage(BaseModel):
 class ChatCompletionResponseChoice(BaseModel):
     index: int
     message: ChatMessage
-    finish_reason: Optional[Literal["stop", "length", "function_call"]] = None
+    finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
 
 
 class ChatCompletionResponse(BaseModel):
diff --git a/lightllm/server/function_call_parser.py b/lightllm/server/function_call_parser.py
@@ -393,8 +393,8 @@ def __init__(self):
         Initializes the detector with necessary state variables.
         """
         super().__init__()
-        self.bot_token = "<tool_call>\n"
-        self.eot_token = "\n</tool_call>"
+        self.bot_token = "<tool_call>"
+        self.eot_token = "</tool_call>"
         self.tool_call_separator = "\n"
         self._normal_text_buffer = ""  # Buffer for handling partial end tokens
 
@@ -440,7 +440,7 @@ def parse_streaming_increment(self, new_text: str, tools: List[Tool]) -> Streami
             self._normal_text_buffer += result.normal_text
 
             # Check if buffer contains complete end token (without leading newline)
-            end_token_without_newline = self.eot_token[1:]  # "</tool_call>"
+            end_token_without_newline = self.eot_token  # "</tool_call>"
             if end_token_without_newline in self._normal_text_buffer:
                 cleaned_text = self._normal_text_buffer.replace(end_token_without_newline, "")
                 self._normal_text_buffer = ""

Original file line number	Diff line number	Diff line change
`@@ -128,7 +128,7 @@ def make_argument_parser() -> argparse.ArgumentParser:`
`128`	`128`	`parser.add_argument(`
`129`	`129`	`"--tool_call_parser",`
`130`	`130`	`type=str,`
`131`		`- choices=["qwen25", "llama3", "mistral"],`
	`131`	`+ choices=["qwen25", "llama3", "mistral", "deepseekv3", "qwen"],`
`132`	`132`	`default=None,`
`133`	`133`	`help="tool call parser type",`
`134`	`134`	`)`