Skip to content

Commit e308e32

Browse files
authored
fix api_cli & qwen25 parser (#1085)
1 parent 230d9d8 commit e308e32

File tree

4 files changed

+8
-8
lines changed

4 files changed

+8
-8
lines changed

lightllm/common/fused_moe/grouped_fused_moe.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1000,13 +1000,13 @@ def outplace_fused_experts_impl_fake(
10001000
hidden_states: torch.Tensor,
10011001
w1: torch.Tensor,
10021002
w2: torch.Tensor,
1003-
# optional bias for w1 and w2
1004-
w1_bias: Optional[torch.Tensor],
1005-
w2_bias: Optional[torch.Tensor],
10061003
topk_weights: torch.Tensor,
10071004
topk_ids: torch.Tensor,
10081005
use_fp8_w8a8: bool = False,
10091006
use_int8_w8a16: bool = False,
1007+
# optional bias for w1 and w2
1008+
w1_bias: Optional[torch.Tensor] = None,
1009+
w2_bias: Optional[torch.Tensor] = None,
10101010
w1_scale: Optional[torch.Tensor] = None,
10111011
w2_scale: Optional[torch.Tensor] = None,
10121012
a1_scale: Optional[torch.Tensor] = None,

lightllm/server/api_cli.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,7 +128,7 @@ def make_argument_parser() -> argparse.ArgumentParser:
128128
parser.add_argument(
129129
"--tool_call_parser",
130130
type=str,
131-
choices=["qwen25", "llama3", "mistral"],
131+
choices=["qwen25", "llama3", "mistral", "deepseekv3", "qwen"],
132132
default=None,
133133
help="tool call parser type",
134134
)

lightllm/server/api_models.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -168,7 +168,7 @@ class ChatMessage(BaseModel):
168168
class ChatCompletionResponseChoice(BaseModel):
169169
index: int
170170
message: ChatMessage
171-
finish_reason: Optional[Literal["stop", "length", "function_call"]] = None
171+
finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
172172

173173

174174
class ChatCompletionResponse(BaseModel):

lightllm/server/function_call_parser.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -393,8 +393,8 @@ def __init__(self):
393393
Initializes the detector with necessary state variables.
394394
"""
395395
super().__init__()
396-
self.bot_token = "<tool_call>\n"
397-
self.eot_token = "\n</tool_call>"
396+
self.bot_token = "<tool_call>"
397+
self.eot_token = "</tool_call>"
398398
self.tool_call_separator = "\n"
399399
self._normal_text_buffer = "" # Buffer for handling partial end tokens
400400

@@ -440,7 +440,7 @@ def parse_streaming_increment(self, new_text: str, tools: List[Tool]) -> Streami
440440
self._normal_text_buffer += result.normal_text
441441

442442
# Check if buffer contains complete end token (without leading newline)
443-
end_token_without_newline = self.eot_token[1:] # "</tool_call>"
443+
end_token_without_newline = self.eot_token # "</tool_call>"
444444
if end_token_without_newline in self._normal_text_buffer:
445445
cleaned_text = self._normal_text_buffer.replace(end_token_without_newline, "")
446446
self._normal_text_buffer = ""

0 commit comments

Comments
 (0)