Skip to content

Commit dd82588

Browse files
committed
fix : 修复图片上传与修复工具异常问题
1 parent 5510125 commit dd82588

4 files changed

Lines changed: 341 additions & 128 deletions

File tree

internal/chat.go

Lines changed: 18 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,18 @@ func makeUpstreamRequest(token string, messages []Message, model string, imageUR
156156
autoWebSearch = false
157157
}
158158

159+
// 所有请求添加图片处理MCP服务器
160+
vlmServers := []string{"vlm-image-search", "vlm-image-recognition", "vlm-image-processing"}
161+
existingSet := make(map[string]bool)
162+
for _, s := range mcpServers {
163+
existingSet[s] = true
164+
}
165+
for _, s := range vlmServers {
166+
if !existingSet[s] {
167+
mcpServers = append(mcpServers, s)
168+
}
169+
}
170+
159171
latestUserContent := extractLatestUserContent(messages)
160172

161173
signature := GenerateSignature(userID, requestID, latestUserContent, timestamp)
@@ -988,16 +1000,18 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
9881000
if !hasContent {
9891001
LogError("Stream response 200 but no content received")
9901002
}
991-
992-
// 检测工具调用
9931003
stopReason := "stop"
9941004
var toolCalls []ToolCall
9951005
if len(tools) > 0 {
996-
toolCalls = ExtractToolInvocations(fullContent.String())
1006+
rawContent := fullContent.String()
1007+
toolCalls = ExtractToolInvocations(rawContent)
9971008
if len(toolCalls) > 0 {
9981009
stopReason = "tool_calls"
999-
// 发送工具调用 chunk
1010+
LogDebug("[Stream] Detected %d tool calls, sending tool_calls chunks", len(toolCalls))
10001011
for i, tc := range toolCalls {
1012+
if tc.ID == "" {
1013+
tc.ID = generateCallID()
1014+
}
10011015
toolChunk := ChatCompletionChunk{
10021016
ID: completionID,
10031017
Object: "chat.completion.chunk",
@@ -1037,8 +1051,6 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
10371051

10381052
finalData, _ := json.Marshal(finalChunk)
10391053
fmt.Fprintf(w, "data: %s\n\n", finalData)
1040-
1041-
// 发送 usage chunk(如果请求了)
10421054
if includeUsage {
10431055
usageChunk := ChatCompletionChunkResponse{
10441056
ID: completionID,
@@ -1062,7 +1074,6 @@ func handleStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionI
10621074
}
10631075

10641076
func handleNonStreamResponse(w http.ResponseWriter, body io.ReadCloser, completionID, modelName string, inputTokens int64, tools []Tool) int64 {
1065-
// 先设置响应头并 flush,让客户端知道请求已被接受
10661077
w.Header().Set("Content-Type", "application/json")
10671078
w.Header().Set("X-Request-Id", completionID)
10681079
if flusher, ok := w.(http.Flusher); ok {
@@ -1095,8 +1106,6 @@ func handleNonStreamResponse(w http.ResponseWriter, body io.ReadCloser, completi
10951106
if err := json.Unmarshal([]byte(payload), &upstream); err != nil {
10961107
continue
10971108
}
1098-
1099-
// 检测上游错误
11001109
if upstream.HasError() {
11011110
LogError("Upstream error: %s", upstream.GetErrorMessage())
11021111
chunks = append(chunks, fmt.Sprintf("[上游服务错误: %s]", upstream.GetErrorMessage()))
@@ -1143,7 +1152,6 @@ func handleNonStreamResponse(w http.ResponseWriter, body io.ReadCloser, completi
11431152
if textBeforeBlock != "" {
11441153
chunks = append(chunks, textBeforeBlock)
11451154
}
1146-
// 解析图片搜索结果
11471155
if results := ParseImageSearchResults(editContent); len(results) > 0 {
11481156
pendingImageSearchMarkdown = FormatImageSearchResults(results)
11491157
}
@@ -1209,20 +1217,15 @@ func handleNonStreamResponse(w http.ResponseWriter, body io.ReadCloser, completi
12091217
if fullContent == "" && fullReasoning == "" {
12101218
LogError("Non-stream response 200 but no content received")
12111219
}
1212-
1213-
// 检测工具调用
12141220
stopReason := "stop"
12151221
var toolCalls []ToolCall
12161222
if len(tools) > 0 {
12171223
toolCalls = ExtractToolInvocations(fullContent)
12181224
if len(toolCalls) > 0 {
12191225
stopReason = "tool_calls"
1220-
// 移除工具调用 JSON
12211226
fullContent = RemoveToolJSONContent(fullContent)
12221227
}
12231228
}
1224-
1225-
// 计算输出 token
12261229
outputTokens = CountTokens(fullContent) + CountTokens(fullReasoning)
12271230

12281231
response := ChatCompletionResponse{
@@ -1250,15 +1253,11 @@ func handleNonStreamResponse(w http.ResponseWriter, body io.ReadCloser, completi
12501253
json.NewEncoder(w).Encode(response)
12511254
return outputTokens
12521255
}
1253-
1254-
// handleStreamResponseWithRetry 流式响应处理(带重试支持)
12551256
func handleStreamResponseWithRetry(w http.ResponseWriter, body io.ReadCloser, completionID, modelName string, inputTokens int64, includeUsage bool, tools []Tool, isFirstAttempt bool) UpstreamResult {
12561257
result := UpstreamResult{Success: true, HasContent: false}
12571258
var outputTokens int64
12581259
var fullContent strings.Builder
12591260
var upstreamError string
1260-
1261-
// 只在第一次尝试时设置响应头
12621261
if isFirstAttempt {
12631262
w.Header().Set("Content-Type", "text/event-stream")
12641263
w.Header().Set("Cache-Control", "no-cache")
@@ -1272,8 +1271,6 @@ func handleStreamResponseWithRetry(w http.ResponseWriter, body io.ReadCloser, co
12721271
result.ErrorMessage = "streaming not supported"
12731272
return result
12741273
}
1275-
1276-
// 发送第一个 chunk 带 role
12771274
firstChunk := ChatCompletionChunk{
12781275
ID: completionID,
12791276
Object: "chat.completion.chunk",
@@ -1315,8 +1312,6 @@ func handleStreamResponseWithRetry(w http.ResponseWriter, body io.ReadCloser, co
13151312
if err := json.Unmarshal([]byte(payload), &upstream); err != nil {
13161313
continue
13171314
}
1318-
1319-
// 检测上游错误
13201315
if upstream.HasError() {
13211316
upstreamError = upstream.GetErrorMessage()
13221317
LogError("Upstream error: %s", upstreamError)

internal/models.go

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -64,43 +64,31 @@ func GetTargetModel(model string) string {
6464
return model
6565
}
6666

67-
// IsValidModel 验证模型是否有效
6867
func IsValidModel(model string) bool {
6968
baseModel, _, _ := ParseModelName(model)
70-
// 检查基础模型映射
7169
if _, ok := BaseModelMapping[baseModel]; ok {
7270
return true
7371
}
74-
// 检查动态模型配置
7572
if GetUpstreamConfig(model) != nil {
7673
return true
7774
}
7875
return false
7976
}
80-
81-
// OpenAI 格式的消息内容项
8277
type ContentPart struct {
8378
Type string `json:"type"`
8479
Text string `json:"text,omitempty"`
8580
ImageURL *MediaURL `json:"image_url,omitempty"`
8681
VideoURL *MediaURL `json:"video_url,omitempty"`
8782
}
88-
89-
// MediaURL 媒体 URL(图片或视频)
9083
type MediaURL struct {
9184
URL string `json:"url"`
9285
}
9386

94-
// ImageURL 兼容旧版本
9587
type ImageURL = MediaURL
96-
97-
// Message 支持纯文本和多模态内容
9888
type Message struct {
9989
Role string `json:"role"`
10090
Content interface{} `json:"content"` // string 或 []ContentPart
10191
}
102-
103-
// ParseContent 解析消息内容,返回文本、图片URL列表和视频URL列表
10492
func (m *Message) ParseContent() (text string, imageURLs []string) {
10593
_, imageURLs, _ = m.ParseContentFull()
10694
text, _, _ = m.ParseContentFull()
@@ -147,13 +135,8 @@ func (m *Message) ToUpstreamMessage(urlToFileID map[string]string) map[string]in
147135
"content": text,
148136
}
149137
}
138+
150139
var content []interface{}
151-
if text != "" {
152-
content = append(content, map[string]interface{}{
153-
"type": "text",
154-
"text": text,
155-
})
156-
}
157140
for _, imgURL := range imageURLs {
158141
urlPreview := imgURL
159142
if len(urlPreview) > 60 {
@@ -171,18 +154,35 @@ func (m *Message) ToUpstreamMessage(urlToFileID map[string]string) map[string]in
171154
LogDebug("[ToUpstreamMessage] Image NOT matched: %s", urlPreview)
172155
}
173156
}
174-
// 添加视频
175157
for _, vidURL := range videoURLs {
158+
urlPreview := vidURL
159+
if len(urlPreview) > 60 {
160+
urlPreview = urlPreview[:60] + "..."
161+
}
176162
if fileID, ok := urlToFileID[vidURL]; ok {
163+
LogDebug("[ToUpstreamMessage] Video MATCHED: %s -> %s", urlPreview, fileID)
177164
content = append(content, map[string]interface{}{
178165
"type": "video_url",
179166
"video_url": map[string]interface{}{
180167
"url": fileID,
181168
},
182169
})
170+
} else {
171+
LogDebug("[ToUpstreamMessage] Video NOT matched: %s", urlPreview)
172+
}
173+
}
174+
if text != "" {
175+
content = append(content, map[string]interface{}{
176+
"type": "text",
177+
"text": text,
178+
})
179+
}
180+
if len(content) == 0 || (len(content) == 1 && text != "") {
181+
return map[string]interface{}{
182+
"role": m.Role,
183+
"content": text,
183184
}
184185
}
185-
186186
return map[string]interface{}{
187187
"role": m.Role,
188188
"content": content,

0 commit comments

Comments
 (0)