From 87c231e7363b5d04cd7856b6e226fed3905e3694 Mon Sep 17 00:00:00 2001 From: "CJACK." Date: Sun, 22 Mar 2026 22:17:40 +0800 Subject: [PATCH 1/2] Sanitize leaked tool-call wire format in assistant text --- internal/adapter/openai/tool_history_sanitize.go | 6 ++++++ .../adapter/openai/tool_history_sanitize_test.go | 16 ++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/internal/adapter/openai/tool_history_sanitize.go b/internal/adapter/openai/tool_history_sanitize.go index 126414a..18b05e9 100644 --- a/internal/adapter/openai/tool_history_sanitize.go +++ b/internal/adapter/openai/tool_history_sanitize.go @@ -6,6 +6,9 @@ import ( var leakedToolHistoryPattern = regexp.MustCompile(`(?is)\[TOOL_CALL_HISTORY\][\s\S]*?\[/TOOL_CALL_HISTORY\]|\[TOOL_RESULT_HISTORY\][\s\S]*?\[/TOOL_RESULT_HISTORY\]`) var emptyJSONFencePattern = regexp.MustCompile("(?is)```json\\s*```") +var leakedToolCallArrayPattern = regexp.MustCompile(`(?is)\[\{\s*"function"\s*:\s*\{[\s\S]*?\}\s*,\s*"id"\s*:\s*"call[^"]*"\s*,\s*"type"\s*:\s*"function"\s*}\]`) +var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s*>\s*\{[\s\S]*?"tool_call_id"\s*:\s*"call[^"]*"\s*}`) +var leakedMetaMarkerPattern = regexp.MustCompile(`(?is)<\s*\|\s*(?:assistant|tool|end_of_sentence|end_of_thinking)\s*\|\s*>`) func sanitizeLeakedToolHistory(text string) string { if text == "" { @@ -13,5 +16,8 @@ func sanitizeLeakedToolHistory(text string) string { } out := leakedToolHistoryPattern.ReplaceAllString(text, "") out = emptyJSONFencePattern.ReplaceAllString(out, "") + out = leakedToolCallArrayPattern.ReplaceAllString(out, "") + out = leakedToolResultBlobPattern.ReplaceAllString(out, "") + out = leakedMetaMarkerPattern.ReplaceAllString(out, "") return out } diff --git a/internal/adapter/openai/tool_history_sanitize_test.go b/internal/adapter/openai/tool_history_sanitize_test.go index 7c10ad2..3eb434a 100644 --- a/internal/adapter/openai/tool_history_sanitize_test.go +++ b/internal/adapter/openai/tool_history_sanitize_test.go @@ -77,6 +77,22 @@ func TestFlushToolSieveDropsToolResultHistoryLeak(t *testing.T) { } } +func TestSanitizeLeakedToolHistoryRemovesLeakedWireToolCallAndResult(t *testing.T) { + raw := "开始\n[{\"function\":{\"arguments\":\"{\\\"command\\\":\\\"java -version\\\"}\",\"name\":\"exec\"},\"id\":\"callb9a321\",\"type\":\"function\"}]< | Tool | >{\"content\":\"openjdk version 21\",\"tool_call_id\":\"callb9a321\"}\n结束" + got := sanitizeLeakedToolHistory(raw) + if got != "开始\n\n结束" { + t.Fatalf("unexpected sanitize result for leaked wire format: %q", got) + } +} + +func TestSanitizeLeakedToolHistoryRemovesStandaloneMetaMarkers(t *testing.T) { + raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C" + got := sanitizeLeakedToolHistory(raw) + if got != "ABC" { + t.Fatalf("unexpected sanitize result for meta markers: %q", got) + } +} + func TestProcessToolSieveChunkSplitsResultHistoryBoundary(t *testing.T) { var state toolStreamSieveState parts := []string{ From 0925e83b9b5e48a51cd97a3dbc3128261a2ceb13 Mon Sep 17 00:00:00 2001 From: "CJACK." Date: Sun, 22 Mar 2026 22:36:15 +0800 Subject: [PATCH 2/2] Stop embedding tool-call envelopes into prompt content --- internal/adapter/openai/message_normalize.go | 39 ++------------- .../adapter/openai/message_normalize_test.go | 50 +++++++------------ 2 files changed, 20 insertions(+), 69 deletions(-) diff --git a/internal/adapter/openai/message_normalize.go b/internal/adapter/openai/message_normalize.go index 0e844c9..27d1b2b 100644 --- a/internal/adapter/openai/message_normalize.go +++ b/internal/adapter/openai/message_normalize.go @@ -2,7 +2,6 @@ package openai import ( "encoding/json" - "fmt" "strings" "ds2api/internal/prompt" @@ -56,45 +55,13 @@ func normalizeOpenAIMessagesForPrompt(raw []any, traceID string) []map[string]an } func buildAssistantContentForPrompt(msg map[string]any) string { - content := normalizeOpenAIContentForPrompt(msg["content"]) - toolCalls := normalizeAssistantToolCallsForPrompt(msg["tool_calls"]) - if toolCalls == "" { - return strings.TrimSpace(content) - } - if strings.TrimSpace(content) == "" { - return toolCalls - } - return strings.TrimSpace(content + "\n" + toolCalls) -} - -func normalizeAssistantToolCallsForPrompt(v any) string { - calls, ok := v.([]any) - if !ok || len(calls) == 0 { - return "" - } - b, err := json.Marshal(calls) - if err != nil { - return strings.TrimSpace(fmt.Sprintf("%v", calls)) - } - return strings.TrimSpace(string(b)) + return strings.TrimSpace(normalizeOpenAIContentForPrompt(msg["content"])) } func buildToolContentForPrompt(msg map[string]any) string { - payload := map[string]any{ - "content": msg["content"], - } - if id := strings.TrimSpace(asString(msg["tool_call_id"])); id != "" { - payload["tool_call_id"] = id - } - if id := strings.TrimSpace(asString(msg["id"])); id != "" { - payload["id"] = id - } - if name := strings.TrimSpace(asString(msg["name"])); name != "" { - payload["name"] = name - } - content := normalizeOpenAIContentForPrompt(payload) + content := normalizeOpenAIContentForPrompt(msg["content"]) if strings.TrimSpace(content) == "" { - return `{"content":"null"}` + return "null" } return content } diff --git a/internal/adapter/openai/message_normalize_test.go b/internal/adapter/openai/message_normalize_test.go index 857e75c..31c37d4 100644 --- a/internal/adapter/openai/message_normalize_test.go +++ b/internal/adapter/openai/message_normalize_test.go @@ -34,11 +34,11 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsAndToolResult(t *tes } normalized := normalizeOpenAIMessagesForPrompt(raw, "") - if len(normalized) != 4 { - t.Fatalf("expected 4 normalized messages with assistant tool_call history preserved, got %d", len(normalized)) + if len(normalized) != 3 { + t.Fatalf("expected 3 normalized messages with tool-call-only assistant turn omitted, got %d", len(normalized)) } - toolContent, _ := normalized[3]["content"].(string) - if !strings.Contains(toolContent, `\"temp\":18`) { + toolContent, _ := normalized[2]["content"].(string) + if !strings.Contains(toolContent, `"temp":18`) { t.Fatalf("tool result should be transparently forwarded, got %q", toolContent) } if strings.Contains(toolContent, "[TOOL_RESULT_HISTORY]") { @@ -87,8 +87,8 @@ func TestNormalizeOpenAIMessagesForPrompt_ToolArrayBlocksJoined(t *testing.T) { normalized := normalizeOpenAIMessagesForPrompt(raw, "") got, _ := normalized[0]["content"].(string) - if !strings.Contains(got, `"line-1"`) || !strings.Contains(got, `"line-2"`) || !strings.Contains(got, `"name":"read_file"`) { - t.Fatalf("expected tool envelope to preserve content blocks and metadata, got %q", got) + if !strings.Contains(got, `line-1`) || !strings.Contains(got, `line-2`) { + t.Fatalf("expected tool content blocks preserved, got %q", got) } } @@ -112,7 +112,7 @@ func TestNormalizeOpenAIMessagesForPrompt_FunctionRoleCompatible(t *testing.T) { t.Fatalf("expected function role normalized as tool, got %#v", normalized[0]["role"]) } got, _ := normalized[0]["content"].(string) - if !strings.Contains(got, `"name":"legacy_tool"`) || !strings.Contains(got, `"ok":true`) { + if !strings.Contains(got, `"ok":true`) || strings.Contains(got, `"name":"legacy_tool"`) { t.Fatalf("unexpected normalized function-role content: %q", got) } } @@ -139,8 +139,8 @@ func TestNormalizeOpenAIMessagesForPrompt_EmptyToolContentPreservedAsNull(t *tes t.Fatalf("expected tool role preserved, got %#v", normalized[0]["role"]) } got, _ := normalized[0]["content"].(string) - if !strings.Contains(got, `"content":""`) || !strings.Contains(got, `"name":"noop_tool"`) || !strings.Contains(got, `"tool_call_id":"call_5"`) { - t.Fatalf("expected tool metadata preserved in content envelope, got %q", got) + if got != "null" { + t.Fatalf("expected empty tool content normalized as null string, got %q", got) } } @@ -170,12 +170,8 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantMultipleToolCallsRemainSepara } normalized := normalizeOpenAIMessagesForPrompt(raw, "") - if len(normalized) != 1 { - t.Fatalf("expected assistant tool_call-only message to be preserved, got %#v", normalized) - } - got, _ := normalized[0]["content"].(string) - if !strings.Contains(got, `"name":"search_web"`) || !strings.Contains(got, `"name":"eval_javascript"`) { - t.Fatalf("expected tool_calls payload preserved in assistant content, got %q", got) + if len(normalized) != 0 { + t.Fatalf("expected assistant tool_call-only message omitted, got %#v", normalized) } } @@ -196,12 +192,8 @@ func TestNormalizeOpenAIMessagesForPrompt_PreservesConcatenatedToolArguments(t * } normalized := normalizeOpenAIMessagesForPrompt(raw, "") - if len(normalized) != 1 { - t.Fatalf("expected assistant tool_call-only content to be preserved, got %#v", normalized) - } - got, _ := normalized[0]["content"].(string) - if !strings.Contains(got, `{}{\"query\":\"测试工具调用\"}`) { - t.Fatalf("expected concatenated arguments preserved verbatim, got %q", got) + if len(normalized) != 0 { + t.Fatalf("expected assistant tool_call-only content omitted, got %#v", normalized) } } @@ -222,12 +214,8 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantToolCallsMissingNameAreDroppe } normalized := normalizeOpenAIMessagesForPrompt(raw, "") - if len(normalized) != 1 { - t.Fatalf("expected assistant tool_calls history to be preserved even when name missing, got %#v", normalized) - } - got, _ := normalized[0]["content"].(string) - if !strings.Contains(got, "call_missing_name") { - t.Fatalf("expected raw tool_call payload preserved, got %q", got) + if len(normalized) != 0 { + t.Fatalf("expected assistant tool_calls without text omitted, got %#v", normalized) } } @@ -249,12 +237,8 @@ func TestNormalizeOpenAIMessagesForPrompt_AssistantNilContentDoesNotInjectNullLi } normalized := normalizeOpenAIMessagesForPrompt(raw, "") - if len(normalized) != 1 { - t.Fatalf("expected nil-content assistant tool_call-only message to be preserved, got %#v", normalized) - } - got, _ := normalized[0]["content"].(string) - if !strings.Contains(got, "send_file_to_user") { - t.Fatalf("expected tool call payload preserved, got %q", got) + if len(normalized) != 0 { + t.Fatalf("expected nil-content assistant tool_call-only message omitted, got %#v", normalized) } }