Skip to content
15 changes: 10 additions & 5 deletions internal/adapter/claude/handler_stream_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ func TestHandleClaudeStreamRealtimeToolSafetyAcrossStructuredFormats(t *testing.
}
}

func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) {
func TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t *testing.T) {
h := &Handler{}
resp := makeClaudeSSEHTTPResponse(
"data: {\"p\":\"response/content\",\"v\":\"Here is an example:\\n```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"Bash\\\",\\\"input\\\":{\\\"command\\\":\\\"pwd\\\"}}]}\"}",
Expand All @@ -379,8 +379,8 @@ func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.
break
}
}
if !foundToolUse {
t.Fatalf("expected tool_use for fenced example, body=%s", rec.Body.String())
if foundToolUse {
t.Fatalf("expected no tool_use for fenced example, body=%s", rec.Body.String())
}

foundToolStop := false
Expand All @@ -391,7 +391,12 @@ func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.
break
}
}
if !foundToolStop {
t.Fatalf("expected stop_reason=tool_use, body=%s", rec.Body.String())
if foundToolStop {
t.Fatalf("expected stop_reason to remain content-only, body=%s", rec.Body.String())
}
}

// TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample is a
// backward-compatible alias for the historical test name used in CI logs.
// It performs no checks of its own: it forwards the same *testing.T to
// TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample, so any
// failure is reported under this name as well.
func TestHandleClaudeStreamRealtimePromotesUnclosedFencedToolExample(t *testing.T) {
TestHandleClaudeStreamRealtimeIgnoresUnclosedFencedToolExample(t)
}
19 changes: 12 additions & 7 deletions internal/adapter/openai/handler_toolcall_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ func TestHandleNonStreamEmbeddedToolCallExamplePromotesToolCall(t *testing.T) {
}
}

func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
func TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
"data: {\"p\":\"response/content\",\"v\":\"```json\\n{\\\"tool_calls\\\":[{\\\"name\\\":\\\"search\\\",\\\"input\\\":{\\\"q\\\":\\\"go\\\"}}]}\\n```\"}",
Expand All @@ -259,20 +259,25 @@ func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
out := decodeJSONBody(t, rec.Body.String())
choices, _ := out["choices"].([]any)
choice, _ := choices[0].(map[string]any)
if choice["finish_reason"] != "tool_calls" {
t.Fatalf("expected finish_reason=tool_calls, got %#v", choice["finish_reason"])
if choice["finish_reason"] == "tool_calls" {
t.Fatalf("expected fenced example to remain content-only, got finish_reason=%#v", choice["finish_reason"])
}
msg, _ := choice["message"].(map[string]any)
toolCalls, _ := msg["tool_calls"].([]any)
if len(toolCalls) != 1 {
t.Fatalf("expected one tool_call field for fenced example: %#v", msg["tool_calls"])
if len(toolCalls) != 0 {
t.Fatalf("expected no tool_call field for fenced example: %#v", msg["tool_calls"])
}
content, _ := msg["content"].(string)
if strings.Contains(content, `"tool_calls"`) {
t.Fatalf("expected raw tool_calls json stripped from content, got %q", content)
if !strings.Contains(content, `"tool_calls"`) {
t.Fatalf("expected fenced example content preserved, got %q", content)
}
}

// TestHandleNonStreamFencedToolCallExamplePromotesToolCall is a
// backward-compatible alias for the historical test name used in CI logs.
// It performs no checks of its own: it forwards the same *testing.T to
// TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall, so any
// failure is reported under this name as well.
func TestHandleNonStreamFencedToolCallExamplePromotesToolCall(t *testing.T) {
TestHandleNonStreamFencedToolCallExampleDoesNotPromoteToolCall(t)
}

func TestHandleStreamToolCallInterceptsWithoutRawContentLeak(t *testing.T) {
h := &Handler{}
resp := makeSSEHTTPResponse(
Expand Down
18 changes: 12 additions & 6 deletions internal/format/openai/render_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package openai

import (
"encoding/json"
"strings"
"testing"
)

Expand Down Expand Up @@ -69,7 +70,7 @@ func TestBuildResponseObjectPromotesMixedProseToolPayloadToFunctionCall(t *testi
}
}

func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T) {
func TestBuildResponseObjectKeepsFencedToolPayloadAsText(t *testing.T) {
obj := BuildResponseObject(
"resp_test",
"gpt-4o",
Expand All @@ -80,19 +81,24 @@ func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T
)

outputText, _ := obj["output_text"].(string)
if outputText != "" {
t.Fatalf("expected output_text hidden for fenced tool payload, got %q", outputText)
if !strings.Contains(outputText, "\"tool_calls\"") {
t.Fatalf("expected output_text to preserve fenced tool payload, got %q", outputText)
}
output, _ := obj["output"].([]any)
if len(output) != 1 {
t.Fatalf("expected one function_call output item, got %#v", obj["output"])
t.Fatalf("expected one message output item, got %#v", obj["output"])
}
first, _ := output[0].(map[string]any)
if first["type"] != "function_call" {
t.Fatalf("expected function_call output type, got %#v", first["type"])
if first["type"] != "message" {
t.Fatalf("expected message output type, got %#v", first["type"])
}
}

// TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall is a
// backward-compatible alias for the historical test name used in CI logs.
// It performs no checks of its own: it forwards the same *testing.T to
// TestBuildResponseObjectKeepsFencedToolPayloadAsText, so any failure is
// reported under this name as well.
func TestBuildResponseObjectPromotesFencedToolPayloadToFunctionCall(t *testing.T) {
TestBuildResponseObjectKeepsFencedToolPayloadAsText(t)
}

func TestBuildResponseObjectReasoningOnlyFallsBackToOutputText(t *testing.T) {
obj := BuildResponseObject(
"resp_test",
Expand Down
24 changes: 19 additions & 5 deletions internal/js/helpers/stream-tool-sieve/parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@ const {
parseToolCallsPayload,
parseMarkupToolCalls,
parseTextKVToolCalls,
stripFencedCodeBlocks,
} = require('./parse_payload');
const { TOOL_SEGMENT_KEYWORDS } = require('./tool-keywords');

const TOOL_NAME_LOOSE_PATTERN = /[^a-z0-9]+/g;
const TOOL_MARKUP_PREFIXES = ['<tool_call', '<function_call', '<invoke'];

function extractToolNames(tools) {
if (!Array.isArray(tools) || tools.length === 0) {
Expand Down Expand Up @@ -44,6 +47,9 @@ function parseToolCallsDetailed(text, toolNames) {
return result;
}
result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized);
if (shouldSkipToolCallParsingForCodeFenceExample(normalized)) {
return result;
}

const candidates = buildToolCallCandidates(normalized);
let parsed = [];
Expand Down Expand Up @@ -89,6 +95,9 @@ function parseStandaloneToolCallsDetailed(text, toolNames) {
return result;
}
result.sawToolCallSyntax = looksLikeToolCallSyntax(trimmed);
if (shouldSkipToolCallParsingForCodeFenceExample(trimmed)) {
return result;
}
const candidates = buildToolCallCandidates(trimmed);
let parsed = [];
for (const c of candidates) {
Expand Down Expand Up @@ -223,11 +232,16 @@ function resolveAllowedToolName(name, allowed, allowedCanonical) {

function looksLikeToolCallSyntax(text) {
const lower = toStringSafe(text).toLowerCase();
return lower.includes('tool_calls')
|| lower.includes('<tool_call')
|| lower.includes('<function_call')
|| lower.includes('<invoke')
|| lower.includes('function.name:');
return TOOL_SEGMENT_KEYWORDS.some((kw) => lower.includes(kw))
|| TOOL_MARKUP_PREFIXES.some((prefix) => lower.includes(prefix));
}

/**
 * Decides whether tool-call parsing should be skipped because every piece of
 * tool-call syntax in `text` sits inside a fenced code block.
 *
 * Returns true only when the raw text looks like tool-call syntax but the
 * same text with fenced code blocks stripped no longer does. Text that never
 * looked like tool-call syntax yields false.
 *
 * @param {*} text candidate model output
 * @returns {boolean} true when parsing should be skipped
 */
function shouldSkipToolCallParsingForCodeFenceExample(text) {
  // Short-circuit first so the fence stripping only runs when the raw text
  // actually contains tool-call syntax.
  return looksLikeToolCallSyntax(text)
    ? !looksLikeToolCallSyntax(stripFencedCodeBlocks(text))
    : false;
}

module.exports = {
Expand Down
18 changes: 18 additions & 0 deletions internal/js/helpers/stream-tool-sieve/parse_payload.js
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,31 @@ function parseToolCallsPayload(payload) {
return [];
}
if (decoded.tool_calls) {
if (isLikelyChatMessageEnvelope(decoded)) {
return [];
}
return parseToolCallList(decoded.tool_calls);
}

const one = parseToolCallItem(decoded);
return one ? [one] : [];
}

/**
 * Heuristic check for a decoded JSON value that is a chat-message envelope
 * (a message object that happens to carry `tool_calls`) rather than a bare
 * tool-call payload.
 *
 * The value must be a non-null, non-array object with its own `tool_calls`
 * property. It then counts as an envelope when its `role` (trimmed,
 * case-insensitive) is assistant/tool/user/system, or when it has its own
 * `tool_call_id` or `content` property.
 *
 * @param {*} value decoded JSON value
 * @returns {boolean} true when the value looks like a chat message envelope
 */
function isLikelyChatMessageEnvelope(value) {
  const isPlainObject = Boolean(value) && typeof value === 'object' && !Array.isArray(value);
  if (!isPlainObject) {
    return false;
  }

  // Own-property test that stays safe for objects shadowing hasOwnProperty.
  const owns = (key) => Object.prototype.hasOwnProperty.call(value, key);
  if (!owns('tool_calls')) {
    return false;
  }

  const normalizedRole = toStringSafe(value.role).trim().toLowerCase();
  switch (normalizedRole) {
    case 'assistant':
    case 'tool':
    case 'user':
    case 'system':
      return true;
    default:
      return owns('tool_call_id') || owns('content');
  }
}

function parseMarkupToolCalls(text) {
const raw = toStringSafe(text).trim();
if (!raw) {
Expand Down
40 changes: 12 additions & 28 deletions internal/js/helpers/stream-tool-sieve/sieve.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
'use strict';
const { resetIncrementalToolState, noteText, insideCodeFence } = require('./state');
const {
resetIncrementalToolState,
noteText,
insideCodeFenceWithState,
} = require('./state');
const { parseStandaloneToolCallsDetailed } = require('./parse');
const { extractJSONObjectFrom } = require('./jsonscan');

const { TOOL_SEGMENT_KEYWORDS, earliestKeywordIndex } = require('./tool-keywords');
function processToolSieveChunk(state, chunk, toolNames) {
if (!state) {
return [];
Expand Down Expand Up @@ -53,7 +57,7 @@ function processToolSieveChunk(state, chunk, toolNames) {
if (!pending) {
break;
}
const start = findToolSegmentStart(pending);
const start = findToolSegmentStart(state, pending);
if (start >= 0) {
const prefix = pending.slice(0, start);
if (prefix) {
Expand Down Expand Up @@ -143,32 +147,21 @@ function findSuspiciousPrefixStart(s) {
return start;
}

function findToolSegmentStart(s) {
function findToolSegmentStart(state, s) {
if (!s) {
return -1;
}
const lower = s.toLowerCase();
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]', '[tool_result_history]'];
let offset = 0;
while (true) {
let bestKeyIdx = -1;
let matchedKeyword = '';
for (const kw of keywords) {
const idx = lower.indexOf(kw, offset);
if (idx >= 0) {
if (bestKeyIdx < 0 || idx < bestKeyIdx) {
bestKeyIdx = idx;
matchedKeyword = kw;
}
}
}
const { index: bestKeyIdx, keyword: matchedKeyword } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS, offset);
if (bestKeyIdx < 0) {
return -1;
}
const keyIdx = bestKeyIdx;
const start = s.slice(0, keyIdx).lastIndexOf('{');
const candidateStart = start >= 0 ? start : keyIdx;
if (!insideCodeFence(s.slice(0, candidateStart))) {
if (!insideCodeFenceWithState(state, s.slice(0, candidateStart))) {
return candidateStart;
}
offset = keyIdx + matchedKeyword.length;
Expand All @@ -181,14 +174,7 @@ function consumeToolCapture(state, toolNames) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
const lower = captured.toLowerCase();
let keyIdx = -1;
const keywords = ['tool_calls', 'function.name:', '[tool_call_history]', '[tool_result_history]'];
for (const kw of keywords) {
const idx = lower.indexOf(kw);
if (idx >= 0 && (keyIdx < 0 || idx < keyIdx)) {
keyIdx = idx;
}
}
const { index: keyIdx } = earliestKeywordIndex(lower, TOOL_SEGMENT_KEYWORDS);
if (keyIdx < 0) {
return { ready: false, prefix: '', calls: [], suffix: '' };
}
Expand All @@ -211,7 +197,7 @@ function consumeToolCapture(state, toolNames) {
}
const prefixPart = captured.slice(0, actualStart);
const suffixPart = captured.slice(obj.end);
if (insideCodeFence((state.recentTextTail || '') + prefixPart)) {
if (insideCodeFenceWithState(state, prefixPart)) {
return {
ready: true,
prefix: captured,
Expand Down Expand Up @@ -281,7 +267,6 @@ function trimWrappingJSONFence(prefix, suffix) {
if (header && header !== 'json') {
return { prefix, suffix };
}

const leftTrimmedSuffix = (suffix || '').replace(/^[ \t\r\n]+/g, '');
if (!leftTrimmedSuffix.startsWith('```')) {
return { prefix, suffix };
Expand All @@ -292,7 +277,6 @@ function trimWrappingJSONFence(prefix, suffix) {
suffix: (suffix || '').slice(consumed + 3),
};
}

module.exports = {
processToolSieveChunk,
flushToolSieve,
Expand Down
Loading
Loading