diff --git a/AGENTS.md b/AGENTS.md index 8604263..3538eef 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -41,6 +41,7 @@ Each agent implements the `Agent` interface in `types.ts` (`name`, async `run(pr - `claude.ts` / `codex.ts` / `copilot.ts` / `pi.ts`: spawn the CLI per iteration in non-interactive mode. Codex uses `--output-schema` pointing at the run's schema file; Claude uses `--json-schema`, treats the last successful structured result as terminal, raises `PermanentAgentError` for low credit balance exits, and after a short grace period shuts down a lingering Claude process tree if it stays alive. Copilot uses JSONL output plus prompt-level schema instructions, then parses the final `assistant.message` content. Pi runs in JSON mode, appends the final output schema to the prompt, and parses the assistant JSON reply from Pi's streamed events. - `rovodev.ts` / `opencode.ts`: long-running local HTTP servers managed via `managed-process.ts` (start once, reuse across iterations, close on shutdown). OpenCode creates a per-run session and applies a blanket allow rule to avoid prompt blocking. - `acp.ts`: handles `acp:` specs through the bundled `acpx` runtime and registry. It keeps a persistent per-run session keyed by run ID under `.gnhf/runs//acp-sessions`, embeds the output schema in the prompt, parses only output text deltas as final JSON, records ACP lifecycle events in `gnhf.log`, and reports per-iteration token usage from ACP `used` deltas when available with prompt-length plus tool-call estimates as a fallback. Estimated ACP usage is marked for the renderer so totals are prefixed with `~`. Path and arg overrides are native-agent-only; ACP targets are customized via `acpRegistryOverrides` in config (a target-name -> spawn-command map fed into acpx's agent registry) or by passing a raw ACP server command directly after `acp:`. Raw command specs are redacted to `acp:custom`/`custom` in debug logs, errors, and telemetry. The e2e suite exercises the full wire path against the `acp-mock` package registered through that same override mechanism. +- `json-extract.ts`: shared recovery for final agent JSON that may be fenced or prose-wrapped; use it before adding ad-hoc parsing to integrations that must validate output against the agent schema. - `stream-utils.ts`: shared JSONL parsing, `AbortSignal` wiring, and child-process lifecycle helpers. When touching agent streaming, start here. Reserved args managed by gnhf are rejected in `config.ts` via `isReservedAgentArg` - if you add a new flag that gnhf controls, add it to that list so user overrides can't shadow it. diff --git a/src/core/agents/acp.ts b/src/core/agents/acp.ts index 09fd8d1..4737aac 100644 --- a/src/core/agents/acp.ts +++ b/src/core/agents/acp.ts @@ -11,6 +11,7 @@ import { } from "acpx/runtime"; import { appendDebugLog, serializeError } from "../debug-log.js"; import { redactAcpTargetForLogs } from "../config.js"; +import { parseAgentJson } from "./json-extract.js"; import { PermanentAgentError, validateAgentOutput, @@ -49,88 +50,6 @@ When the iteration is complete, your final assistant message must be a single JS ${JSON.stringify(schema, null, 2)}`; } -function stripJsonFences(text: string): string { - const trimmed = text.trim(); - if (!trimmed.startsWith("```")) return trimmed; - - const withoutOpen = trimmed.replace(/^```(?:json)?\s*\n?/, ""); - return withoutOpen.replace(/\n?```\s*$/, "").trim(); -} - -/** - * Walk forward from `start` (which must be `{`) and return the substring of - * the first balanced JSON object, or null if no balanced object is found. - * Tracks string state and escape sequences so braces inside strings don't - * affect depth. - */ -function tryExtractBalancedObject(text: string, start: number): string | null { - if (text[start] !== "{") return null; - let depth = 0; - let inString = false; - let escape = false; - for (let i = start; i < text.length; i++) { - const ch = text[i]; - if (escape) { - escape = false; - continue; - } - if (inString) { - if (ch === "\\") escape = true; - else if (ch === '"') inString = false; - continue; - } - if (ch === '"') { - inString = true; - continue; - } - if (ch === "{") depth++; - else if (ch === "}") { - depth--; - if (depth === 0) { - const candidate = text.slice(start, i + 1); - try { - JSON.parse(candidate); - return candidate; - } catch { - return null; - } - } - } - } - return null; -} - -/** - * Look for a balanced JSON object inside `text`, preferring the rightmost one - * (since the agent is supposed to end the message with the structured answer). - */ -function extractLastJsonObject(text: string): string | null { - let cursor = text.lastIndexOf("{"); - while (cursor >= 0) { - const candidate = tryExtractBalancedObject(text, cursor); - if (candidate !== null) return candidate; - cursor = text.lastIndexOf("{", cursor - 1); - } - return null; -} - -function parseAgentJson(text: string): unknown | null { - const cleaned = stripJsonFences(text); - if (!cleaned) return null; - try { - return JSON.parse(cleaned); - } catch { - // fall through to extraction - } - const extracted = extractLastJsonObject(cleaned); - if (!extracted) return null; - try { - return JSON.parse(extracted); - } catch { - return null; - } -} - function isAbortError(error: unknown): boolean { return ( error instanceof Error && diff --git a/src/core/agents/json-extract.test.ts b/src/core/agents/json-extract.test.ts new file mode 100644 index 0000000..715bd95 --- /dev/null +++ b/src/core/agents/json-extract.test.ts @@ -0,0 +1,138 @@ +import { describe, expect, it } from "vitest"; +import { + extractLastJsonObject, + parseAgentJson, + stripJsonFences, + tryExtractBalancedObject, +} from "./json-extract.js"; + +describe("stripJsonFences", () => { + it("returns trimmed text when there are no fences", () => { + expect(stripJsonFences(' {"a":1} ')).toBe('{"a":1}'); + }); + + it("strips a ```json fence", () => { + expect(stripJsonFences('```json\n{"a":1}\n```')).toBe('{"a":1}'); + }); + + it("strips a bare ``` fence", () => { + expect(stripJsonFences('```\n{"a":1}\n```')).toBe('{"a":1}'); + }); +}); + +describe("tryExtractBalancedObject", () => { + it("returns the balanced object starting at the index", () => { + const text = 'noise {"a":1} more'; + expect(tryExtractBalancedObject(text, 6)).toBe('{"a":1}'); + }); + + it("respects nested braces", () => { + const text = '{"a":{"b":2}}'; + expect(tryExtractBalancedObject(text, 0)).toBe('{"a":{"b":2}}'); + }); + + it("ignores braces inside strings", () => { + const text = '{"a":"{not real}"}'; + expect(tryExtractBalancedObject(text, 0)).toBe('{"a":"{not real}"}'); + }); + + it("returns null when start is not an opening brace", () => { + expect(tryExtractBalancedObject('"a":1', 0)).toBeNull(); + }); + + it("returns null when the object is unterminated", () => { + expect(tryExtractBalancedObject('{"a":1', 0)).toBeNull(); + }); +}); + +describe("extractLastJsonObject", () => { + it("returns the rightmost balanced JSON object", () => { + const text = 'first {"a":1} then {"b":2}'; + expect(extractLastJsonObject(text)).toBe('{"b":2}'); + }); + + it("returns null when there is no balanced object", () => { + expect(extractLastJsonObject("plain prose with no braces")).toBeNull(); + }); + + it("falls back when the rightmost candidate is unterminated", () => { + const text = 'good {"a":1} bad {"b":'; + expect(extractLastJsonObject(text)).toBe('{"a":1}'); + }); + + it("continues scanning when the rightmost parsed object is rejected", () => { + const text = + 'final {"success":true,"summary":"mentions {}","key_changes_made":[],"key_learnings":[]} trailing'; + + expect( + extractLastJsonObject(text, (value) => + Boolean( + value && + typeof value === "object" && + "success" in value && + "summary" in value, + ), + ), + ).toBe( + '{"success":true,"summary":"mentions {}","key_changes_made":[],"key_learnings":[]}', + ); + }); +}); + +describe("parseAgentJson", () => { + it("parses pure JSON", () => { + expect(parseAgentJson('{"success":true}')).toEqual({ success: true }); + }); + + it("parses JSON wrapped in markdown fences", () => { + expect(parseAgentJson('```json\n{"success":true}\n```')).toEqual({ + success: true, + }); + }); + + it("recovers JSON when the agent prepends prose (the rovodev #144 case)", () => { + const text = + 'BUILD SUCCESS confirms the Java changes are valid.\n\n{"success": true, "summary": "x", "key_changes_made": [], "key_learnings": []}'; + expect(parseAgentJson(text)).toEqual({ + success: true, + summary: "x", + key_changes_made: [], + key_learnings: [], + }); + }); + + it("recovers JSON when the agent appends trailing prose", () => { + const text = '{"success":true}\n\nDone!'; + expect(parseAgentJson(text)).toEqual({ success: true }); + }); + + it("prefers the last JSON object when multiple are present", () => { + const text = 'log: {"step":1}\nfinal: {"step":2}'; + expect(parseAgentJson(text)).toEqual({ step: 2 }); + }); + + it("does not extract nested objects after a parsed top-level object is rejected", () => { + const text = + '{"success":true,"summary":{"success":true,"summary":"nested","key_changes_made":[],"key_learnings":[]},"key_changes_made":[],"key_learnings":[]}'; + + expect( + parseAgentJson(text, (value) => + Boolean( + value && + typeof value === "object" && + "summary" in value && + typeof value.summary === "string", + ), + ), + ).toBeNull(); + }); + + it("returns null for unparseable text", () => { + expect(parseAgentJson("just prose, no json here")).toBeNull(); + }); + + it("returns null for empty input", () => { + expect(parseAgentJson("")).toBeNull(); + expect(parseAgentJson(" ")).toBeNull(); + }); +}); diff --git a/src/core/agents/json-extract.ts b/src/core/agents/json-extract.ts new file mode 100644 index 0000000..eb7295a --- /dev/null +++ b/src/core/agents/json-extract.ts @@ -0,0 +1,108 @@ +/** + * Helpers for pulling a JSON object out of an agent's final assistant message. + * + * Agents are instructed to return JSON only, but several (rovodev, ACP targets) + * sometimes prepend prose or wrap the JSON in markdown fences. These helpers + * recover the structured payload in those cases without changing behaviour for + * the well-formed pure-JSON path. + */ + +export function stripJsonFences(text: string): string { + const trimmed = text.trim(); + if (!trimmed.startsWith("```")) return trimmed; + + const withoutOpen = trimmed.replace(/^```(?:json)?\s*\n?/, ""); + return withoutOpen.replace(/\n?```\s*$/, "").trim(); +} + +/** + * Walk forward from `start` (which must be `{`) and return the substring of + * the first balanced JSON object, or null if no balanced object is found. + * Tracks string state and escape sequences so braces inside strings don't + * affect depth. + */ +export function tryExtractBalancedObject( + text: string, + start: number, +): string | null { + if (text[start] !== "{") return null; + let depth = 0; + let inString = false; + let escape = false; + for (let i = start; i < text.length; i++) { + const ch = text[i]; + if (escape) { + escape = false; + continue; + } + if (inString) { + if (ch === "\\") escape = true; + else if (ch === '"') inString = false; + continue; + } + if (ch === '"') { + inString = true; + continue; + } + if (ch === "{") depth++; + else if (ch === "}") { + depth--; + if (depth === 0) { + const candidate = text.slice(start, i + 1); + try { + JSON.parse(candidate); + return candidate; + } catch { + return null; + } + } + } + } + return null; +} + +/** + * Look for a balanced JSON object inside `text`, preferring the rightmost one + * (since the agent is supposed to end the message with the structured answer). + */ +export function extractLastJsonObject( + text: string, + accepts?: (value: unknown) => boolean, +): string | null { + let cursor = text.lastIndexOf("{"); + while (cursor >= 0) { + const candidate = tryExtractBalancedObject(text, cursor); + if (candidate !== null) { + if (!accepts) return candidate; + try { + if (accepts(JSON.parse(candidate))) return candidate; + } catch { + // Keep scanning earlier objects when a candidate is not valid JSON. + } + } + cursor = text.lastIndexOf("{", cursor - 1); + } + return null; +} + +export function parseAgentJson( + text: string, + accepts?: (value: unknown) => boolean, +): unknown | null { + const cleaned = stripJsonFences(text); + if (!cleaned) return null; + try { + const parsed = JSON.parse(cleaned); + if (!accepts || accepts(parsed)) return parsed; + return null; + } catch { + // fall through to extraction + } + const extracted = extractLastJsonObject(cleaned, accepts); + if (!extracted) return null; + try { + return JSON.parse(extracted); + } catch { + return null; + } +} diff --git a/src/core/agents/rovodev.test.ts b/src/core/agents/rovodev.test.ts index 38e384e..e2986d9 100644 --- a/src/core/agents/rovodev.test.ts +++ b/src/core/agents/rovodev.test.ts @@ -441,6 +441,73 @@ describe("RovoDevAgent", () => { ); }); + it("rejects when extracted JSON does not match the output schema", async () => { + const proc = createMockProcess(); + mockSpawn.mockReturnValue(proc); + + fetchMock + .mockResolvedValueOnce(jsonResponse({ status: "healthy" })) + .mockResolvedValueOnce( + jsonResponse({ session_id: "session-123", title: "gnhf" }), + ) + .mockResolvedValueOnce(jsonResponse({ message: "ok", prompt_set: true })) + .mockResolvedValueOnce(jsonResponse({ response: "Chat message set" })) + .mockResolvedValueOnce( + textResponse( + [ + "event: part_start", + 'data: {"index":0,"part":{"content":"Here is a detail: {\\"success\\":true}","part_kind":"text"},"event_kind":"part_start"}', + "", + "event: close", + "data: ", + "", + ].join("\n"), + ), + ) + .mockResolvedValueOnce(jsonResponse({ message: "deleted" })); + + await expect(agent.run("test", "/repo")).rejects.toThrow( + "Failed to parse rovodev output: summary is required", + ); + }); + + it("recovers JSON when rovodev streams a prose preamble before the JSON (issue #144)", async () => { + const proc = createMockProcess(); + mockSpawn.mockReturnValue(proc); + + const finalContent = + 'BUILD SUCCESS confirms the Java changes are valid.\\n\\n{\\"success\\": true, \\"summary\\": \\"x\\", \\"key_changes_made\\": [], \\"key_learnings\\": []}'; + + fetchMock + .mockResolvedValueOnce(jsonResponse({ status: "healthy" })) + .mockResolvedValueOnce( + jsonResponse({ session_id: "session-123", title: "gnhf" }), + ) + .mockResolvedValueOnce(jsonResponse({ message: "ok", prompt_set: true })) + .mockResolvedValueOnce(jsonResponse({ response: "Chat message set" })) + .mockResolvedValueOnce( + textResponse( + [ + "event: part_start", + `data: {"index":0,"part":{"content":"${finalContent}","part_kind":"text"},"event_kind":"part_start"}`, + "", + "event: close", + "data: ", + "", + ].join("\n"), + ), + ) + .mockResolvedValueOnce(jsonResponse({ message: "deleted" })); + + const result = await agent.run("test", "/repo"); + expect(result.output).toEqual({ + success: true, + summary: "x", + key_changes_made: [], + key_learnings: [], + }); + }); + it("treats text separated by tool activity as distinct message segments", async () => { const proc = createMockProcess(); mockSpawn.mockReturnValue(proc); diff --git a/src/core/agents/rovodev.ts b/src/core/agents/rovodev.ts index 687f661..5fc571b 100644 --- a/src/core/agents/rovodev.ts +++ b/src/core/agents/rovodev.ts @@ -7,12 +7,14 @@ import { createWriteStream, readFileSync, type WriteStream } from "node:fs"; import { createServer } from "node:net"; import type { Agent, - AgentOutput, + AgentOutputSchema, AgentResult, AgentRunOptions, TokenUsage, } from "./types.js"; +import { validateAgentOutput } from "./types.js"; import { appendDebugLog, serializeError } from "../debug-log.js"; +import { parseAgentJson } from "./json-extract.js"; import { shutdownChildProcess } from "./managed-process.js"; interface RovoDevRequestUsageEvent { @@ -55,6 +57,7 @@ function buildSystemPrompt(schema: string): string { "When you finish, reply with only valid JSON.", "Do not wrap the JSON in markdown fences.", "Do not include any prose before or after the JSON.", + "Your final assistant message must contain the JSON object only - no preamble, no commentary, no build-status lines, nothing else.", `The JSON must match this schema exactly: ${schema}`, ].join(" "); } @@ -740,27 +743,54 @@ export class RovoDevAgent implements Agent { throw new Error("rovodev returned no text output"); } - try { - const output = JSON.parse(finalText) as AgentOutput; - appendDebugLog("rovodev:output:parsed", { - sessionId, - outputTextLength: finalText.length, - }); - return { - output, - usage, - }; - } catch (error) { + const schema = JSON.parse( + readFileSync(this.schemaPath, "utf-8"), + ) as AgentOutputSchema; + const parsed = parseAgentJson(finalText, (value) => { + try { + validateAgentOutput(value, schema); + return true; + } catch { + return false; + } + }); + if (parsed === null) { + const fallbackParsed = parseAgentJson(finalText); + if (fallbackParsed !== null) { + try { + validateAgentOutput(fallbackParsed, schema); + } catch (error) { + const message = + error instanceof Error ? error.message : String(error); + throw new Error(`Failed to parse rovodev output: ${message}`); + } + } + const parseError = new SyntaxError( + "rovodev output did not contain a parseable JSON object", + ); appendDebugLog("rovodev:output:parse-error", { sessionId, outputTextLength: finalText.length, outputTextSample: finalText.slice(0, 512), - error: serializeError(error), + error: serializeError(parseError), }); - throw new Error( - `Failed to parse rovodev output: ${error instanceof Error ? error.message : String(error)}`, - ); + throw new Error(`Failed to parse rovodev output: ${parseError.message}`); + } + appendDebugLog("rovodev:output:parsed", { + sessionId, + outputTextLength: finalText.length, + }); + let output; + try { + output = validateAgentOutput(parsed, schema); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new Error(`Failed to parse rovodev output: ${message}`); } + return { + output, + usage, + }; } private async shutdownServer(): Promise {