kunchenguid · kunchenguid · May 7, 2026 · May 7, 2026 · May 7, 2026 · May 7, 2026
diff --git a/AGENTS.md b/AGENTS.md
@@ -41,6 +41,7 @@ Each agent implements the `Agent` interface in `types.ts` (`name`, async `run(pr
 - `claude.ts` / `codex.ts` / `copilot.ts` / `pi.ts`: spawn the CLI per iteration in non-interactive mode. Codex uses `--output-schema` pointing at the run's schema file; Claude uses `--json-schema`, treats the last successful structured result as terminal, raises `PermanentAgentError` for low credit balance exits, and after a short grace period shuts down a lingering Claude process tree if it stays alive. Copilot uses JSONL output plus prompt-level schema instructions, then parses the final `assistant.message` content. Pi runs in JSON mode, appends the final output schema to the prompt, and parses the assistant JSON reply from Pi's streamed events.
 - `rovodev.ts` / `opencode.ts`: long-running local HTTP servers managed via `managed-process.ts` (start once, reuse across iterations, close on shutdown). OpenCode creates a per-run session and applies a blanket allow rule to avoid prompt blocking.
 - `acp.ts`: handles `acp:<target-or-command>` specs through the bundled `acpx` runtime and registry. It keeps a persistent per-run session keyed by run ID under `.gnhf/runs/<runId>/acp-sessions`, embeds the output schema in the prompt, parses only output text deltas as final JSON, records ACP lifecycle events in `gnhf.log`, and reports per-iteration token usage from ACP `used` deltas when available with prompt-length plus tool-call estimates as a fallback. Estimated ACP usage is marked for the renderer so totals are prefixed with `~`. Path and arg overrides are native-agent-only; ACP targets are customized via `acpRegistryOverrides` in config (a target-name -> spawn-command map fed into acpx's agent registry) or by passing a raw ACP server command directly after `acp:`. Raw command specs are redacted to `acp:custom`/`custom` in debug logs, errors, and telemetry. The e2e suite exercises the full wire path against the `acp-mock` package registered through that same override mechanism.
+- `json-extract.ts`: shared recovery for final agent JSON that may be fenced or prose-wrapped; use it before adding ad-hoc parsing to integrations that must validate output against the agent schema.
 - `stream-utils.ts`: shared JSONL parsing, `AbortSignal` wiring, and child-process lifecycle helpers. When touching agent streaming, start here.
 
 Reserved args managed by gnhf are rejected in `config.ts` via `isReservedAgentArg` - if you add a new flag that gnhf controls, add it to that list so user overrides can't shadow it.

diff --git a/src/core/agents/acp.ts b/src/core/agents/acp.ts
@@ -11,6 +11,7 @@ import {
 } from "acpx/runtime";
 import { appendDebugLog, serializeError } from "../debug-log.js";
 import { redactAcpTargetForLogs } from "../config.js";
+import { parseAgentJson } from "./json-extract.js";
 import {
   PermanentAgentError,
   validateAgentOutput,
@@ -49,88 +50,6 @@ When the iteration is complete, your final assistant message must be a single JS
 ${JSON.stringify(schema, null, 2)}`;
 }
 
-function stripJsonFences(text: string): string {
-  const trimmed = text.trim();
-  if (!trimmed.startsWith("```")) return trimmed;
-
-  const withoutOpen = trimmed.replace(/^```(?:json)?\s*\n?/, "");
-  return withoutOpen.replace(/\n?```\s*$/, "").trim();
-}
-
-/**
- * Walk forward from `start` (which must be `{`) and return the substring of
- * the first balanced JSON object, or null if no balanced object is found.
- * Tracks string state and escape sequences so braces inside strings don't
- * affect depth.
- */
-function tryExtractBalancedObject(text: string, start: number): string | null {
-  if (text[start] !== "{") return null;
-  let depth = 0;
-  let inString = false;
-  let escape = false;
-  for (let i = start; i < text.length; i++) {
-    const ch = text[i];
-    if (escape) {
-      escape = false;
-      continue;
-    }
-    if (inString) {
-      if (ch === "\\") escape = true;
-      else if (ch === '"') inString = false;
-      continue;
-    }
-    if (ch === '"') {
-      inString = true;
-      continue;
-    }
-    if (ch === "{") depth++;
-    else if (ch === "}") {
-      depth--;
-      if (depth === 0) {
-        const candidate = text.slice(start, i + 1);
-        try {
-          JSON.parse(candidate);
-          return candidate;
-        } catch {
-          return null;
-        }
-      }
-    }
-  }
-  return null;
-}
-
-/**
- * Look for a balanced JSON object inside `text`, preferring the rightmost one
- * (since the agent is supposed to end the message with the structured answer).
- */
-function extractLastJsonObject(text: string): string | null {
-  let cursor = text.lastIndexOf("{");
-  while (cursor >= 0) {
-    const candidate = tryExtractBalancedObject(text, cursor);
-    if (candidate !== null) return candidate;
-    cursor = text.lastIndexOf("{", cursor - 1);
-  }
-  return null;
-}
-
-function parseAgentJson(text: string): unknown | null {
-  const cleaned = stripJsonFences(text);
-  if (!cleaned) return null;
-  try {
-    return JSON.parse(cleaned);
-  } catch {
-    // fall through to extraction
-  }
-  const extracted = extractLastJsonObject(cleaned);
-  if (!extracted) return null;
-  try {
-    return JSON.parse(extracted);
-  } catch {
-    return null;
-  }
-}
-
 function isAbortError(error: unknown): boolean {
   return (
     error instanceof Error &&

diff --git a/src/core/agents/json-extract.test.ts b/src/core/agents/json-extract.test.ts
@@ -0,0 +1,138 @@
+import { describe, expect, it } from "vitest";
+import {
+  extractLastJsonObject,
+  parseAgentJson,
+  stripJsonFences,
+  tryExtractBalancedObject,
+} from "./json-extract.js";
+
+describe("stripJsonFences", () => {
+  it("returns trimmed text when there are no fences", () => {
+    expect(stripJsonFences('  {"a":1}  ')).toBe('{"a":1}');
+  });
+
+  it("strips a ```json fence", () => {
+    expect(stripJsonFences('```json\n{"a":1}\n```')).toBe('{"a":1}');
+  });
+
+  it("strips a bare ``` fence", () => {
+    expect(stripJsonFences('```\n{"a":1}\n```')).toBe('{"a":1}');
+  });
+});
+
+describe("tryExtractBalancedObject", () => {
+  it("returns the balanced object starting at the index", () => {
+    const text = 'noise {"a":1} more';
+    expect(tryExtractBalancedObject(text, 6)).toBe('{"a":1}');
+  });
+
+  it("respects nested braces", () => {
+    const text = '{"a":{"b":2}}';
+    expect(tryExtractBalancedObject(text, 0)).toBe('{"a":{"b":2}}');
+  });
+
+  it("ignores braces inside strings", () => {
+    const text = '{"a":"{not real}"}';
+    expect(tryExtractBalancedObject(text, 0)).toBe('{"a":"{not real}"}');
+  });
+
+  it("returns null when start is not an opening brace", () => {
+    expect(tryExtractBalancedObject('"a":1', 0)).toBeNull();
+  });
+
+  it("returns null when the object is unterminated", () => {
+    expect(tryExtractBalancedObject('{"a":1', 0)).toBeNull();
+  });
+});
+
+describe("extractLastJsonObject", () => {
+  it("returns the rightmost balanced JSON object", () => {
+    const text = 'first {"a":1} then {"b":2}';
+    expect(extractLastJsonObject(text)).toBe('{"b":2}');
+  });
+
+  it("returns null when there is no balanced object", () => {
+    expect(extractLastJsonObject("plain prose with no braces")).toBeNull();
+  });
+
+  it("falls back when the rightmost candidate is unterminated", () => {
+    const text = 'good {"a":1} bad {"b":';
+    expect(extractLastJsonObject(text)).toBe('{"a":1}');
+  });
+
+  it("continues scanning when the rightmost parsed object is rejected", () => {
+    const text =
+      'final {"success":true,"summary":"mentions {}","key_changes_made":[],"key_learnings":[]} trailing';
+
+    expect(
+      extractLastJsonObject(text, (value) =>
+        Boolean(
+          value &&
+          typeof value === "object" &&
+          "success" in value &&
+          "summary" in value,
+        ),
+      ),
+    ).toBe(
+      '{"success":true,"summary":"mentions {}","key_changes_made":[],"key_learnings":[]}',
+    );
+  });
+});
+
+describe("parseAgentJson", () => {
+  it("parses pure JSON", () => {
+    expect(parseAgentJson('{"success":true}')).toEqual({ success: true });
+  });
+
+  it("parses JSON wrapped in markdown fences", () => {
+    expect(parseAgentJson('```json\n{"success":true}\n```')).toEqual({
+      success: true,
+    });
+  });
+
+  it("recovers JSON when the agent prepends prose (the rovodev #144 case)", () => {
+    const text =
+      'BUILD SUCCESS confirms the Java changes are valid.\n\n{"success": true, "summary": "x", "key_changes_made": [], "key_learnings": []}';
+    expect(parseAgentJson(text)).toEqual({
+      success: true,
+      summary: "x",
+      key_changes_made: [],
+      key_learnings: [],
+    });
+  });
+
+  it("recovers JSON when the agent appends trailing prose", () => {
+    const text = '{"success":true}\n\nDone!';
+    expect(parseAgentJson(text)).toEqual({ success: true });
+  });
+
+  it("prefers the last JSON object when multiple are present", () => {
+    const text = 'log: {"step":1}\nfinal: {"step":2}';
+    expect(parseAgentJson(text)).toEqual({ step: 2 });
+  });
+
+  it("does not extract nested objects after a parsed top-level object is rejected", () => {
+    const text =
+      '{"success":true,"summary":{"success":true,"summary":"nested","key_changes_made":[],"key_learnings":[]},"key_changes_made":[],"key_learnings":[]}';
+
+    expect(
+      parseAgentJson(text, (value) =>
+        Boolean(
+          value &&
+          typeof value === "object" &&
+          "summary" in value &&
+          typeof value.summary === "string",
+        ),
+      ),
+    ).toBeNull();
+  });
+
+  it("returns null for unparseable text", () => {
+    expect(parseAgentJson("just prose, no json here")).toBeNull();
+  });
+
+  it("returns null for empty input", () => {
+    expect(parseAgentJson("")).toBeNull();
+    expect(parseAgentJson("   ")).toBeNull();
+  });
+});
diff --git a/src/core/agents/json-extract.ts b/src/core/agents/json-extract.ts
@@ -0,0 +1,108 @@
+/**
+ * Helpers for pulling a JSON object out of an agent's final assistant message.
+ *
+ * Agents are instructed to return JSON only, but several (rovodev, ACP targets)
+ * sometimes prepend prose or wrap the JSON in markdown fences. These helpers
+ * recover the structured payload in those cases without changing behaviour for
+ * the well-formed pure-JSON path.
+ */
+
+export function stripJsonFences(text: string): string {
+  const trimmed = text.trim();
+  if (!trimmed.startsWith("```")) return trimmed;
+
+  const withoutOpen = trimmed.replace(/^```(?:json)?\s*\n?/, "");
+  return withoutOpen.replace(/\n?```\s*$/, "").trim();
+}
+
+/**
+ * Walk forward from `start` (which must be `{`) and return the substring of
+ * the first balanced JSON object, or null if no balanced object is found.
+ * Tracks string state and escape sequences so braces inside strings don't
+ * affect depth.
+ */
+export function tryExtractBalancedObject(
+  text: string,
+  start: number,
+): string | null {
+  if (text[start] !== "{") return null;
+  let depth = 0;
+  let inString = false;
+  let escape = false;
+  for (let i = start; i < text.length; i++) {
+    const ch = text[i];
+    if (escape) {
+      escape = false;
+      continue;
+    }
+    if (inString) {
+      if (ch === "\\") escape = true;
+      else if (ch === '"') inString = false;
+      continue;
+    }
+    if (ch === '"') {
+      inString = true;
+      continue;
+    }
+    if (ch === "{") depth++;
+    else if (ch === "}") {
+      depth--;
+      if (depth === 0) {
+        const candidate = text.slice(start, i + 1);
+        try {
+          JSON.parse(candidate);
+          return candidate;
+        } catch {
+          return null;
+        }
+      }
+    }
+  }
+  return null;
+}
+
+/**
+ * Look for a balanced JSON object inside `text`, preferring the rightmost one
+ * (since the agent is supposed to end the message with the structured answer).
+ */
+export function extractLastJsonObject(
+  text: string,
+  accepts?: (value: unknown) => boolean,
+): string | null {
+  let cursor = text.lastIndexOf("{");
+  while (cursor >= 0) {
+    const candidate = tryExtractBalancedObject(text, cursor);
+    if (candidate !== null) {
+      if (!accepts) return candidate;
+      try {
+        if (accepts(JSON.parse(candidate))) return candidate;
+      } catch {
+        // Keep scanning earlier objects when a candidate is not valid JSON.
+      }
+    }
+    cursor = text.lastIndexOf("{", cursor - 1);
+  }
+  return null;
+}
+
+export function parseAgentJson(
+  text: string,
+  accepts?: (value: unknown) => boolean,
+): unknown | null {
+  const cleaned = stripJsonFences(text);
+  if (!cleaned) return null;
+  try {
+    const parsed = JSON.parse(cleaned);
+    if (!accepts || accepts(parsed)) return parsed;
+    return null;
+  } catch {
+    // fall through to extraction
+  }
+  const extracted = extractLastJsonObject(cleaned, accepts);
+  if (!extracted) return null;
+  try {
+    return JSON.parse(extracted);
+  } catch {
+    return null;
+  }
+}