emdash-cms · ascorbic · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026
diff --git a/infra/flue-review/.flue/app.ts b/infra/flue-review/.flue/app.ts
@@ -8,15 +8,12 @@
 // durable workflow run, and returns. The review and the GitHub post happen
 // inside the workflow's Durable Object, which is not bound by that budget.
 
-// @flue/runtime 0.11 removed the `@flue/runtime/app` entry; `createDefaultFlueApp`
-// is the replacement factory and still mounts the `/workflows/:name` admission
-// route we fetch internally below.
-import { createDefaultFlueApp } from "@flue/runtime/internal";
+import { flue } from "@flue/runtime/routing";
 import { Hono } from "hono";
 
 import { verifyWebhookSignature, gatePullRequestEvent } from "./lib/webhook.js";
 
-const flueApp = createDefaultFlueApp();
+const flueApp = flue();
 
 const app = new Hono<{ Bindings: Env }>();
 

diff --git a/infra/flue-review/.flue/cloudflare.ts b/infra/flue-review/.flue/cloudflare.ts
@@ -1,8 +1,8 @@
 // Worker-level Cloudflare exports for the Flue build.
 //
-// @flue 0.11 no longer auto-wires the container Durable Object class into the
-// generated Worker bundle (0.8 did, via the "class_name ends in Sandbox"
-// heuristic). Re-export @cloudflare/sandbox's `Sandbox` class here so the
-// `Sandbox` DO binding + container in wrangler.jsonc resolve to a real exported
-// class. Flue re-exports these named values from the generated Worker entry.
+// Re-export @cloudflare/sandbox's `Sandbox` class so the `Sandbox` DO binding +
+// container in wrangler.jsonc resolve to a real exported class. Flue re-exports
+// these named values from the generated Worker entry. This is the documented
+// "Connecting a remote sandbox" pattern:
+// https://flueframework.com/docs/ecosystem/deploy/cloudflare/#connecting-a-remote-sandbox
 export { Sandbox } from "@cloudflare/sandbox";
diff --git a/infra/flue-review/.flue/lib/github.ts b/infra/flue-review/.flue/lib/github.ts
@@ -223,10 +223,29 @@ function findingToComment(finding: ReviewResult["findings"][number]) {
 	return base;
 }
 
+/** GitHub rejects a COMMENT review with an empty body, so we must never send one. */
+const FALLBACK_SUMMARY = "Automated review completed.";
+
+/**
+ * Render findings as a markdown list. Used for the body-only fallback so that
+ * when GitHub can't anchor inline comments (a finding points outside the diff),
+ * the findings still reach the PR in the review body instead of being dropped.
+ */
+function renderFindingsMarkdown(findings: ReviewResult["findings"]): string {
+	if (findings.length === 0) return "";
+	const lines = findings.map((f) => {
+		const label = f.severity === "needs_fixing" ? "needs fixing" : "suggestion";
+		const range = f.startLine && f.startLine < f.line ? `${f.startLine}-${f.line}` : `${f.line}`;
+		return `- **[${label}]** \`${f.path}:${range}\`\n\n  ${f.body.replace(/\n/g, "\n  ")}`;
+	});
+	return `\n\n---\n\n### Findings\n\n${lines.join("\n\n")}`;
+}
+
 /**
  * Post the review. Maps verdict -> review event and findings -> line comments.
- * If GitHub rejects the request because a comment anchors to a line outside the
- * diff, retry body-only so the summary still lands.
+ * If GitHub rejects the inline comments (a comment anchors outside the diff),
+ * retry body-only with the findings folded into the body so nothing is lost.
+ * The body is always non-empty -- GitHub 422s a blank COMMENT review body.
  */
 export async function postReview(
 	token: string,
@@ -237,6 +256,7 @@ export async function postReview(
 ): Promise<void> {
 	const url = `${GITHUB_API}/repos/${owner}/${repo}/pulls/${prNumber}/reviews`;
 	const event = verdictToEvent(result.verdict);
+	const summary = result.summary.trim() || FALLBACK_SUMMARY;
 	const headers = {
 		authorization: `Bearer ${token}`,
 		accept: "application/vnd.github+json",
@@ -246,17 +266,18 @@ export async function postReview(
 	};
 
 	const withComments = {
-		body: result.summary,
+		body: summary,
 		event,
 		comments: result.findings.map(findingToComment),
 	};
 	let res = await fetch(url, { method: "POST", headers, body: JSON.stringify(withComments) });
 	if (res.ok) return;
 
-	// Most likely cause: a comment line isn't part of the diff. Fall back to a
-	// body-only review so the summary is never lost.
+	// Most likely cause: a comment anchors to a line outside the diff
+	// ("Path could not be resolved"). Fall back to a body-only review that
+	// carries the summary AND the findings inline, so the review still lands.
 	const firstError = await res.text();
-	const bodyOnly = { body: result.summary, event };
+	const bodyOnly = { body: summary + renderFindingsMarkdown(result.findings), event };
 	res = await fetch(url, { method: "POST", headers, body: JSON.stringify(bodyOnly) });
 	if (!res.ok) {
 		throw new Error(

diff --git a/infra/flue-review/.flue/workflows/review.ts b/infra/flue-review/.flue/workflows/review.ts
@@ -14,8 +14,13 @@
 // space.
 
 import { getSandbox, type Sandbox } from "@cloudflare/sandbox";
-import { createAgent, type FlueContext, type WorkflowRouteHandler } from "@flue/runtime";
-import { cfSandboxToSessionEnv } from "@flue/runtime/cloudflare";
+import {
+	createAgent,
+	type FlueContext,
+	type SandboxFactory,
+	type WorkflowRouteHandler,
+} from "@flue/runtime";
+import { cloudflareSandbox } from "@flue/runtime/cloudflare";
 
 import { withCapacityRetry } from "../lib/capacity.js";
 import {
@@ -43,30 +48,51 @@ interface ReviewPayload {
 	repo: string;
 }
 
-// Kimi via the Cloudflare Workers AI binding: the `cloudflare/` prefix is
-// reserved by Flue's generated CF entry and routed through `env.AI`, so no
-// model API key is needed anywhere.
-const reviewAgent = createAgent<ReviewPayload, Env>(({ env }) => ({
-	model: "cloudflare/@cf/moonshotai/kimi-k2.7-code",
-	// The container's working dir is the checked-out PR. AGENTS.md at the repo
-	// root is auto-discovered into the agent's context from here.
+/**
+ * Container sandbox factory, wrapping Flue's `cloudflareSandbox(...)` to drop the
+ * `AbortSignal` before each `exec()`.
+ *
+ * `getSandbox(...).exec(command, options)` is a Worker -> Durable Object RPC call
+ * (the @cloudflare/sandbox SDK forwards the whole options bag, including any
+ * `AbortSignal`, to the Sandbox DO). An `AbortSignal` created outside the target
+ * DO cannot cross that RPC boundary -- the call hangs forever and the command
+ * never runs. Flue's session/agent shell path *always* attaches a signal (via
+ * `createCallHandle`), so every container command -- our git setup AND the
+ * agent's own bash/grep/find tool calls -- would hang. Verified: an identical
+ * `exec` with the signal omitted (or `undefined`) returns in ~50ms; with a live
+ * signal it never returns. Holds across @cloudflare/sandbox 0.10.3/0.12.1 and
+ * both the `http` and `rpc` transports, so it is not a version/transport issue.
+ *
+ * We don't need cooperative exec cancellation: the model-call timeout in
+ * `withCapacityRetry` bounds a stalled review, and the container has its own
+ * lifecycle. So we strip the signal. (Upstream: Flue's `cloudflareSandbox`
+ * adapter should not forward a cross-DO `AbortSignal` to `exec`.)
+ */
+function reviewSandbox(stub: DurableObjectNamespace<Sandbox>, id: string): SandboxFactory {
+	const base = cloudflareSandbox(getSandbox(stub, id), { cwd: "/workspace" });
+	return {
+		createSessionEnv: async (options) => {
+			const sessionEnv = await base.createSessionEnv(options);
+			const exec = sessionEnv.exec.bind(sessionEnv);
+			return {
+				...sessionEnv,
+				exec: (command, execOptions) => exec(command, { ...execOptions, signal: undefined }),
+			};
-			return {
-				...sessionEnv,
-				exec: (command, execOptions) => exec(command, { ...execOptions, signal: undefined }),
-			};
+			sessionEnv.exec = (command, execOptions) => exec(command, { ...execOptions, signal: undefined });
+			return sessionEnv;
-			return {
-				...sessionEnv,
-				exec: (command, execOptions) => exec(command, { ...execOptions, signal: undefined }),
-			};
+			sessionEnv.exec = (command, execOptions) => exec(command, { ...execOptions, signal: undefined });
+			return sessionEnv;
+		},
+	};
+}
+
+// GLM-5.2 (Z.ai's agentic-coding model) via the Cloudflare Workers AI binding:
+// the `cloudflare/` prefix is reserved by Flue's generated CF entry and routed
+// through `env.AI`, so no model API key is needed anywhere. Workers AI 429s are
+// handled by `withCapacityRetry` below.
+const reviewAgent = createAgent<ReviewPayload, Env>(({ id, env }) => ({
+	model: "cloudflare/@cf/zai-org/glm-5.2",
+	// Container-backed Linux sandbox (git/rg). `id` is the per-instance id, so
+	// each review run gets its own container. `cwd` is the checked-out PR root.
+	// oxlint-disable-next-line typescript/no-unsafe-type-assertion
+	sandbox: reviewSandbox(env.Sandbox as DurableObjectNamespace<Sandbox>, id),
 	cwd: "/workspace",
-	// Wire the @cloudflare/sandbox container into Flue via its CF adapter.
-	// (The deploy doc's bare `sandbox: getSandbox(...)` is unreleased sugar; the
-	// supported path is a SandboxFactory that calls cfSandboxToSessionEnv.) `id`
-	// here is the per-session id Flue supplies, so each review run gets its own
-	// container instance. As of @flue/runtime 0.11 the factory no longer receives
-	// a `cwd`; the session cwd matches the agent's `cwd` above.
-	sandbox: {
-		createSessionEnv: ({ id: sessionId }) =>
-			cfSandboxToSessionEnv(
-				// wrangler types the auto-wired DO as DurableObjectNamespace<undefined>;
-				// Flue re-exports the real Sandbox class into the bundle at build.
-				// oxlint-disable-next-line typescript/no-unsafe-type-assertion
-				getSandbox(env.Sandbox as DurableObjectNamespace<Sandbox>, sessionId),
-				"/workspace",
-			),
-	},
 	instructions: [
 		"You are EmDash's automated pull request reviewer.",
 		"You investigate one PR in depth and return structured, line-anchored findings plus an overall verdict.",
@@ -148,13 +174,20 @@ export async function run(ctx: FlueContext<ReviewPayload, Env>): Promise<ReviewR
 	}
 
 	try {
-		const harness = await init(reviewAgent);
-		const session = await harness.session();
-
-		// Set up the checkout inside the container: init in /workspace, fetch the
-		// base branch and the PR head, check out the PR head (detached). Full fetch
-		// (no shallow/depth) so `git diff origin/<base>...HEAD` can resolve a merge
-		// base. emdash is public, so anonymous https is sufficient.
+		// Check out the PR into the container BEFORE init(): init in /workspace,
+		// fetch the base branch and the PR head, check out the PR head (detached).
+		// Full fetch (no shallow/depth) so `git diff origin/<base>...HEAD` can
+		// resolve a merge base. emdash is public, so anonymous https is sufficient.
+		//
+		// Ordering matters: Flue's init-time workspace scan reads `<cwd>/AGENTS.md`
+		// and `<cwd>/.agents/skills/*` into the agent's context. The container must
+		// already hold the checkout when init() runs, or AGENTS.md is never
+		// discovered (the review skill checks the PR against AGENTS.md conventions).
+		//
+		// We run setup through the raw @cloudflare/sandbox stub (same container id
+		// as the agent's sandbox below) with a plain `exec` and no AbortSignal.
+		// Flue's shell path always attaches a signal, which hangs across the DO RPC
+		// boundary (see reviewSandbox); a direct signal-less exec does not.
 		const cloneUrl = `https://github.com/${payload.owner}/${payload.repo}.git`;
 		const setup = [
 			"set -euo pipefail",
@@ -166,21 +199,30 @@ export async function run(ctx: FlueContext<ReviewPayload, Env>): Promise<ReviewR
 			"git checkout -q -f refs/remotes/origin/pr",
 		].join("\n");
 
-		const setupResult = await session.shell(setup);
+		// oxlint-disable-next-line typescript/no-unsafe-type-assertion
+		const containerStub = getSandbox(env.Sandbox as DurableObjectNamespace<Sandbox>, ctx.id);
+		const setupResult = await containerStub.exec(setup);
 		if (setupResult.exitCode !== 0) {
 			throw new Error(
 				`git setup failed (exit ${setupResult.exitCode}): ${setupResult.stderr || setupResult.stdout}`,
 			);
 		}
 
-		// Workers AI returns 429 when kimi is over capacity, and under sustained
-		// load the binding can hold the request open indefinitely. Bound each
-		// attempt and retry genuine capacity errors so a transient spike doesn't
-		// silently hang the review forever (it used to: no result, no post, just
-		// the container's keep-alive alarm firing for minutes).
+		// Init now that /workspace holds the checkout (AGENTS.md is discovered
+		// here); the agent's tool calls run against this same container.
+		const harness = await init(reviewAgent);
+		const session = await harness.session();
+
+		// Workers AI returns 429 when the model is over capacity; retry genuine
+		// capacity errors with backoff. The per-attempt timeout is a backstop
+		// against a wedged inference call, not a budget for the review itself: a
+		// thorough agentic review (many tool calls + turns) legitimately runs many
+		// minutes, so 20m gives real headroom (Flue's submission durability caps
+		// the whole run at 1h). It is deliberately NOT 6m -- that killed real
+		// reviews mid-flight.
 		const { data } = await withCapacityRetry(
 			(signal) =>
-				session.skill(review, {
+				session.skill("review", {
 					args: {
 						prContext: buildPrContext(payload, priorReview),
 						owner: payload.owner,
@@ -195,7 +237,7 @@ export async function run(ctx: FlueContext<ReviewPayload, Env>): Promise<ReviewR
 			{
 				label: `review#${payload.prNumber}`,
 				attempts: 3,
-				perAttemptTimeoutMs: 6 * 60_000,
+				perAttemptTimeoutMs: 20 * 60_000,
 				onRetry: ({ attempt, delayMs, error }) =>
 					ctx.log.warn?.("[review] model over capacity, backing off", {
 						prNumber: payload.prNumber,
@@ -206,22 +248,29 @@ export async function run(ctx: FlueContext<ReviewPayload, Env>): Promise<ReviewR
 			},
 		);
 
+		// Telemetry (Workers Logs / dashboard): records what the model produced
+		// and whether we're about to post.
+		console.log("[review] result", {
+			prNumber: payload.prNumber,
+			hasToken: Boolean(token),
+			verdict: data.verdict,
+			summaryLen: data.summary.length,
+			findings: data.findings.length,
+		});
+
 		// Post from this trusted DO context (durable, not bound by the webhook's
 		// 30s waitUntil budget). In dev (no creds) we just log and return.
 		if (token) {
-			// Don't let a transient GitHub failure throw: that would discard the
-			// completed review AND trigger Flue's at-least-once workflow restart
-			// (a full re-review). Log and return the result instead.
 			try {
 				await postReview(token, payload.owner, payload.repo, payload.prNumber, data);
 			} catch (err) {
-				ctx.log.error?.("[review] postReview failed", {
-					error: String(err),
+				console.error("[review] postReview failed", {
+					error: err instanceof Error ? err.message : String(err),
 					prNumber: payload.prNumber,
 				});
 			}
 		} else {
-			ctx.log.info?.("[review] no GitHub App creds; skipping post", { prNumber: payload.prNumber });
+			console.log("[review] no GitHub App creds; skipping post", { prNumber: payload.prNumber });
 		}
 
 		return data;

diff --git a/infra/flue-review/Dockerfile b/infra/flue-review/Dockerfile
@@ -1,6 +1,13 @@
-# Pinned to match the @cloudflare/sandbox version in package.json (0.10.3);
+# Pinned to match the @cloudflare/sandbox version in package.json (0.12.1);
 # the image and the SDK are versioned together. The base bundles the
 # control-plane server, `node`, `git`, and `curl`, with a working dir at
 # /workspace. The reviewer is git-only (no `gh`): it clones the public repo
-# over https and diffs locally, so no extra tooling is required here.
-FROM docker.io/cloudflare/sandbox:0.10.3
+# over https and diffs locally.
+FROM docker.io/cloudflare/sandbox:0.12.1
+
+# ripgrep: the review agent reaches for `rg` to search the checkout. Without it,
+# `rg` exits 127, which kills the sandbox shell session ("cannot execute further
+# commands") and aborts the rest of the review. Install it so searches work.
+RUN apt-get update \
+	&& apt-get install -y --no-install-recommends ripgrep \
+	&& rm -rf /var/lib/apt/lists/*
diff --git a/infra/flue-review/package.json b/infra/flue-review/package.json
@@ -11,14 +11,14 @@
 		"typecheck": "tsc --noEmit"
 	},
 	"devDependencies": {
-		"@flue/cli": "^0.11.1",
+		"@flue/cli": "1.0.0-beta.1",
 		"typescript": "catalog:",
 		"wrangler": "catalog:"
 	},
 	"dependencies": {
-		"@cloudflare/sandbox": "^0.10.3",
-		"@flue/runtime": "^0.11.1",
-		"agents": "^0.13.3",
+		"@cloudflare/sandbox": "^0.12.1",
+		"@flue/runtime": "1.0.0-beta.1",
+		"agents": "^0.14.5",
 		"hono": "^4.12.23",
 		"valibot": "^1.4.1"
 	}