Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions infra/flue-review/.flue/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,12 @@
// durable workflow run, and returns. The review and the GitHub post happen
// inside the workflow's Durable Object, which is not bound by that budget.

// @flue/runtime 0.11 removed the `@flue/runtime/app` entry; `createDefaultFlueApp`
// is the replacement factory and still mounts the `/workflows/:name` admission
// route we fetch internally below.
import { createDefaultFlueApp } from "@flue/runtime/internal";
import { flue } from "@flue/runtime/routing";
import { Hono } from "hono";

import { verifyWebhookSignature, gatePullRequestEvent } from "./lib/webhook.js";

const flueApp = createDefaultFlueApp();
const flueApp = flue();

const app = new Hono<{ Bindings: Env }>();

Expand Down
10 changes: 5 additions & 5 deletions infra/flue-review/.flue/cloudflare.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// Worker-level Cloudflare exports for the Flue build.
//
// @flue 0.11 no longer auto-wires the container Durable Object class into the
// generated Worker bundle (0.8 did, via the "class_name ends in Sandbox"
// heuristic). Re-export @cloudflare/sandbox's `Sandbox` class here so the
// `Sandbox` DO binding + container in wrangler.jsonc resolve to a real exported
// class. Flue re-exports these named values from the generated Worker entry.
// Re-export @cloudflare/sandbox's `Sandbox` class so the `Sandbox` DO binding +
// container in wrangler.jsonc resolve to a real exported class. Flue re-exports
// these named values from the generated Worker entry. This is the documented
// "Connecting a remote sandbox" pattern:
// https://flueframework.com/docs/ecosystem/deploy/cloudflare/#connecting-a-remote-sandbox
export { Sandbox } from "@cloudflare/sandbox";
33 changes: 27 additions & 6 deletions infra/flue-review/.flue/lib/github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -223,10 +223,29 @@ function findingToComment(finding: ReviewResult["findings"][number]) {
return base;
}

/** GitHub rejects a COMMENT review with an empty body, so we must never send one. */
const FALLBACK_SUMMARY = "Automated review completed.";

/**
* Render findings as a markdown list. Used for the body-only fallback so that
* when GitHub can't anchor inline comments (a finding points outside the diff),
* the findings still reach the PR in the review body instead of being dropped.
*/
function renderFindingsMarkdown(findings: ReviewResult["findings"]): string {
if (findings.length === 0) return "";
const lines = findings.map((f) => {
const label = f.severity === "needs_fixing" ? "needs fixing" : "suggestion";
const range = f.startLine && f.startLine < f.line ? `${f.startLine}-${f.line}` : `${f.line}`;
return `- **[${label}]** \`${f.path}:${range}\`\n\n ${f.body.replace(/\n/g, "\n ")}`;
});
return `\n\n---\n\n### Findings\n\n${lines.join("\n\n")}`;
}

/**
* Post the review. Maps verdict -> review event and findings -> line comments.
* If GitHub rejects the request because a comment anchors to a line outside the
* diff, retry body-only so the summary still lands.
* If GitHub rejects the inline comments (a comment anchors outside the diff),
* retry body-only with the findings folded into the body so nothing is lost.
* The body is always non-empty -- GitHub 422s a blank COMMENT review body.
*/
export async function postReview(
token: string,
Expand All @@ -237,6 +256,7 @@ export async function postReview(
): Promise<void> {
const url = `${GITHUB_API}/repos/${owner}/${repo}/pulls/${prNumber}/reviews`;
const event = verdictToEvent(result.verdict);
const summary = result.summary.trim() || FALLBACK_SUMMARY;
const headers = {
authorization: `Bearer ${token}`,
accept: "application/vnd.github+json",
Expand All @@ -246,17 +266,18 @@ export async function postReview(
};

const withComments = {
body: result.summary,
body: summary,
event,
comments: result.findings.map(findingToComment),
};
let res = await fetch(url, { method: "POST", headers, body: JSON.stringify(withComments) });
if (res.ok) return;

// Most likely cause: a comment line isn't part of the diff. Fall back to a
// body-only review so the summary is never lost.
// Most likely cause: a comment anchors to a line outside the diff
// ("Path could not be resolved"). Fall back to a body-only review that
// carries the summary AND the findings inline, so the review still lands.
const firstError = await res.text();
const bodyOnly = { body: result.summary, event };
const bodyOnly = { body: summary + renderFindingsMarkdown(result.findings), event };
res = await fetch(url, { method: "POST", headers, body: JSON.stringify(bodyOnly) });
if (!res.ok) {
throw new Error(
Expand Down
141 changes: 95 additions & 46 deletions infra/flue-review/.flue/workflows/review.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,13 @@
// space.

import { getSandbox, type Sandbox } from "@cloudflare/sandbox";
import { createAgent, type FlueContext, type WorkflowRouteHandler } from "@flue/runtime";
import { cfSandboxToSessionEnv } from "@flue/runtime/cloudflare";
import {
createAgent,
type FlueContext,
type SandboxFactory,
type WorkflowRouteHandler,
} from "@flue/runtime";
import { cloudflareSandbox } from "@flue/runtime/cloudflare";

import { withCapacityRetry } from "../lib/capacity.js";
import {
Expand Down Expand Up @@ -43,30 +48,51 @@ interface ReviewPayload {
repo: string;
}

// Kimi via the Cloudflare Workers AI binding: the `cloudflare/` prefix is
// reserved by Flue's generated CF entry and routed through `env.AI`, so no
// model API key is needed anywhere.
const reviewAgent = createAgent<ReviewPayload, Env>(({ env }) => ({
model: "cloudflare/@cf/moonshotai/kimi-k2.7-code",
// The container's working dir is the checked-out PR. AGENTS.md at the repo
// root is auto-discovered into the agent's context from here.
/**
* Container sandbox factory, wrapping Flue's `cloudflareSandbox(...)` to drop the
* `AbortSignal` before each `exec()`.
*
* `getSandbox(...).exec(command, options)` is a Worker -> Durable Object RPC call
* (the @cloudflare/sandbox SDK forwards the whole options bag, including any
* `AbortSignal`, to the Sandbox DO). An `AbortSignal` created outside the target
* DO cannot cross that RPC boundary -- the call hangs forever and the command
* never runs. Flue's session/agent shell path *always* attaches a signal (via
* `createCallHandle`), so every container command -- our git setup AND the
* agent's own bash/grep/find tool calls -- would hang. Verified: an identical
* `exec` with the signal omitted (or `undefined`) returns in ~50ms; with a live
* signal it never returns. Holds across @cloudflare/sandbox 0.10.3/0.12.1 and
* both the `http` and `rpc` transports, so it is not a version/transport issue.
*
* We don't need cooperative exec cancellation: the model-call timeout in
* `withCapacityRetry` bounds a stalled review, and the container has its own
* lifecycle. So we strip the signal. (Upstream: Flue's `cloudflareSandbox`
* adapter should not forward a cross-DO `AbortSignal` to `exec`.)
*/
function reviewSandbox(stub: DurableObjectNamespace<Sandbox>, id: string): SandboxFactory {
const base = cloudflareSandbox(getSandbox(stub, id), { cwd: "/workspace" });
return {
createSessionEnv: async (options) => {
const sessionEnv = await base.createSessionEnv(options);
const exec = sessionEnv.exec.bind(sessionEnv);
return {
...sessionEnv,
exec: (command, execOptions) => exec(command, { ...execOptions, signal: undefined }),
};
Comment on lines +77 to +80

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[suggestion] The wrapper builds a fresh plain object via { ...sessionEnv, exec } and returns that. Object spread copies only own enumerable properties, so if Flue's SessionEnv is a class instance this drops every prototype method and every private (#) field from the object Flue's runtime (or the agent's tools) subsequently calls against. The exercised path (the agent's shell tools → exec) survives because exec is the one member you re-define, but any other session-env method reached now or by a future Flue version would resolve to undefined or throw on private-field access.

I couldn't confirm this breaks today (no node_modules to inspect the SessionEnv shape, and the e2e verification only exercises exec), so treat this as low-confidence. If you want to remove the latent footgun, shadow exec on the original object so its prototype, private fields, and identity are preserved (or, if the framework returns a frozen instance, wrap it in a Proxy that intercepts only exec):

Suggested change
return {
...sessionEnv,
exec: (command, execOptions) => exec(command, { ...execOptions, signal: undefined }),
};
sessionEnv.exec = (command, execOptions) => exec(command, { ...execOptions, signal: undefined });
return sessionEnv;

},
};
}

// GLM-5.2 (Z.ai's agentic-coding model) via the Cloudflare Workers AI binding:
// the `cloudflare/` prefix is reserved by Flue's generated CF entry and routed
// through `env.AI`, so no model API key is needed anywhere. Workers AI 429s are
// handled by `withCapacityRetry` below.
const reviewAgent = createAgent<ReviewPayload, Env>(({ id, env }) => ({
model: "cloudflare/@cf/zai-org/glm-5.2",
// Container-backed Linux sandbox (git/rg). `id` is the per-instance id, so
// each review run gets its own container. `cwd` is the checked-out PR root.
// oxlint-disable-next-line typescript/no-unsafe-type-assertion
sandbox: reviewSandbox(env.Sandbox as DurableObjectNamespace<Sandbox>, id),
cwd: "/workspace",
// Wire the @cloudflare/sandbox container into Flue via its CF adapter.
// (The deploy doc's bare `sandbox: getSandbox(...)` is unreleased sugar; the
// supported path is a SandboxFactory that calls cfSandboxToSessionEnv.) `id`
// here is the per-session id Flue supplies, so each review run gets its own
// container instance. As of @flue/runtime 0.11 the factory no longer receives
// a `cwd`; the session cwd matches the agent's `cwd` above.
sandbox: {
createSessionEnv: ({ id: sessionId }) =>
cfSandboxToSessionEnv(
// wrangler types the auto-wired DO as DurableObjectNamespace<undefined>;
// Flue re-exports the real Sandbox class into the bundle at build.
// oxlint-disable-next-line typescript/no-unsafe-type-assertion
getSandbox(env.Sandbox as DurableObjectNamespace<Sandbox>, sessionId),
"/workspace",
),
},
instructions: [
"You are EmDash's automated pull request reviewer.",
"You investigate one PR in depth and return structured, line-anchored findings plus an overall verdict.",
Expand Down Expand Up @@ -148,13 +174,20 @@ export async function run(ctx: FlueContext<ReviewPayload, Env>): Promise<ReviewR
}

try {
const harness = await init(reviewAgent);
const session = await harness.session();

// Set up the checkout inside the container: init in /workspace, fetch the
// base branch and the PR head, check out the PR head (detached). Full fetch
// (no shallow/depth) so `git diff origin/<base>...HEAD` can resolve a merge
// base. emdash is public, so anonymous https is sufficient.
// Check out the PR into the container BEFORE init(): init in /workspace,
// fetch the base branch and the PR head, check out the PR head (detached).
// Full fetch (no shallow/depth) so `git diff origin/<base>...HEAD` can
// resolve a merge base. emdash is public, so anonymous https is sufficient.
//
// Ordering matters: Flue's init-time workspace scan reads `<cwd>/AGENTS.md`
// and `<cwd>/.agents/skills/*` into the agent's context. The container must
// already hold the checkout when init() runs, or AGENTS.md is never
// discovered (the review skill checks the PR against AGENTS.md conventions).
//
// We run setup through the raw @cloudflare/sandbox stub (same container id
// as the agent's sandbox below) with a plain `exec` and no AbortSignal.
// Flue's shell path always attaches a signal, which hangs across the DO RPC
// boundary (see reviewSandbox); a direct signal-less exec does not.
const cloneUrl = `https://github.com/${payload.owner}/${payload.repo}.git`;
const setup = [
"set -euo pipefail",
Expand All @@ -166,21 +199,30 @@ export async function run(ctx: FlueContext<ReviewPayload, Env>): Promise<ReviewR
"git checkout -q -f refs/remotes/origin/pr",
].join("\n");

const setupResult = await session.shell(setup);
// oxlint-disable-next-line typescript/no-unsafe-type-assertion
const containerStub = getSandbox(env.Sandbox as DurableObjectNamespace<Sandbox>, ctx.id);
const setupResult = await containerStub.exec(setup);
if (setupResult.exitCode !== 0) {
throw new Error(
`git setup failed (exit ${setupResult.exitCode}): ${setupResult.stderr || setupResult.stdout}`,
);
}

// Workers AI returns 429 when kimi is over capacity, and under sustained
// load the binding can hold the request open indefinitely. Bound each
// attempt and retry genuine capacity errors so a transient spike doesn't
// silently hang the review forever (it used to: no result, no post, just
// the container's keep-alive alarm firing for minutes).
// Init now that /workspace holds the checkout (AGENTS.md is discovered
// here); the agent's tool calls run against this same container.
const harness = await init(reviewAgent);
const session = await harness.session();

// Workers AI returns 429 when the model is over capacity; retry genuine
// capacity errors with backoff. The per-attempt timeout is a backstop
// against a wedged inference call, not a budget for the review itself: a
// thorough agentic review (many tool calls + turns) legitimately runs many
// minutes, so 20m gives real headroom (Flue's submission durability caps
// the whole run at 1h). It is deliberately NOT 6m -- that killed real
// reviews mid-flight.
const { data } = await withCapacityRetry(
(signal) =>
session.skill(review, {
session.skill("review", {
args: {
prContext: buildPrContext(payload, priorReview),
owner: payload.owner,
Expand All @@ -195,7 +237,7 @@ export async function run(ctx: FlueContext<ReviewPayload, Env>): Promise<ReviewR
{
label: `review#${payload.prNumber}`,
attempts: 3,
perAttemptTimeoutMs: 6 * 60_000,
perAttemptTimeoutMs: 20 * 60_000,
onRetry: ({ attempt, delayMs, error }) =>
ctx.log.warn?.("[review] model over capacity, backing off", {
prNumber: payload.prNumber,
Expand All @@ -206,22 +248,29 @@ export async function run(ctx: FlueContext<ReviewPayload, Env>): Promise<ReviewR
},
);

// Telemetry (Workers Logs / dashboard): records what the model produced
// and whether we're about to post.
console.log("[review] result", {
prNumber: payload.prNumber,
hasToken: Boolean(token),
verdict: data.verdict,
summaryLen: data.summary.length,
findings: data.findings.length,
});

// Post from this trusted DO context (durable, not bound by the webhook's
// 30s waitUntil budget). In dev (no creds) we just log and return.
if (token) {
// Don't let a transient GitHub failure throw: that would discard the
// completed review AND trigger Flue's at-least-once workflow restart
// (a full re-review). Log and return the result instead.
try {
await postReview(token, payload.owner, payload.repo, payload.prNumber, data);
} catch (err) {
ctx.log.error?.("[review] postReview failed", {
error: String(err),
console.error("[review] postReview failed", {
error: err instanceof Error ? err.message : String(err),
prNumber: payload.prNumber,
});
}
} else {
ctx.log.info?.("[review] no GitHub App creds; skipping post", { prNumber: payload.prNumber });
console.log("[review] no GitHub App creds; skipping post", { prNumber: payload.prNumber });
}

return data;
Expand Down
13 changes: 10 additions & 3 deletions infra/flue-review/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Pinned to match the @cloudflare/sandbox version in package.json (0.10.3);
# Pinned to match the @cloudflare/sandbox version in package.json (0.12.1);
# the image and the SDK are versioned together. The base bundles the
# control-plane server, `node`, `git`, and `curl`, with a working dir at
# /workspace. The reviewer is git-only (no `gh`): it clones the public repo
# over https and diffs locally, so no extra tooling is required here.
FROM docker.io/cloudflare/sandbox:0.10.3
# over https and diffs locally.
FROM docker.io/cloudflare/sandbox:0.12.1

# ripgrep: the review agent reaches for `rg` to search the checkout. Without it,
# `rg` exits 127, which kills the sandbox shell session ("cannot execute further
# commands") and aborts the rest of the review. Install it so searches work.
RUN apt-get update \
&& apt-get install -y --no-install-recommends ripgrep \
&& rm -rf /var/lib/apt/lists/*
8 changes: 4 additions & 4 deletions infra/flue-review/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@
"typecheck": "tsc --noEmit"
},
"devDependencies": {
"@flue/cli": "^0.11.1",
"@flue/cli": "1.0.0-beta.1",
"typescript": "catalog:",
"wrangler": "catalog:"
},
"dependencies": {
"@cloudflare/sandbox": "^0.10.3",
"@flue/runtime": "^0.11.1",
"agents": "^0.13.3",
"@cloudflare/sandbox": "^0.12.1",
"@flue/runtime": "1.0.0-beta.1",
"agents": "^0.14.5",
"hono": "^4.12.23",
"valibot": "^1.4.1"
}
Expand Down
Loading
Loading