diff --git a/README.md b/README.md
index 27a027f..adfde01 100644
--- a/README.md
+++ b/README.md
@@ -73,7 +73,7 @@ Please read ~/.agent-protocols/closeout/SKILL.md
 Scope: current implementation task
 ```
 
-For Claude Code structured-review reviewer passes, use the bundled runner:
+For structured-review reviewer passes, use the bundled runner:
 
 ```bash
 python structured-review/scripts/claude_structured_review.py \
@@ -89,6 +89,11 @@ python structured-review/scripts/claude_structured_review.py \
 The runner loads the shared skill from this repo and target-repo overlays from
 the explicit `--worktree`.
 
+The runner drives either Claude Code or Codex as the reviewer.
+`--reviewer-backend` defaults to `auto`, which picks the cross-vendor
+reviewer for the detected driver (Claude Code driver -> Codex reviewer, Codex
+driver -> Claude reviewer); pass `--reviewer-backend claude|codex` to pin it.
+
 `--protocol-dir` is optional and mainly for tests or intentional alternate
 checkouts; normal usage relies on the script's own `structured-review`
 directory.
diff --git a/closeout/SKILL.md b/closeout/SKILL.md
index 0d1f0f5..2e1f9db 100644
--- a/closeout/SKILL.md
+++ b/closeout/SKILL.md
@@ -44,10 +44,9 @@ This trigger list is the canonical Closeout Review trigger list. Maintain it
 through structured-review skill self-evolution when protocol rules change.
 
 When Closeout Review is triggered, use the `structured-review` protocol with
-`Type: closeout-review`. For repo-backed Closeout Review, use the bundled
-Claude runner when available. Default to `print-review` for durable closeout
-reports or reference artifacts; use `write-commit-to-plan` only when the driver
-explicitly provides a thread file with `## Review Threads`.
+`Type: closeout-review`. The structured-review skill's default runner rule and
+runner-mode guidance decide how the reviewer pass runs; this protocol does not
+duplicate that policy.
 
 Closeout Review checks whether the closeout evidence or report is accurate and
 complete. It returns to closeout with `ready to resume closeout` or named
diff --git a/docs/CURRENT.md b/docs/CURRENT.md
index 606d070..8aab697 100644
--- a/docs/CURRENT.md
+++ b/docs/CURRENT.md
@@ -1,7 +1,7 @@
 # Current Protocol Map
 
 Status: active source-of-truth map
-Last updated: 2026-06-06
+Last updated: 2026-06-10
 
 This file points humans and agents to the active protocol entrypoints. It does
 not replace root `AGENTS.md`; agents should start there, then use this map to
@@ -19,8 +19,8 @@ avoid treating historical plans as current instructions.
 
 - `structured-review/SKILL.md` - structured review for plans,
   implementations, optional closeout evidence reviews, and review-response
-  passes. Repo-backed review gates default to the bundled Claude runner when
-  available.
+  passes. Repo-backed review gates default to the bundled runner when
+  available, with cross-vendor Claude and Codex reviewer backends.
 - `closeout/SKILL.md` - final hygiene checklist and delivery handoff after
   implementation, documentation, review, or phase work.
 - `planning/SKILL.md` - discussion-to-plan workflow before implementation.
diff --git a/docs/agent_plans/2026-06-10_codex_reviewer_backend_plan.md b/docs/agent_plans/2026-06-10_codex_reviewer_backend_plan.md
new file mode 100644
index 0000000..7e36826
--- /dev/null
+++ b/docs/agent_plans/2026-06-10_codex_reviewer_backend_plan.md
@@ -0,0 +1,636 @@
+# Codex Reviewer Backend Plan
+
+Status: historical - implemented via PR #15 (2026-06-10)
+Branch: `feature/codex-reviewer-backend`
+Worktree: `agent-protocols-codex-reviewer`
+
+## Context
+
+The structured-review runner currently invokes only Claude Code (`claude -p`)
+as the reviewer. When the driver is also Claude, the reviewer shares the
+driver's model family and its blind spots. The human wants cross-vendor
+review: when the driver is Claude Code, the reviewer should default to Codex;
+when the driver is Codex, the reviewer should default to Claude Code.
+
+A spike (codex-cli 0.137.0, scratch repo) verified the key unknowns:
+
+- `codex exec -` reads the prompt from stdin, like the current runner feeds
+  `claude -p`.
+- `--json` emits parseable JSONL events (`thread.started`, `item.started`,
+  `item.completed` with `agent_message` / `command_execution` / `file_change`
+  items, `turn.completed` with token usage). `--output-last-message FILE`
+  writes the final agent message to a file.
+- Under `--sandbox workspace-write`, Codex edits files but cannot commit: the
+  sandbox blocks `.git/index.lock`. Adding `--add-dir <repo git dir>` was
+  empirically shown to allow a clean `git commit` with the expected
+  `structured-review:` message.
+- `codex exec` exits 0 even when an inner command failed, so the runner's
+  existing post-hoc write-mode verification stays the real gate. This matches
+  the existing Claude path, which also does not trust the agent's exit
+  behavior beyond returncode != 0.
+- Driver detection markers exist on both sides: Claude Code sets `CLAUDECODE`
+  (and `CLAUDE_CODE_*`) in child environments; Codex sets `CODEX_THREAD_ID`
+  and `CODEX_SANDBOX` in its child environments.
+
+Decisions already converged with the human:
+
+1. Single runner script with a backend switch; keep the existing
+   `structured-review/scripts/claude_structured_review.py` path because
+   downstream repos reference it via submodule.
+2. In write mode the Codex reviewer commits its own thread-file change
+   (symmetric with Claude); `verify_write_mode` stays the single shared gate.
+3. Review threads must record reviewer identity (backend) in pass headings.
+4. Default reviewer backend is cross-vendor: Claude Code driver -> codex
+   reviewer; Codex driver -> claude reviewer.
+5. Codex defaults are pinned in the runner: model `gpt-5.5`, reasoning effort
+   `xhigh`, overridable by flags.
+6. Protocol text becomes backend-neutral where it currently says the runner is
+   Claude-only.
+
+## Problem
+
+`claude_structured_review.py` hard-codes the Claude CLI: argv construction,
+stream parsing, version probe, and the `--claude-bin` flag. Everything else
+(prompt building, overlay loading, git snapshots, write/print verification,
+redaction, run logs, metadata) is backend-agnostic. The protocol text
+(`structured-review/SKILL.md`, `README.md`, `docs/CURRENT.md`) also assumes a
+Claude reviewer.
+
+## Goal
+
+Let the bundled runner drive either Claude Code or Codex as the reviewer, with
+a cross-vendor default keyed off the detected driver, without changing the
+write/print verification contract.
+
+## Non-Goals
+
+- Do not rename or move the runner script.
+- Do not change `verify_write_mode` / `verify_print_mode` semantics.
+- Do not add multi-reviewer panels (both backends on one artifact) in this
+  slice.
+- Do not build a generic N-backend plugin system; exactly two backends.
+- Do not touch Scout, closeout, planning, or backlog protocols beyond the
+  structured-review text updates below.
+- Do not weaken any SAFETY rule.
+
+## Proposed Design
+
+1. Backend abstraction in `claude_structured_review.py` (single file, stdlib
+   only). A small registry keyed by backend name providing:
+   - `argv(config, logs)`: full reviewer CLI argv;
+   - `parse_line(line) -> StreamLineResult`: per-backend JSONL parsing;
+   - `finalize(state, logs) -> review_text`: per-backend final text capture.
+     Claude keeps the current delta-preference / last-message-wins behavior
+     byte-for-byte; Codex accumulates `agent_message` items during the run and
+     then prefers the `--output-last-message` file when present and non-empty;
+   - `version(bin)`: version probe (`claude --version` / `codex --version`);
+   - driver env markers for auto-detection.
+
+   The existing module-level helpers keep their names: `claude_argv`,
+   `process_stream_line`, and `claude_version` become the Claude backend's
+   implementations, and the shared invocation loop keeps the `run_claude`
+   name, so existing tests that reference or patch them stay valid. Existing
+   tests are touched only where this plan requires it: pinning
+   `--reviewer-backend claude` (or a marker-free env) for determinism, and
+   the `reviewer_version` metadata rename below.
+
+2. Codex backend specifics:
+   - argv (write mode):
+     `codex exec - --json --sandbox workspace-write --add-dir <git-dir>
+     [--add-dir <git-common-dir>] -m <model>
+     -c model_reasoning_effort=<effort> --output-last-message
+     <run-log-dir>/last-message.txt`, run with `cwd=worktree`, prompt on
+     stdin. Resolve git dirs via `git rev-parse --git-dir --git-common-dir`
+     and pass both when they differ (linked worktrees).
+   - argv (print mode): same minus `--add-dir`, with `--sandbox read-only`,
+     keeping `--output-last-message`. The file is written by the codex
+     harness outside the sandbox, not by a sandboxed child command: a spike
+     probe under `--sandbox read-only` wrote the `-o` file successfully
+     (codex-cli 0.137.0). For the same reason a caller-supplied
+     `--run-log-dir` outside the worktree needs no extra `--add-dir`.
+   - Review text: accumulate `agent_message` items from `item.completed`
+     events; `finalize` prefers the `--output-last-message` file as the
+     authoritative final text when present and non-empty, with the
+     accumulated messages as fallback. Malformed JSONL lines keep using the
+     existing malformed-line counter.
+   - Stderr progress: print the existing heartbeat/event lines so long runs
+     stay observable; record `turn.completed` usage in metadata as
+     best-effort observability, not an acceptance contract.
+
+3. Backend selection:
+   - New flag `--reviewer-backend {auto,claude,codex}`, default `auto`.
+   - `auto`: `CLAUDECODE` set and no Codex marker -> `codex`;
+     `CODEX_THREAD_ID`/`CODEX_SANDBOX` set and no `CLAUDECODE` -> `claude`;
+     both kinds of markers set -> hard error instructing an explicit
+     `--reviewer-backend`; neither -> `claude` (today's behavior for humans
+     invoking from a plain terminal).
+   - When `auto` resolves through a detected driver marker, the runner
+     preflights the selected backend binary (`shutil.which`) and fails with
+     an actionable error naming `--reviewer-backend`, mirroring the
+     both-markers error. The neither-marker default deliberately keeps
+     today's no-preflight behavior so plain-terminal and CI invocations are
+     unchanged.
+   - `metadata.json` records the selected `backend`, the active backend's
+     effective model and effort values in the existing `model`/`effort`
+     fields, and a backend-neutral `reviewer_version` (renaming
+     `claude_version`; the existing test asserting that key is updated).
+     Codex token usage is recorded when seen, best-effort.
+
+4. Flags and defaults:
+   - Existing `--model` (default `opus`), `--effort` (default `xhigh`), and
+     `--claude-bin` stay Claude-only, fully backward compatible.
+   - New `--codex-bin` (default `codex`), `--codex-model` (default
+     `gpt-5.5`), `--codex-effort` (default `xhigh`), as constants near the
+     existing `DEFAULT_MODEL`. Rejected alternative: reusing `--model` for
+     both backends, because existing callers pin `--model opus` and would
+     silently misconfigure a Codex run under `auto`.
+
+5. Reviewer identity in threads:
+   - The prompt instructions gain one line for both backends: reviewer pass
+     headings must name the reviewer backend, for example
+     `### Reviewer pass 1 (impl, codex reviewer)`.
+   - Commit-message contract is unchanged (`structured-review:` prefix).
+
+6. Protocol and doc text:
+   - `structured-review/SKILL.md`: rename the `Default Claude Runner Rule`
+     section to a backend-neutral default-runner rule; state the cross-vendor
+     default and the `auto` detection; define availability per configured
+     backend binary; if the cross-vendor backend is unavailable, the runner
+     fails loudly and substituting the same-vendor reviewer is an explicit
+     human/driver decision via `--reviewer-backend`, not a silent fallback.
+     Update the driver-summary wording (`Claude reviewer suggestions` ->
+     reviewer suggestions with backend identity).
+   - `README.md` usage section: add a `--reviewer-backend` example.
+   - `docs/CURRENT.md`: structured-review entry mentions both reviewer
+     backends.
+
+7. Tests (`structured-review/tests/test_claude_structured_review.py`):
+   - Backend selection: explicit flags; `auto` under each env-marker case,
+     including the both-markers hard error and the marker-resolved
+     preflight-unavailable error (env and `shutil.which` patched per test).
+     Legacy tests pin `--reviewer-backend claude` or a marker-free env so
+     they stay deterministic regardless of which agent runs them.
+   - Codex argv construction: write vs print sandbox flags, `--add-dir`
+     resolution for linked worktrees, pinned model/effort and overrides.
+   - Codex stream parsing: agent_message extraction, last-message-file
+     preference, malformed-line counting.
+   - End-to-end fake `codex` bin (mirroring the existing fake `claude` test):
+     write mode appends threads and commits, passes `verify_write_mode`;
+     print mode prints and leaves the worktree untouched.
+   - Prompt contains the reviewer-identity instruction for both backends.
+   - Metadata records backend name and version string.
+
+## Acceptance Criteria
+
+- `--reviewer-backend codex` drives a fake `codex` bin through both modes and
+  all existing verification rules pass unchanged.
+- `auto` resolves: Claude driver env -> codex, Codex driver env -> claude,
+  both -> hard error naming the flag, neither -> claude.
+- A marker-resolved `auto` selection whose backend binary is missing fails
+  with an error naming `--reviewer-backend`; the neither-marker default and
+  explicit selections keep today's failure behavior.
+- Codex defaults are `gpt-5.5` / `xhigh`, overridable via `--codex-model` /
+  `--codex-effort`; Claude defaults and flags are byte-for-byte unchanged.
+- Write-mode Codex argv includes the git-dir `--add-dir` entries; print-mode
+  argv uses `--sandbox read-only` and no `--add-dir`.
+- The reviewer prompt instructs backend-named pass headings for both
+  backends.
+- `metadata.json` records the selected backend, the active backend's
+  effective model/effort, and a backend-neutral `reviewer_version`. Token
+  usage recording is best-effort and not part of acceptance.
+- `structured-review/SKILL.md`, `README.md`, and `docs/CURRENT.md` are
+  backend-neutral and state the cross-vendor default and no-silent-fallback
+  rule.
+- The full existing test suite keeps passing.
+
+## Validation Plan
+
+Run locally in the feature worktree:
+
+```bash
+python -m unittest discover -s structured-review/tests
+python -m unittest discover -s tests
+python -m unittest discover -s scout/tests
+python -m compileall -q structured-review scout scripts tests
+git diff --check
+```
+
+CI runs the same set on the PR. No real `codex` or `claude` binary is invoked
+by tests; both are faked, as today.
+
+## Review Gates
+
+1. Plan review gate: bundled runner, `write-commit-to-plan`, type
+   `impl-plan`, this plan as artifact and thread file. The Codex backend does
+   not exist yet, so this gate necessarily runs the Claude reviewer.
+2. Implementation review gate: type `impl` against this accepted plan. If the
+   implementation is green, prefer dogfooding the new Codex backend for this
+   gate (Claude driver -> codex reviewer) and record the backend identity in
+   the pass heading.
+3. Closeout per `closeout/SKILL.md` after review gates resolve, with PR and
+   human merge.
+
+## Risks
+
+- `codex exec` exits 0 despite inner failures; mitigated by the unchanged
+  post-hoc verification (exactly one commit, thread-file-only, append-only).
+- `--add-dir` grants the reviewer write access to the git dir; tampering is
+  bounded by the same post-hoc verification plus run logs, and is no broader
+  than the Claude path's `--permission-mode auto`.
+- Env auto-detection can see both markers in nested agent chains; handled by
+  the hard error plus explicit flag, never a silent guess.
+- Codex's sandbox disables network (`CODEX_SANDBOX_NETWORK_DISABLED=1`), so a
+  Codex reviewer cannot fetch external references. Reviews are repo-local by
+  design; this parity difference is recorded here and in run metadata rather
+  than in SKILL text, which stays lean.
+- Pinned `gpt-5.5` will go stale; it is a named constant with an override
+  flag, and the staleness signal is Codex rejecting the model name.
+- `--json` is marked experimental upstream; tolerant parsing, the
+  malformed-line counter, and the last-message file keep text capture robust.
+- Fake-bin tests cannot exercise real Codex sandbox semantics (read-only
+  blocking, `--add-dir` scope, harness-side `-o` writes); those claims rest
+  on the spike evidence at codex-cli 0.137.0 and are re-validated live by the
+  dogfooded implementation-review gate.
+
+## Scope Addendum (2026-06-10, human-approved)
+
+After the implementation review passed, the human approved two additions to
+this PR during closeout discussion:
+
+1. Closeout protocol delegation. `closeout/SKILL.md` carried a duplicated,
+   now-stale copy of the runner policy ("use the bundled Claude runner",
+   mode defaults). The duplicated mechanics are removed; closeout keeps its
+   Closeout Review trigger list and defers runner/backend/mode policy to the
+   structured-review skill. This was a non-goal of the original plan; the
+   human explicitly widened scope to fix the drift at its root in this PR.
+
+2. Runner-owned write mode. Field experience (roughly half of
+   write-commit-to-plan runs historically, and one of four runner passes in
+   this PR's own session) shows reviewer agents frequently violate the
+   append-only thread-file contract, typically by deleting placeholder
+   lines, invalidating otherwise-good reviews and forcing manual driver
+   recovery. The write-mode mechanics change from reviewer-writes-and-commits
+   to runner-writes-and-commits:
+   - The reviewer always runs read-only and returns the review text; in
+     write mode the runner itself appends that text verbatim under the
+     thread file's `## Review Threads` section and creates the
+     `structured-review:` commit. Append-only now holds by construction.
+   - Before committing, the runner requires the reviewer to have left the
+     worktree untouched, and checks the text is review-like, free of local
+     absolute paths, and free of secret-like material. `verify_write_mode`
+     is kept unchanged as a defense-in-depth self-check on the runner's own
+     commit.
+   - The Codex backend runs `--sandbox read-only` in both modes; the
+     write-mode `--add-dir` git-dir grant is removed as no longer needed.
+     The Claude backend's argv is unchanged.
+   - The reviewer prompt no longer instructs committing; it instructs
+     returning the complete review threads as markdown and explicitly
+     forbids writing files.
+   - The human's requirement driving this: the complete reviewer text must
+     land in the durable thread verbatim with zero driver transcription
+     burden, which the by-construction append guarantees.
+
+Updated acceptance for the addendum (as amended after reviewer pass 3):
+
+- Write mode produces exactly one runner-created commit whose added block is
+  the reviewer's returned text byte-for-byte inside the `## Review Threads`
+  section, for both backends, including when other `##` sections follow the
+  threads section. The runner adds only the separating blank line before the
+  block and a terminating newline when the returned text lacks one; an
+  exact-output test pins this.
+- A reviewer that modifies the worktree or HEAD in write mode fails the run
+  before any append happens.
+- Non-review-like, local-path-bearing, or secret-bearing reviewer output
+  fails before any commit, with the text preserved in run logs.
+- Codex argv contains `--sandbox workspace-write` and no `--add-dir` in both
+  modes. Pass 3 ran under `read-only` and was environment-blocked from
+  running the unittest suites (no writable temp dir), which would
+  permanently remove reviewer-rerun validation from Codex reviews;
+  `workspace-write` without any `.git` grant restores test execution (a
+  sandbox probe confirmed writable temp dirs) while commits remain
+  impossible and stray worktree writes still fail the run via the
+  untouched-worktree check.
+- `closeout/SKILL.md` keeps the trigger list and no longer states runner,
+  backend, or mode policy.
+
+## Review Threads
+
+### Reviewer pass 1 (impl-plan, claude reviewer)
+
+Original concern: is this plan concrete enough to implement a Codex reviewer
+backend in the single-file runner without guessing, and does it keep the
+write/print verification contract sound?
+
+**Blocking issues**
+
+None. Goal, non-goals, spike-backed assumptions, acceptance, and validation are
+specific and executable; scope is right-sized to exactly two backends. The
+items below are non-blocking refinements, not gaps that block starting.
+
+**Non-blocking issues**
+
+1. Backend-abstraction seam for review-text capture is under-specified against
+   the current run loop. The enumerated registry interface (Proposed Design 1:
+   `argv`, `parse_line`, `version`, env markers) does not name where Codex's
+   documented text-capture behavior lives, and the existing loop is more
+   entangled with backend specifics than that interface implies:
+   - `run_claude` assembles `review_text` inline
+     (`structured-review/scripts/claude_structured_review.py:698`) and
+     *overwrites* `message_text_candidate` per line (`:684-685`, last-wins).
+     Codex must *accumulate* multiple `agent_message` items and then *prefer the
+     `--output-last-message` file* — a post-run step `parse_line` cannot do (it
+     sees one line and cannot read a file). Name this seam (for example a
+     backend `finalize(state, logs) -> review_text` hook) so the Claude path's
+     delta-preference / last-wins behavior stays byte-for-byte while Codex gets
+     accumulate-then-file.
+   - Existing unit tests call `csr.claude_argv(...)`,
+     `csr.process_stream_line(...)`, and `csr.claude_version(...)` by name
+     (`structured-review/tests/test_claude_structured_review.py:237,290,298,301,308,313,319`).
+     Acceptance says "full existing test suite keeps passing" and "Claude ...
+     byte-for-byte unchanged." A registry refactor that renames these breaks
+     those tests. State that these module-level helpers are preserved (wrapped
+     as the Claude backend) or that the affected tests are updated, so the
+     acceptance is not self-contradictory.
+
+2. Print-mode `--output-last-message` under `--sandbox read-only` is unverified.
+   Proposed Design 2 lists `--output-last-message` in the write-mode
+   (workspace-write) argv and says print mode is "same minus `--add-dir`, with
+   `--sandbox read-only`." If the sandboxed child writes that file, `read-only`
+   blocks the write; if the codex harness writes it post-sandbox, it is fine.
+   The spike covered workspace-write only. Pin whether print mode keeps
+   `--output-last-message`, and if so confirm it works under `read-only`, or
+   document the `agent_message` accumulation as the print-mode fallback. As
+   written this ambiguity forces an implementer to guess.
+
+3. A custom `--run-log-dir` can fall outside the Codex sandbox grant. The
+   default run-log dir lives under the git-dir (`:542-546`), which write-mode
+   `--add-dir <git-dir>` covers. But a caller-supplied `--run-log-dir` outside
+   the worktree and git-dirs is in neither `workspace-write` (cwd) nor
+   `--add-dir`, so the real sandbox would block
+   `--output-last-message <run-log-dir>/last-message.txt`. Existing tests pass
+   `--run-log-dir` to a temp path (`tests:558`), and a fake `codex` is not
+   sandboxed, so the fake-bin e2e cannot catch this. Add the run-log dir as an
+   `--add-dir` entry in write mode (or document `--output-last-message` as
+   best-effort with the `agent_message` fallback), and call out that the faked
+   bin masks real-sandbox-only failures.
+
+4. Metadata semantics under Codex. `write_metadata` records `model`, `effort`,
+   and `claude_version` (`:588-617`). On a Codex run, `--model`/`--effort`
+   default to `opus`/`xhigh` and are unused; recording them verbatim mislabels a
+   `gpt-5.5` run, and the `claude_version` key name is backend-specific.
+   Acceptance says metadata records "the backend and its version" but is silent
+   on these existing fields. Specify that metadata reflects the active backend's
+   model/effort/version (namespaced or backend-keyed) and whether the
+   `claude_version` key is preserved for backward compatibility or renamed.
+
+5. An `auto`-selected-but-unavailable backend should fail with an actionable
+   message. The no-silent-fallback property holds by construction: `auto`
+   resolves to exactly one backend from env markers and the design has no
+   substitution path, so the SKILL rule is enforceable as written. But Proposed
+   Design 3 crafts a helpful hard error only for the both-markers case; an
+   `auto`->codex resolution when `codex` is absent would surface as a raw
+   `FileNotFoundError` ("fails loudly" but not clearly, and without naming the
+   flag). Add a preflight availability check that mirrors the both-markers error
+   and instructs an explicit `--reviewer-backend`, plus a test — the test plan
+   currently omits this case.
+
+**Overall judgment**
+
+Ready for implementation. The plan is detailed, correctly scoped, and its
+acceptance and validation are executable by the next agent. The write contract
+is sound: `--add-dir <git-dir>` brings Codex to parity with the existing Claude
+`--permission-mode auto` path, which already has full git-dir write access and
+commits today, and `verify_write_mode` (`:443-478`) remains the single shared
+gate bounding the HEAD movement, single-commit, thread-file-only, append-only,
+no-local-path, and no-secret outcomes — so the `codex exec` exit-0-on-failure
+behavior does not weaken the gate. Keeping `--model`/`--effort` Claude-only
+while adding `--codex-*` is the right backward-compatibility call and preserves
+existing callers that pin `--model opus`. The cross-vendor `auto` rule
+(both-markers hard error, neither->claude) is coherent and reads the driver's
+own inherited env, not the reviewer child's. The non-blocking items above are
+clarity and edge-case tightenings.
+
+**Residual risks / validation gaps**
+
+- Git-dir side effects outside HEAD (refs, config, hooks, stash) are not
+  verified by `verify_write_mode` for either backend; unchanged from today,
+  parity preserved, acceptable.
+- The fake-`codex` e2e cannot exercise real sandbox semantics (read-only
+  writes, `--add-dir` scope, `--output-last-message` placement); those rest on
+  the spike's empirical claims at codex-cli 0.137.0 plus the print-mode
+  confirmation requested in item 2.
+- Token-usage recording (`turn.completed`) appears in the design and risks but
+  not in the acceptance criteria or the test list; if it is a contract, add
+  coverage, otherwise mark it best-effort.
+
+### Driver response 1
+
+All five findings accepted; the plan body is updated accordingly.
+
+- N1: the registry now names a `finalize(state, logs)` hook; Claude keeps its
+  delta-preference/last-wins capture byte-for-byte, Codex accumulates then
+  prefers the `-o` file. Module-level helper names (`claude_argv`,
+  `process_stream_line`, `claude_version`, `run_claude`) are preserved as the
+  Claude backend/shared loop, so name-coupled tests stay valid; tests change
+  only for backend pinning and the `reviewer_version` rename.
+- N2: resolved empirically rather than by fallback wording. An existing spike
+  probe ran `codex exec --sandbox read-only` with `--output-last-message`
+  targeting a path the sandbox could not have written, and the file was
+  written: the codex harness writes `-o` outside the sandbox. Print mode
+  keeps the flag, with accumulated `agent_message` text as fallback.
+  Provenance: driver-reported spike at codex-cli 0.137.0.
+- N3: the same harness-side `-o` evidence dissolves the custom
+  `--run-log-dir` concern; no extra `--add-dir` is needed. The fake-bin
+  masking caveat the reviewer raised is now recorded in Risks, with the
+  dogfooded impl-review gate as the live re-validation.
+- N4: metadata semantics specified: `backend`, active model/effort in the
+  existing fields, backend-neutral `reviewer_version` (renamed from
+  `claude_version`, updating the one asserting test), usage best-effort.
+- N5: preflight added for marker-resolved `auto` selections with an error
+  naming `--reviewer-backend`. Scope refinement: the neither-marker default
+  and explicit selections keep today's no-preflight failure behavior, so
+  plain-terminal and CI invocations do not change. Both-markers error stays
+  as planned. A preflight test is added to the test plan.
+
+Proceeding to implementation without a re-review pass: the reviewer found no
+blocking issues and judged the plan ready for implementation; every change
+above adopts the reviewer's own recommendations without altering accepted
+scope. The human-mandated gate chain continues with the implementation
+review dogfooding the Codex backend.
+
+### Reviewer pass 2 (impl, codex reviewer)
+
+Original concern: does commit `fba6a45` implement the accepted Codex reviewer
+backend plan, including the driver response 1 refinements, without weakening
+the structured-review runner contract?
+
+**Blocking issues**
+
+None.
+
+**Non-blocking issues**
+
+None.
+
+**Traceability**
+
+- Done: the backend seam is finalized with preserved helper names.
+  `structured-review/scripts/claude_structured_review.py` keeps
+  `claude_argv`, `process_stream_line`, `claude_version`, and `run_claude`,
+  while adding `ReviewerBackend`, per-backend `finalize`, and Codex-specific
+  stream handling. The Claude path still prefers streamed text deltas and then
+  the last assistant message.
+- Done: cross-vendor `auto` resolution matches the accepted rules.
+  `resolve_reviewer_backend` maps Claude markers to `codex`, Codex markers to
+  `claude`, neither marker to `claude`, and both marker kinds to a hard error
+  naming `--reviewer-backend`. Marker-resolved missing binaries preflight with
+  `shutil.which` and name the same flag.
+- Done: Codex argv shape matches the plan for plain and linked worktrees.
+  Write mode uses `codex exec - --json --sandbox workspace-write` plus
+  `--add-dir` entries from `git rev-parse --git-dir --git-common-dir`; print
+  mode uses `--sandbox read-only` and no `--add-dir`. The tests cover normal
+  and linked worktree git-dir behavior.
+- Done: Codex text capture prefers `last-message.txt` and falls back to
+  accumulated `agent_message` items. The malformed-line counter and
+  best-effort `turn.completed` usage capture are covered by the new tests.
+- Done: metadata is backend-aware and uses `reviewer_version`. Metadata now
+  records `backend`, active backend model and effort, `reviewer_version`, and
+  optional token usage; the old `claude_version` test is updated
+  deterministically.
+- Done: reviewer identity is injected into the prompt, including the required
+  backend-named pass heading example for both backends.
+- Done: `structured-review/SKILL.md`, `README.md`, and `docs/CURRENT.md` are
+  backend-neutral and state the cross-vendor default and no-silent-fallback
+  rule.
+- Done: Claude path behavior is preserved for the accepted surface. Legacy
+  helper callers are pinned to `--reviewer-backend claude`, `--claude-bin`
+  behavior remains path-based, and the existing verification functions keep the
+  same write/print gate semantics.
+
+**Overall judgment**
+
+Ready for closeout. The implementation matches the accepted plan and the
+driver response 1 refinements, and I found no blocking or non-blocking issues.
+
+**Residual risks / validation gaps**
+
+The local suite uses fake Claude and Codex binaries, so real Codex CLI sandbox
+semantics for `--output-last-message` and `--add-dir` remain the spike-backed
+risk already documented in the plan. I did not check PR CI status in this
+review pass.
+
+Validation rerun by reviewer:
+
+```bash
+python -m unittest discover -s structured-review/tests
+python -m unittest discover -s tests
+python -m unittest discover -s scout/tests
+python -m compileall -q structured-review scout scripts tests
+git diff --check
+```
+
+All five commands passed locally.
+
+### Reviewer pass 3 (impl, codex reviewer)
+
+Original concern: re-review commit `083f42c` against the human-approved Scope Addendum for runner-owned write mode and closeout runner-policy delegation.
+
+**Blocking issues**
+
+1. `append_and_commit_review` does not preserve the reviewer output verbatim. The addendum requires the runner-created commit’s added lines to be the reviewer’s output verbatim inside `## Review Threads` (`docs/agent_plans/2026-06-10_codex_reviewer_backend_plan.md:295`). The implementation writes `review_text.strip()` and normalizes surrounding newlines (`structured-review/scripts/claude_structured_review.py:494`-`498`), so leading/trailing reviewer text is changed before commit. The current tests check insertion before a later `##` section, but not exact equality of the committed block to the returned reviewer text (`structured-review/tests/test_claude_structured_review.py:859`). This weakens the addendum’s central “zero transcription / verbatim append” guarantee.
+
+**Non-blocking issues**
+
+None.
+
+**Traceability**
+
+- Done: closeout now keeps the Closeout Review trigger list and delegates runner/backend/mode policy to structured-review (`closeout/SKILL.md:46`).
+- Partial: runner-owned write mode appends inside `## Review Threads`, including before later `##` sections, but not verbatim because of whitespace normalization.
+- Done: write mode rejects reviewer HEAD/worktree changes before append (`structured-review/scripts/claude_structured_review.py:465`).
+- Done: non-review-like, local-path, and secret-like output is rejected before append by `verify_reviewer_output`.
+- Done: Codex argv is `--sandbox read-only` with no `--add-dir` in both modes (`structured-review/scripts/claude_structured_review.py:624`).
+- Done: `verify_write_mode` remains as the post-commit self-check.
+- Done: the write-mode prompt tells reviewers not to write files, stage, or commit (`structured-review/scripts/claude_structured_review.py:298`).
+- Done: structured-review and closeout text are consistent on runner-policy delegation.
+
+**Overall judgment**
+
+Not ready for closeout until the verbatim append contract is fixed and covered by an exact-output test.
+
+**Residual risks / validation gaps**
+
+Reviewer validation was environment-blocked for the unittest commands: all three unittest suites failed before assertions because the read-only sandbox provided no writable temporary directory. `python -m compileall -q structured-review scout scripts tests` and `git diff --check` returned 0.
+
+Commands attempted:
+
+```bash
+python -m unittest discover -s structured-review/tests
+python -m unittest discover -s tests
+python -m unittest discover -s scout/tests
+python -m compileall -q structured-review scout scripts tests
+git diff --check
+```
+
+### Driver response 2
+
+Pass 3 findings, classified:
+
+- B1 accepted. `append_and_commit_review` no longer strips the reviewer
+  text: the returned text is appended byte-for-byte, with only a separating
+  blank line before it and a terminating newline added when the text lacks
+  one. The addendum acceptance wording is amended to state that exact
+  contract, and a new exact-output test asserts the committed file contains
+  the returned text unchanged.
+- The environment-blocked validation in pass 3's residuals is treated as a
+  finding in its own right: `--sandbox read-only` would permanently remove
+  reviewer-rerun validation from Codex reviews because unittest needs
+  writable temp dirs. Codex now runs `--sandbox workspace-write` with no
+  `.git` grant in both modes: a sandbox probe confirmed temp dirs are
+  writable, commits remain impossible (the original spike showed the sandbox
+  blocks `.git`), and any stray worktree write still fails the run through
+  `verify_reviewer_output`'s untouched-worktree check before any append.
+  The addendum acceptance is amended accordingly.
+- Validation after the fixes: full local suite green (structured-review,
+  tests, scout, backlog checker, compileall, `git diff --check`),
+  driver-reported; pass 4 should be able to rerun the suites under the new
+  sandbox.
+
+### Reviewer pass 4 (impl, codex reviewer)
+
+Original concern: implementation re-review of the pass-3 fixes in Driver response 2: verbatim runner append with exact-output coverage, and Codex `workspace-write` without `.git` grant so reviewer validation can run.
+
+**Blocking issues**
+
+None.
+
+**Non-blocking issues**
+
+None.
+
+**Traceability**
+
+- Done: pass-3 B1 is resolved. `append_and_commit_review` now appends `review_text` without stripping it, adding only the separator before the block and a final newline when missing.
+- Done: exact-output coverage was added in `test_append_preserves_review_text_byte_for_byte`, including indentation and trailing spaces inside the returned review text.
+- Done: Codex argv uses `--sandbox workspace-write` in both write and print modes and does not include `--add-dir`.
+- Done: the runner still verifies reviewer output before appending and runs `verify_write_mode` after the runner-created commit.
+- Done: earlier accepted backend-selection, prompt, metadata, fake Codex, and protocol-text behavior did not regress in the inspected artifacts.
+
+**Overall judgment**
+
+Ready for closeout. The amended addendum acceptance is satisfied, and there are no blocking issues.
+
+**Residual risks / validation gaps**
+
+Reviewer-rerun validation passed locally. I ran:
+
+```bash
+python -m unittest discover -s structured-review/tests
+python -m unittest discover -s tests
+python -m unittest discover -s scout/tests
+python -m compileall -q structured-review scout scripts tests
+git diff --check
+python scripts/check_backlog.py --root .
+```
+
+All passed. The worktree remained clean after validation.
diff --git a/docs/agent_plans/README.md b/docs/agent_plans/README.md
index 3eaeaf6..aabfa4a 100644
--- a/docs/agent_plans/README.md
+++ b/docs/agent_plans/README.md
@@ -28,3 +28,5 @@ responses, and closeout evidence for changes to this repo's protocols.
   provenance hardening plan.
 - `2026-06-10_scout_validation_provenance_closeout.md` - closeout report for
   the Scout validation provenance work (PR #13).
+- `2026-06-10_codex_reviewer_backend_plan.md` - Codex reviewer backend and
+  cross-vendor default runner plan.
diff --git a/structured-review/SKILL.md b/structured-review/SKILL.md
index a0b1dbf..0e99b32 100644
--- a/structured-review/SKILL.md
+++ b/structured-review/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: structured-review
-description: Use when a written reviewable artifact needs structured human-agent review, including plans, implementation plans, implementation reviews, optional closeout evidence reviews, and review-response passes. Defaults repo-backed review gates to the bundled Claude runner when available, with explicit reviewer/driver roles and driver-owned post-review decisions.
+description: Use when a written reviewable artifact needs structured human-agent review, including plans, implementation plans, implementation reviews, optional closeout evidence reviews, and review-response passes. Defaults repo-backed review gates to the bundled runner when available, with Claude and Codex reviewer backends, explicit reviewer/driver roles, and driver-owned post-review decisions.
 ---
 
 # Structured Review
@@ -48,18 +48,31 @@ skill manually, read these references before judging readiness.
 For high-touch product UI, dashboards, charts, data review tools, and operator
 consoles, also apply `references/ui-review.md`.
 
-## Default Claude Runner Rule
+## Default Runner Rule
 
 For repo-backed Plan Review, Plan Re-review, Implementation Review, and
 Implementation Re-review gates, the driver defaults to invoking the bundled
-Claude runner for the reviewer pass when it is available.
+runner for the reviewer pass when it is available.
 
 `Closeout Review` is not part of this default gate list. If closeout explicitly
 triggers a repo-backed Closeout Review, use the bundled runner when available;
 the trigger comes from closeout, not from structured-review by default.
 
+The runner supports two reviewer backends: `claude` (Claude Code) and `codex`
+(Codex CLI). `--reviewer-backend` defaults to `auto`, which selects the
+cross-vendor reviewer from the driver's environment: a Claude Code driver gets
+the `codex` reviewer and a Codex driver gets the `claude` reviewer. When both
+driver markers are present the runner fails and requires an explicit
+`--reviewer-backend`; when neither is present it uses `claude`. Review threads
+record the reviewer backend in each pass heading.
+
+If an auto-selected backend binary is unavailable, the runner fails with an
+actionable error. Substituting the same-vendor reviewer is an explicit
+human/driver decision via `--reviewer-backend`, never a silent fallback.
+
 `Default` means use the runner unless:
-- the human explicitly says not to use Claude;
+- the human explicitly says not to use the runner or a specific reviewer
+  backend;
 - the review is not repo-backed;
 - the runner is unavailable and the blocker is reported.
 
@@ -67,7 +80,8 @@ The runner is available when:
 - the runner script exists in this protocol checkout or submodule;
 - the target worktree is a git repository;
 - Python can run the script;
-- `claude` is available through the configured `--claude-bin`.
+- the selected reviewer backend binary is available through the configured
+  `--claude-bin` or `--codex-bin`.
 
 Do not silently substitute driver self-review, chat-only commentary, or a
 manually constructed reviewer prompt for a required repo-backed review gate. If
@@ -89,6 +103,11 @@ python structured-review/scripts/claude_structured_review.py \
   --topic "example plan"
 ```
 
+In both modes the reviewer runs read-only and returns the review text. In
+`write-commit-to-plan` the runner itself appends that text verbatim under the
+thread file's `Review Threads` section and creates the `structured-review:`
+commit, so the append-only thread contract holds by construction.
+
 Use `print-review` when appending review threads would pollute a durable
 reference artifact, such as a published doc, skill, protocol overlay, or other
 reference file.
@@ -100,6 +119,9 @@ provides a thread file that already has a `## Review Threads` section.
 The explicit runner `--mode` is the write-back authorization for that run. The
 driver still owns the decision after receiving reviewer output.
 
+Pass `--reviewer-backend claude|codex` to pin the reviewer backend; the
+default `auto` selects the cross-vendor reviewer for the detected driver.
+
 Use `--protocol-dir` only when testing or intentionally running against a
 different checkout of this protocol. By default the runner uses the
 `structured-review` directory that contains the script.
@@ -199,7 +221,7 @@ Pause and discuss with the human before editing when a reviewer finding:
 
 After applying or rejecting reviewer feedback, the driver summary must
 distinguish:
-- Claude reviewer suggestions;
+- reviewer suggestions, named by reviewer backend;
 - driver decisions;
 - human decisions;
 - remaining risks;
diff --git a/structured-review/scripts/claude_structured_review.py b/structured-review/scripts/claude_structured_review.py
index ff5b67f..ba8986b 100644
--- a/structured-review/scripts/claude_structured_review.py
+++ b/structured-review/scripts/claude_structured_review.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Run Claude Code as a structured-review reviewer for a target worktree."""
+"""Run Claude Code or Codex as a structured-review reviewer for a target worktree."""
 
 from __future__ import annotations
 
@@ -9,11 +9,12 @@
 import re
 import secrets
 import selectors
+import shutil
 import subprocess
 import sys
 import time
-from collections.abc import Iterable, Sequence
-from dataclasses import dataclass
+from collections.abc import Callable, Iterable, Mapping, Sequence
+from dataclasses import dataclass, field
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any, TextIO, cast
@@ -23,9 +24,18 @@
 REVIEW_TYPES = ("other-plan", "impl-plan", "impl", "closeout-review")
 DEFAULT_MODEL = "opus"
 DEFAULT_EFFORT = "xhigh"
+DEFAULT_CODEX_MODEL = "gpt-5.5"
+DEFAULT_CODEX_EFFORT = "xhigh"
 DEFAULT_TIMEOUT_SEC = 1800
 DEFAULT_HEARTBEAT_SEC = 30
 
+BACKEND_AUTO = "auto"
+BACKEND_CLAUDE = "claude"
+BACKEND_CODEX = "codex"
+CLAUDE_DRIVER_MARKERS = ("CLAUDECODE",)
+CODEX_DRIVER_MARKERS = ("CODEX_THREAD_ID", "CODEX_SANDBOX")
+CODEX_LAST_MESSAGE_NAME = "last-message.txt"
+
 CONTEXT_OVERLAY = Path(".agent-protocols/context.md")
 STRUCTURED_REVIEW_OVERLAY = Path(".agent-protocols/structured-review.md")
 REQUIRED_PROTOCOL_REFERENCES = (
@@ -87,9 +97,13 @@ class RunConfig:
     focus: str
     thread_file: ResolvedPath | None
     topic: str | None
+    backend: str
     model: str
     effort: str
     claude_bin: str
+    codex_bin: str
+    codex_model: str
+    codex_effort: str
     timeout_sec: int
     heartbeat_sec: int
     run_log_dir: Path | None
@@ -117,6 +131,14 @@ class StreamLineResult:
     malformed: bool
     text_delta: str = ""
     message_text: str = ""
+    usage: dict[str, Any] | None = None
+
+
+@dataclass
+class ReviewState:
+    text_deltas: list[str] = field(default_factory=list)
+    final_message: str = ""
+    messages: list[str] = field(default_factory=list)
 
 
 @dataclass
@@ -129,8 +151,9 @@ class ClaudeRunResult:
     timed_out: bool
     started_at: str
     ended_at: str
-    claude_version: str
+    reviewer_version: str
     argv: list[str]
+    token_usage: dict[str, Any] | None = None
 
 
 class Redactor:
@@ -264,6 +287,7 @@ def build_prompt(config: RunConfig) -> str:
             "Instructions:",
             "- The structured-review protocol below is mandatory. Do not substitute a generic code review style.",
             "- Inspect the requested artifacts before judging readiness.",
+            f"- You are the {config.backend} reviewer backend. Name the backend in each reviewer pass heading, for example: ### Reviewer pass 1 ({config.review_type}, {config.backend} reviewer).",
         ]
     )
     if config.mode == MODE_WRITE:
@@ -271,10 +295,8 @@ def build_prompt(config: RunConfig) -> str:
         assert config.topic is not None
         lines.extend(
             [
-                "- Append review threads near the end of the thread file under its Review Threads section.",
-                "- Commit only the thread-file change.",
-                f"- Use commit message: structured-review: add reviewer comments for {config.topic}",
-                "- Do not modify implementation files or any file other than the thread file.",
+                "- Return the complete review threads as markdown in your final response.",
+                "- Do not write files, stage changes, or commit anything; the runner appends your returned review to the thread file and creates the commit.",
                 "- If there are no blocking issues, say so explicitly in the review thread.",
             ]
         )
@@ -429,7 +451,7 @@ def contains_secret_material(text: str) -> bool:
 
 def verify_print_mode(config: RunConfig, before: GitSnapshot, after: GitSnapshot, result: ClaudeRunResult) -> None:
     if result.returncode != 0:
-        raise RunnerError("claude review failed")
+        raise RunnerError("reviewer run failed")
     if before.head != after.head:
         raise RunnerError("print-review changed HEAD")
     if before.status != after.status:
@@ -440,11 +462,54 @@ def verify_print_mode(config: RunConfig, before: GitSnapshot, after: GitSnapshot
         raise RunnerError("print-review output contains a local absolute path")
 
 
+def verify_reviewer_output(config: RunConfig, before: GitSnapshot, after: GitSnapshot, result: ClaudeRunResult) -> None:
+    """Require a read-only reviewer run with committable review text."""
+    if result.returncode != 0:
+        raise RunnerError("reviewer run failed")
+    if before.head != after.head:
+        raise RunnerError("reviewer moved HEAD; write mode requires read-only reviewer output")
+    if before.status != after.status:
+        raise RunnerError("reviewer modified the worktree; write mode requires read-only reviewer output")
+    text = result.review_text
+    if not text.strip():
+        raise RunnerError("reviewer produced no review text")
+    if not added_content_is_review_like(text.splitlines()):
+        raise RunnerError("review text does not look like review-thread content")
+    if contains_local_path(text, extra_paths=(config.worktree,)):
+        raise RunnerError("review text contains a local absolute path")
+    if contains_secret_material(text):
+        raise RunnerError("review text contains possible secret material")
+
+
+def append_and_commit_review(config: RunConfig, review_text: str) -> None:
+    """Append the reviewer's output under Review Threads and create the handoff commit."""
+    if config.thread_file is None or config.topic is None:
+        raise RunnerError("internal error: write mode missing thread file or topic")
+    path = config.thread_file.abs
+    text = read_text(path)
+    bounds = review_threads_bounds(text)
+    if bounds is None:
+        raise RunnerError("thread file lost its Review Threads section during the run")
+    _, section_end = bounds
+    head = text[:section_end].rstrip("\n")
+    tail = text[section_end:].lstrip("\n")
+    block = review_text if review_text.endswith("\n") else review_text + "\n"
+    updated = head + "\n\n" + block
+    if tail:
+        updated += "\n" + tail
+    path.write_text(updated, encoding="utf-8")
+    run_git(["add", config.thread_file.rel], root=config.worktree)
+    run_git(
+        ["commit", "-m", f"structured-review: add reviewer comments for {config.topic}"],
+        root=config.worktree,
+    )
+
+
 def verify_write_mode(config: RunConfig, before: GitSnapshot, after: GitSnapshot, result: ClaudeRunResult) -> None:
     if config.thread_file is None or config.topic is None:
         raise RunnerError("internal error: write mode missing thread file or topic")
     if result.returncode != 0:
-        raise RunnerError("claude review failed")
+        raise RunnerError("reviewer run failed")
     if after.status.strip():
         raise RunnerError("reviewer left uncommitted worktree changes")
     new_commits = commits_between(config.worktree, before.head, after.head)
@@ -515,6 +580,30 @@ def process_stream_line(line: str) -> StreamLineResult:
     return StreamLineResult(malformed=False, text_delta=stream_text_delta(event), message_text=stream_message_text(event))
 
 
+def codex_agent_message(value: Any) -> str:
+    if not isinstance(value, dict) or value.get("type") != "item.completed":
+        return ""
+    item = value.get("item")
+    if isinstance(item, dict) and item.get("type") == "agent_message" and isinstance(item.get("text"), str):
+        return item["text"]
+    return ""
+
+
+def codex_turn_usage(value: Any) -> dict[str, Any] | None:
+    if not isinstance(value, dict) or value.get("type") != "turn.completed":
+        return None
+    usage = value.get("usage")
+    return usage if isinstance(usage, dict) else None
+
+
+def process_codex_stream_line(line: str) -> StreamLineResult:
+    try:
+        event = json.loads(line)
+    except json.JSONDecodeError:
+        return StreamLineResult(malformed=True)
+    return StreamLineResult(malformed=False, message_text=codex_agent_message(event), usage=codex_turn_usage(event))
+
+
 def claude_argv(config: RunConfig) -> list[str]:
     return [
         config.claude_bin,
@@ -533,6 +622,40 @@ def claude_argv(config: RunConfig) -> list[str]:
     ]
 
 
+def codex_argv(config: RunConfig, logs: RunLogs) -> list[str]:
+    # workspace-write (without any .git --add-dir grant) lets the reviewer run
+    # tests, which need writable temp dirs, while commits stay impossible and
+    # stray worktree writes are rejected by the runner's untouched-worktree
+    # check before any append happens.
+    return [
+        config.codex_bin,
+        "exec",
+        "-",
+        "--json",
+        "--sandbox",
+        "workspace-write",
+        "-m",
+        config.codex_model,
+        "-c",
+        f"model_reasoning_effort={config.codex_effort}",
+        "--output-last-message",
+        str(logs.root / CODEX_LAST_MESSAGE_NAME),
+    ]
+
+
+def claude_finalize(state: ReviewState, logs: RunLogs) -> str:
+    return "".join(state.text_deltas) if state.text_deltas else state.final_message
+
+
+def codex_finalize(state: ReviewState, logs: RunLogs) -> str:
+    last_message = logs.root / CODEX_LAST_MESSAGE_NAME
+    if last_message.exists():
+        file_text = last_message.read_text(encoding="utf-8")
+        if file_text.strip():
+            return file_text
+    return "\n\n".join(message for message in state.messages if message.strip())
+
+
 def git_dir(root: Path) -> Path:
     result = run_git(["rev-parse", "--git-dir"], root=root)
     raw = Path(result.stdout.strip())
@@ -559,22 +682,93 @@ def prepare_run_logs(config: RunConfig) -> RunLogs:
     )
 
 
-def claude_version(config: RunConfig) -> str:
+def binary_version(bin_path: str, label: str, cwd: Path) -> str:
     try:
         result = subprocess.run(
-            [config.claude_bin, "--version"],
-            cwd=config.worktree,
+            [bin_path, "--version"],
+            cwd=cwd,
             capture_output=True,
             text=True,
             check=False,
             timeout=10,
         )
     except subprocess.TimeoutExpired:
-        return "unknown: claude --version timed out"
+        return f"unknown: {label} --version timed out"
     value = (result.stdout or result.stderr).strip()
     return value or f"unknown returncode={result.returncode}"
 
 
+def claude_version(config: RunConfig) -> str:
+    return binary_version(config.claude_bin, BACKEND_CLAUDE, config.worktree)
+
+
+def codex_version(config: RunConfig) -> str:
+    return binary_version(config.codex_bin, BACKEND_CODEX, config.worktree)
+
+
+@dataclass(frozen=True)
+class ReviewerBackend:
+    name: str
+    bin_for: Callable[[RunConfig], str]
+    argv_for: Callable[[RunConfig, RunLogs], list[str]]
+    parse_line: Callable[[str], StreamLineResult]
+    finalize: Callable[[ReviewState, RunLogs], str]
+    version_for: Callable[[RunConfig], str]
+    driver_markers: tuple[str, ...]
+
+
+BACKENDS: dict[str, ReviewerBackend] = {
+    BACKEND_CLAUDE: ReviewerBackend(
+        name=BACKEND_CLAUDE,
+        bin_for=lambda config: config.claude_bin,
+        argv_for=lambda config, logs: claude_argv(config),
+        parse_line=process_stream_line,
+        finalize=claude_finalize,
+        version_for=claude_version,
+        driver_markers=CLAUDE_DRIVER_MARKERS,
+    ),
+    BACKEND_CODEX: ReviewerBackend(
+        name=BACKEND_CODEX,
+        bin_for=lambda config: config.codex_bin,
+        argv_for=codex_argv,
+        parse_line=process_codex_stream_line,
+        finalize=codex_finalize,
+        version_for=codex_version,
+        driver_markers=CODEX_DRIVER_MARKERS,
+    ),
+}
+
+
+def detect_driver_markers(env: Mapping[str, str]) -> tuple[bool, bool]:
+    claude_marker = any(name in env for name in CLAUDE_DRIVER_MARKERS)
+    codex_marker = any(name in env for name in CODEX_DRIVER_MARKERS)
+    return claude_marker, codex_marker
+
+
+def resolve_reviewer_backend(raw: str, env: Mapping[str, str]) -> tuple[str, bool]:
+    """Resolve the reviewer backend name and whether a driver marker chose it."""
+    if raw != BACKEND_AUTO:
+        return raw, False
+    claude_marker, codex_marker = detect_driver_markers(env)
+    if claude_marker and codex_marker:
+        raise RunnerError(
+            "driver environment carries both Claude and Codex markers; pass --reviewer-backend explicitly"
+        )
+    if claude_marker:
+        return BACKEND_CODEX, True
+    if codex_marker:
+        return BACKEND_CLAUDE, True
+    return BACKEND_CLAUDE, False
+
+
+def active_model(config: RunConfig) -> str:
+    return config.model if config.backend == BACKEND_CLAUDE else config.codex_model
+
+
+def active_effort(config: RunConfig) -> str:
+    return config.effort if config.backend == BACKEND_CLAUDE else config.codex_effort
+
+
 def write_metadata(
     config: RunConfig,
     logs: RunLogs,
@@ -593,8 +787,9 @@ def write_metadata(
         "artifacts": [artifact.rel for artifact in config.artifacts],
         "thread_file": config.thread_file.rel if config.thread_file else None,
         "topic": config.topic,
-        "model": config.model,
-        "effort": config.effort,
+        "backend": config.backend,
+        "model": active_model(config),
+        "effort": active_effort(config),
         "timeout_sec": config.timeout_sec,
         "heartbeat_sec": config.heartbeat_sec,
         "run_log_dir": str(logs.root),
@@ -606,31 +801,34 @@ def write_metadata(
         payload.update(
             {
                 "argv": result.argv,
-                "claude_version": result.claude_version,
+                "reviewer_version": result.reviewer_version,
                 "returncode": result.returncode,
                 "timed_out": result.timed_out,
                 "malformed_stream_lines": result.malformed_stream_lines,
                 "started_at": result.started_at,
                 "ended_at": result.ended_at,
+                "token_usage": result.token_usage,
             }
         )
     logs.metadata.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
 
 
 def run_claude(config: RunConfig, prompt: str, logs: RunLogs, redactor: Redactor) -> ClaudeRunResult:
-    argv = claude_argv(config)
-    version = claude_version(config)
+    """Run the selected reviewer backend; the name is kept for compatibility."""
+    backend = BACKENDS[config.backend]
+    argv = backend.argv_for(config, logs)
+    version = backend.version_for(config)
     logs.prompt.write_text(prompt, encoding="utf-8")
     started_at = utc_now()
     started = time.monotonic()
     last_event = started
     stdout_parts: list[str] = []
     stderr_parts: list[str] = []
-    text_delta_parts: list[str] = []
-    message_text_candidate = ""
+    state = ReviewState()
+    token_usage: dict[str, Any] | None = None
     malformed = 0
     print(
-        f"claude review start mode={config.mode} type={config.review_type} model={config.model} effort={config.effort} artifacts={','.join(a.rel for a in config.artifacts)}",
+        f"{config.backend} review start mode={config.mode} type={config.review_type} model={active_model(config)} effort={active_effort(config)} artifacts={','.join(a.rel for a in config.artifacts)}",
         file=sys.stderr,
     )
     proc = subprocess.Popen(
@@ -662,7 +860,7 @@ def run_claude(config: RunConfig, prompt: str, logs: RunLogs, redactor: Redactor
             if not events:
                 if now - last_event >= config.heartbeat_sec:
                     elapsed = int(now - started)
-                    print(f"claude review heartbeat elapsed_sec={elapsed}", file=sys.stderr)
+                    print(f"{config.backend} review heartbeat elapsed_sec={elapsed}", file=sys.stderr)
                     last_event = now
                 continue
             for key, _ in events:
@@ -676,14 +874,17 @@ def run_claude(config: RunConfig, prompt: str, logs: RunLogs, redactor: Redactor
                     stdout_parts.append(line)
                     stdout_log.write(line)
                     stdout_log.flush()
-                    stream_result = process_stream_line(line)
+                    stream_result = backend.parse_line(line)
                     if stream_result.malformed:
                         malformed += 1
                     if stream_result.text_delta:
-                        text_delta_parts.append(stream_result.text_delta)
+                        state.text_deltas.append(stream_result.text_delta)
                     elif stream_result.message_text:
-                        message_text_candidate = stream_result.message_text
-                    print("claude review event", file=sys.stderr)
+                        state.final_message = stream_result.message_text
+                        state.messages.append(stream_result.message_text)
+                    if stream_result.usage is not None:
+                        token_usage = stream_result.usage
+                    print(f"{config.backend} review event", file=sys.stderr)
                 else:
                     stderr_parts.append(line)
                     stderr_log.write(line)
@@ -695,7 +896,7 @@ def run_claude(config: RunConfig, prompt: str, logs: RunLogs, redactor: Redactor
     proc.stderr.close()
     if timed_out:
         returncode = -9
-    review_text = "".join(text_delta_parts) if text_delta_parts else message_text_candidate
+    review_text = backend.finalize(state, logs)
     logs.review.write_text(review_text, encoding="utf-8")
     return ClaudeRunResult(
         returncode=returncode,
@@ -706,8 +907,9 @@ def run_claude(config: RunConfig, prompt: str, logs: RunLogs, redactor: Redactor
         timed_out=timed_out,
         started_at=started_at,
         ended_at=utc_now(),
-        claude_version=version,
+        reviewer_version=version,
         argv=argv,
+        token_usage=token_usage,
     )
 
 
@@ -723,9 +925,18 @@ def parse_args(argv: Sequence[str]) -> argparse.Namespace:
     focus.add_argument("--focus-file")
     parser.add_argument("--thread-file")
     parser.add_argument("--topic")
+    parser.add_argument(
+        "--reviewer-backend",
+        default=BACKEND_AUTO,
+        choices=(BACKEND_AUTO, BACKEND_CLAUDE, BACKEND_CODEX),
+        dest="reviewer_backend",
+    )
     parser.add_argument("--model", default=DEFAULT_MODEL)
     parser.add_argument("--effort", default=DEFAULT_EFFORT)
     parser.add_argument("--claude-bin", default="claude")
+    parser.add_argument("--codex-bin", default="codex")
+    parser.add_argument("--codex-model", default=DEFAULT_CODEX_MODEL)
+    parser.add_argument("--codex-effort", default=DEFAULT_CODEX_EFFORT)
     parser.add_argument("--timeout-sec", type=int, default=DEFAULT_TIMEOUT_SEC)
     parser.add_argument("--heartbeat-sec", type=int, default=DEFAULT_HEARTBEAT_SEC)
     parser.add_argument("--run-log-dir")
@@ -733,7 +944,7 @@ def parse_args(argv: Sequence[str]) -> argparse.Namespace:
     return parser.parse_args(argv)
 
 
-def config_from_args(args: argparse.Namespace) -> RunConfig:
+def config_from_args(args: argparse.Namespace, env: Mapping[str, str] | None = None) -> RunConfig:
     protocol_dir = resolve_protocol_dir(args.protocol_dir)
     worktree = resolve_worktree(args.worktree)
     focus = args.focus if args.focus else read_text(resolve_repo_path(worktree, args.focus_file).abs)
@@ -749,6 +960,16 @@ def config_from_args(args: argparse.Namespace) -> RunConfig:
     if args.timeout_sec <= 0 or args.heartbeat_sec <= 0:
         raise RunnerError("timeout and heartbeat must be positive")
     run_log_dir = Path(args.run_log_dir).expanduser().resolve() if args.run_log_dir else None
+    backend, marker_resolved = resolve_reviewer_backend(
+        args.reviewer_backend, os.environ if env is None else env
+    )
+    if marker_resolved:
+        backend_bin = args.claude_bin if backend == BACKEND_CLAUDE else args.codex_bin
+        if shutil.which(backend_bin) is None:
+            raise RunnerError(
+                f"reviewer backend '{backend}' was auto-selected from the driver environment "
+                "but its binary is unavailable; install it or pass --reviewer-backend explicitly"
+            )
     return RunConfig(
         protocol_dir=protocol_dir,
         worktree=worktree,
@@ -758,9 +979,13 @@ def config_from_args(args: argparse.Namespace) -> RunConfig:
         focus=focus,
         thread_file=thread_file,
         topic=args.topic,
+        backend=backend,
         model=args.model,
         effort=args.effort,
         claude_bin=args.claude_bin,
+        codex_bin=args.codex_bin,
+        codex_model=args.codex_model,
+        codex_effort=args.codex_effort,
         timeout_sec=args.timeout_sec,
         heartbeat_sec=args.heartbeat_sec,
         run_log_dir=run_log_dir,
@@ -790,15 +1015,18 @@ def run(config: RunConfig) -> None:
         after = git_snapshot(config.worktree)
         if result.timed_out:
             dirty = " with uncommitted changes" if after.status.strip() else ""
-            write_metadata(config, logs, result, outcome="timeout", error=f"claude review timed out{dirty}", before=before, after=after)
-            raise RunnerError(f"claude review timed out{dirty}", exit_code=2)
+            write_metadata(config, logs, result, outcome="timeout", error=f"{config.backend} review timed out{dirty}", before=before, after=after)
+            raise RunnerError(f"{config.backend} review timed out{dirty}", exit_code=2)
         if config.mode == MODE_WRITE:
+            verify_reviewer_output(config, before, after, result)
+            append_and_commit_review(config, result.review_text)
+            after = git_snapshot(config.worktree)
             verify_write_mode(config, before, after, result)
         else:
             verify_print_mode(config, before, after, result)
             print(redactor.redact(result.review_text).rstrip())
         write_metadata(config, logs, result, outcome="success", before=before, after=after)
-        print(f"claude structured review completed run_log={redactor.redact(str(logs.root))}", file=sys.stderr)
+        print(f"{config.backend} structured review completed run_log={redactor.redact(str(logs.root))}", file=sys.stderr)
     except RunnerError as exc:
         if result is not None and not result.timed_out:
             if after is None:
@@ -808,7 +1036,7 @@ def run(config: RunConfig) -> None:
     except Exception as exc:
         after = git_snapshot(config.worktree)
         write_metadata(config, logs, result, outcome="error", error=str(exc), before=before, after=after)
-        raise RunnerError(f"claude review errored: {exc}") from exc
+        raise RunnerError(f"reviewer run errored: {exc}") from exc
 
 
 def main(argv: Sequence[str] | None = None) -> int:
diff --git a/structured-review/tests/test_claude_structured_review.py b/structured-review/tests/test_claude_structured_review.py
index 7bd8c9d..948f49a 100644
--- a/structured-review/tests/test_claude_structured_review.py
+++ b/structured-review/tests/test_claude_structured_review.py
@@ -79,6 +79,8 @@ def config_for(
             "docs/plan.md",
             "--focus",
             "Review the plan.",
+            "--reviewer-backend",
+            "claude",
         ]
         if mode == csr.MODE_WRITE:
             args.extend(["--thread-file", "docs/plan.md"])
@@ -261,7 +263,7 @@ def test_skill_removed_legacy_human_interaction_mechanics(self) -> None:
         for text in forbidden:
             self.assertNotIn(text, skill)
 
-        self.assertIn("Default Claude Runner Rule", skill)
+        self.assertIn("Default Runner Rule", skill)
         self.assertIn("accepted", skill)
         self.assertIn("rejected", skill)
         self.assertIn("deferred", skill)
@@ -570,7 +572,8 @@ def test_run_claude_writes_logs_metadata_and_review_text(self) -> None:
         self.assertIn("stderr path", logs.stderr.read_text(encoding="utf-8"))
         self.assertEqual(logs.review.read_text(encoding="utf-8"), "Review done.")
         metadata = json.loads(logs.metadata.read_text(encoding="utf-8"))
-        self.assertEqual(metadata["claude_version"], "2.1.143 (Claude Code)")
+        self.assertEqual(metadata["reviewer_version"], "2.1.143 (Claude Code)")
+        self.assertEqual(metadata["backend"], "claude")
         self.assertEqual(metadata["malformed_stream_lines"], 1)
 
     def test_run_reports_timeout_with_dirty_state(self) -> None:
@@ -607,6 +610,340 @@ def fake_run_claude(config: csr.RunConfig, prompt: str, logs: csr.RunLogs, redac
         self.assertEqual(metadata["outcome"], "error")
         self.assertEqual(metadata["error"], "boom")
 
+    def logs_for(self, root: Path) -> csr.RunLogs:
+        return csr.RunLogs(
+            root=root,
+            prompt=root / "prompt.md",
+            stdout=root / "stdout.stream.jsonl",
+            stderr=root / "stderr.log",
+            metadata=root / "metadata.json",
+            review=root / "review.md",
+        )
+
+    def write_fake_codex(self, body: str) -> Path:
+        fake_bin = self.root / "bin"
+        fake_bin.mkdir(exist_ok=True)
+        fake_codex = fake_bin / "codex"
+        fake_codex.write_text(
+            "#!/usr/bin/env python3\n"
+            "import subprocess, sys\n"
+            "\n"
+            "if '--version' in sys.argv:\n"
+            "    print('codex-cli 0.137.0')\n"
+            "    raise SystemExit(0)\n"
+            "\n"
+            "out_path = sys.argv[sys.argv.index('--output-last-message') + 1]\n"
+            "sys.stdin.read()\n" + body,
+            encoding="utf-8",
+        )
+        fake_codex.chmod(0o755)
+        return fake_codex
+
+    def test_resolve_reviewer_backend_cross_vendor_auto(self) -> None:
+        self.assertEqual(csr.resolve_reviewer_backend("auto", {"CLAUDECODE": "1"}), ("codex", True))
+        self.assertEqual(csr.resolve_reviewer_backend("auto", {"CODEX_THREAD_ID": "t"}), ("claude", True))
+        self.assertEqual(csr.resolve_reviewer_backend("auto", {"CODEX_SANDBOX": "seatbelt"}), ("claude", True))
+        self.assertEqual(csr.resolve_reviewer_backend("auto", {}), ("claude", False))
+        self.assertEqual(csr.resolve_reviewer_backend("codex", {"CLAUDECODE": "1"}), ("codex", False))
+
+    def test_resolve_reviewer_backend_both_markers_error(self) -> None:
+        with self.assertRaisesRegex(csr.RunnerError, "--reviewer-backend"):
+            csr.resolve_reviewer_backend("auto", {"CLAUDECODE": "1", "CODEX_SANDBOX": "seatbelt"})
+
+    def test_auto_marker_resolved_missing_binary_errors(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        args = csr.parse_args(
+            [
+                "--protocol-dir",
+                str(protocol),
+                "--worktree",
+                str(repo),
+                "--mode",
+                csr.MODE_PRINT,
+                "--type",
+                "impl-plan",
+                "--artifact",
+                "docs/plan.md",
+                "--focus",
+                "Review.",
+            ]
+        )
+        with mock.patch.object(csr.shutil, "which", return_value=None):
+            with self.assertRaisesRegex(csr.RunnerError, "--reviewer-backend"):
+                csr.config_from_args(args, env={"CLAUDECODE": "1"})
+
+    def test_auto_neither_marker_skips_preflight(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        args = csr.parse_args(
+            [
+                "--protocol-dir",
+                str(protocol),
+                "--worktree",
+                str(repo),
+                "--mode",
+                csr.MODE_PRINT,
+                "--type",
+                "impl-plan",
+                "--artifact",
+                "docs/plan.md",
+                "--focus",
+                "Review.",
+            ]
+        )
+        with mock.patch.object(csr.shutil, "which", return_value=None):
+            config = csr.config_from_args(args, env={})
+        self.assertEqual(config.backend, "claude")
+
+    def test_explicit_backend_skips_preflight(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        with mock.patch.object(csr.shutil, "which", return_value=None):
+            config = self.config_for(repo, protocol, extra=["--reviewer-backend", "codex"])
+        self.assertEqual(config.backend, "codex")
+
+    def test_codex_argv_is_workspace_write_without_git_grant_in_both_modes(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        logs = self.logs_for(self.root / "codex-logs")
+
+        for mode, topic in ((csr.MODE_WRITE, "plan"), (csr.MODE_PRINT, None)):
+            config = self.config_for(repo, protocol, mode=mode, topic=topic, extra=["--reviewer-backend", "codex"])
+            argv = csr.codex_argv(config, logs)
+
+            self.assertEqual(argv[:4], ["codex", "exec", "-", "--json"])
+            self.assertIn("workspace-write", argv)
+            self.assertNotIn("--add-dir", argv)
+            self.assertIn("gpt-5.5", argv)
+            self.assertIn("model_reasoning_effort=xhigh", argv)
+            self.assertEqual(argv[argv.index("--output-last-message") + 1], str(logs.root / "last-message.txt"))
+
+    def test_append_preserves_review_text_byte_for_byte(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        config = self.config_for(repo, protocol)
+        review = "### Reviewer pass 1 (impl-plan, codex reviewer)\n\nIndented:\n    code line\n\n- bullet  \n\nNo blockers."
+        before = csr.git_snapshot(repo)
+
+        csr.append_and_commit_review(config, review)
+
+        text = (repo / "docs/plan.md").read_text(encoding="utf-8")
+        self.assertTrue(text.endswith("\n\n" + review + "\n"))
+        after = csr.git_snapshot(repo)
+        csr.verify_write_mode(config, before, after, self.result())
+
+    def test_codex_model_and_effort_overrides(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        config = self.config_for(
+            repo,
+            protocol,
+            extra=["--reviewer-backend", "codex", "--codex-model", "gpt-9", "--codex-effort", "high"],
+        )
+        logs = self.logs_for(self.root / "codex-logs")
+
+        argv = csr.codex_argv(config, logs)
+
+        self.assertIn("gpt-9", argv)
+        self.assertIn("model_reasoning_effort=high", argv)
+        self.assertNotIn("gpt-5.5", argv)
+
+    def test_codex_stream_line_extracts_agent_message_and_usage(self) -> None:
+        message = csr.process_codex_stream_line(
+            '{"type":"item.completed","item":{"id":"i1","type":"agent_message","text":"No blockers."}}'
+        )
+        usage = csr.process_codex_stream_line('{"type":"turn.completed","usage":{"input_tokens":10,"output_tokens":5}}')
+        other = csr.process_codex_stream_line('{"type":"item.completed","item":{"id":"i2","type":"command_execution","command":"ls"}}')
+        malformed = csr.process_codex_stream_line("{not json")
+
+        self.assertEqual(message.message_text, "No blockers.")
+        self.assertEqual(usage.usage, {"input_tokens": 10, "output_tokens": 5})
+        self.assertEqual(other.message_text, "")
+        self.assertTrue(malformed.malformed)
+
+    def test_prompt_names_reviewer_backend_for_both_backends(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        claude_config = self.config_for(repo, protocol)
+        codex_config = self.config_for(repo, protocol, extra=["--reviewer-backend", "codex"])
+
+        self.assertIn("You are the claude reviewer backend", csr.build_prompt(claude_config))
+        self.assertIn("claude reviewer).", csr.build_prompt(claude_config))
+        self.assertIn("You are the codex reviewer backend", csr.build_prompt(codex_config))
+
+    def test_run_codex_prefers_last_message_file_and_records_metadata(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        fake_codex = self.write_fake_codex(
+            "print('{\"type\":\"item.completed\",\"item\":{\"id\":\"i1\",\"type\":\"agent_message\",\"text\":\"Partial note.\"}}', flush=True)\n"
+            "print('{not json', flush=True)\n"
+            "print('{\"type\":\"turn.completed\",\"usage\":{\"input_tokens\":10,\"output_tokens\":5}}', flush=True)\n"
+            "with open(out_path, 'w') as fh:\n"
+            "    fh.write('Codex review done.')\n"
+            "print('codex stderr path', file=sys.stderr, flush=True)\n"
+        )
+        config = self.config_for(
+            repo,
+            protocol,
+            mode=csr.MODE_PRINT,
+            topic=None,
+            extra=["--reviewer-backend", "codex", "--codex-bin", str(fake_codex), "--run-log-dir", str(self.root / "logs")],
+        )
+        logs = csr.prepare_run_logs(config)
+
+        result = csr.run_claude(config, "prompt", logs, csr.Redactor([repo]))
+        csr.write_metadata(config, logs, result, outcome="success")
+
+        self.assertEqual(result.returncode, 0)
+        self.assertEqual(result.review_text, "Codex review done.")
+        self.assertEqual(result.malformed_stream_lines, 1)
+        self.assertEqual(result.reviewer_version, "codex-cli 0.137.0")
+        metadata = json.loads(logs.metadata.read_text(encoding="utf-8"))
+        self.assertEqual(metadata["backend"], "codex")
+        self.assertEqual(metadata["model"], "gpt-5.5")
+        self.assertEqual(metadata["effort"], "xhigh")
+        self.assertEqual(metadata["reviewer_version"], "codex-cli 0.137.0")
+        self.assertEqual(metadata["token_usage"], {"input_tokens": 10, "output_tokens": 5})
+
+    def test_run_codex_falls_back_to_accumulated_messages(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        fake_codex = self.write_fake_codex(
+            "print('{\"type\":\"item.completed\",\"item\":{\"id\":\"i1\",\"type\":\"agent_message\",\"text\":\"First note.\"}}', flush=True)\n"
+            "print('{\"type\":\"item.completed\",\"item\":{\"id\":\"i2\",\"type\":\"agent_message\",\"text\":\"Second note.\"}}', flush=True)\n"
+        )
+        config = self.config_for(
+            repo,
+            protocol,
+            mode=csr.MODE_PRINT,
+            topic=None,
+            extra=["--reviewer-backend", "codex", "--codex-bin", str(fake_codex), "--run-log-dir", str(self.root / "logs")],
+        )
+        logs = csr.prepare_run_logs(config)
+
+        result = csr.run_claude(config, "prompt", logs, csr.Redactor([repo]))
+
+        self.assertEqual(result.review_text, "First note.\n\nSecond note.")
+
+    def test_run_codex_write_mode_runner_appends_and_commits(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        review = "### Reviewer pass 1 (impl-plan, codex reviewer)\\n\\nNo blocking issues."
+        fake_codex = self.write_fake_codex(
+            "with open(out_path, 'w') as fh:\n"
+            f"    fh.write('{review}')\n"
+        )
+        config = self.config_for(
+            repo,
+            protocol,
+            extra=["--reviewer-backend", "codex", "--codex-bin", str(fake_codex), "--run-log-dir", str(self.root / "logs")],
+        )
+        head_before = run_git(repo, "rev-parse", "HEAD")
+
+        csr.run(config)
+
+        self.assertEqual(run_git(repo, "rev-parse", f"{run_git(repo, 'rev-parse', 'HEAD')}^"), head_before)
+        self.assertEqual(run_git(repo, "log", "-1", "--pretty=%s"), "structured-review: add reviewer comments for plan")
+        plan_text = (repo / "docs/plan.md").read_text(encoding="utf-8")
+        self.assertIn("### Reviewer pass 1 (impl-plan, codex reviewer)\n\nNo blocking issues.\n", plan_text)
+        self.assertEqual(run_git(repo, "status", "--porcelain"), "")
+        metadata = json.loads((self.root / "logs/metadata.json").read_text(encoding="utf-8"))
+        self.assertEqual(metadata["outcome"], "success")
+        self.assertEqual(metadata["backend"], "codex")
+
+    def test_run_claude_write_mode_runner_appends_and_commits(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        config = self.config_for(repo, protocol, extra=["--run-log-dir", str(self.root / "logs")])
+
+        def fake_run_claude(config: csr.RunConfig, prompt: str, logs: csr.RunLogs, redactor: csr.Redactor) -> csr.ClaudeRunResult:
+            logs.prompt.write_text(prompt, encoding="utf-8")
+            logs.stdout.write_text("{}", encoding="utf-8")
+            logs.stderr.write_text("", encoding="utf-8")
+            return self.result(review_text="### Reviewer pass 1 (impl-plan, claude reviewer)\n\nNo blocking issues.\n")
+
+        with mock.patch.object(csr, "run_claude", fake_run_claude):
+            csr.run(config)
+
+        self.assertEqual(run_git(repo, "log", "-1", "--pretty=%s"), "structured-review: add reviewer comments for plan")
+        self.assertIn("claude reviewer", (repo / "docs/plan.md").read_text(encoding="utf-8"))
+
+    def test_append_inserts_inside_threads_section_before_later_sections(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        plan = repo / "docs/plan.md"
+        plan.write_text(
+            "# Plan\n\n## Review Threads\n\nExisting.\n\n## Appendix\n\nKeep me last.\n",
+            encoding="utf-8",
+        )
+        run_git(repo, "add", "docs/plan.md")
+        run_git(repo, "commit", "-m", "add appendix")
+        config = self.config_for(repo, protocol)
+        before = csr.git_snapshot(repo)
+
+        csr.append_and_commit_review(config, "### Reviewer pass 9 (impl-plan, codex reviewer)\n\nNo blockers.")
+
+        text = plan.read_text(encoding="utf-8")
+        self.assertLess(text.index("Reviewer pass 9"), text.index("## Appendix"))
+        self.assertTrue(text.endswith("Keep me last.\n"))
+        after = csr.git_snapshot(repo)
+        csr.verify_write_mode(config, before, after, self.result())
+
+    def test_write_mode_rejects_reviewer_worktree_modification(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        config = self.config_for(repo, protocol)
+        before = csr.git_snapshot(repo)
+        (repo / "docs/plan.md").write_text("# Plan\n\nDirtied by reviewer.\n", encoding="utf-8")
+        after = csr.git_snapshot(repo)
+
+        with self.assertRaisesRegex(csr.RunnerError, "read-only reviewer output"):
+            csr.verify_reviewer_output(config, before, after, self.result())
+
+    def test_write_mode_rejects_non_review_like_output(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        config = self.config_for(repo, protocol)
+        before = csr.git_snapshot(repo)
+
+        with self.assertRaisesRegex(csr.RunnerError, "review-thread content"):
+            csr.verify_reviewer_output(config, before, before, self.result(review_text="Just some notes.\n"))
+
+    def test_write_mode_rejects_local_path_in_review_output(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        config = self.config_for(repo, protocol)
+        before = csr.git_snapshot(repo)
+        leaked = "/" + "Users/example/work/repo/"
+
+        with self.assertRaisesRegex(csr.RunnerError, "local absolute path"):
+            csr.verify_reviewer_output(
+                config,
+                before,
+                before,
+                self.result(review_text=f"### Reviewer pass 1 (impl, codex reviewer)\n\nLeak: {leaked}\n"),
+            )
+
+    def test_write_prompt_forbids_reviewer_writes(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        config = self.config_for(repo, protocol)
+
+        prompt = csr.build_prompt(config)
+
+        self.assertIn("Do not write files, stage changes, or commit anything", prompt)
+        self.assertIn("the runner appends your returned review", prompt)
+        self.assertNotIn("Use commit message", prompt)
+
+    def test_codex_version_timeout_is_recorded_as_unknown(self) -> None:
+        repo = self.init_target_repo()
+        protocol = self.init_protocol_dir()
+        config = self.config_for(repo, protocol, extra=["--reviewer-backend", "codex"])
+
+        with mock.patch.object(csr.subprocess, "run", side_effect=subprocess.TimeoutExpired("codex", 10)):
+            self.assertEqual(csr.codex_version(config), "unknown: codex --version timed out")
+
     def result(
         self,
         *,
@@ -623,7 +960,7 @@ def result(
             timed_out=timed_out,
             started_at="2026-06-04T00:00:00Z",
             ended_at="2026-06-04T00:00:01Z",
-            claude_version="2.1.143 (Claude Code)",
+            reviewer_version="2.1.143 (Claude Code)",
             argv=["claude", "-p"],
         )