Provable-Games · loothero · May 4, 2026 · May 3, 2026 · gemini-code-assist · May 3, 2026
diff --git a/.github/fixtures/claude-execution-bad.json b/.github/fixtures/claude-execution-bad.json
@@ -0,0 +1 @@
+not valid json
diff --git a/.github/fixtures/claude-execution-empty.json b/.github/fixtures/claude-execution-empty.json
@@ -0,0 +1 @@
+[]
diff --git a/.github/fixtures/claude-execution-error.json b/.github/fixtures/claude-execution-error.json
@@ -0,0 +1,35 @@
+[
+  {
+    "type": "system",
+    "subtype": "init",
+    "model": "claude-opus-4-7"
+  },
+  {
+    "type": "assistant",
+    "message": {
+      "role": "assistant",
+      "content": [
+        { "type": "text", "text": "Starting review of the contracts diff..." }
+      ]
+    }
+  },
+  {
+    "type": "assistant",
+    "message": {
+      "role": "assistant",
+      "content": [
+        { "type": "tool_use", "id": "tool_1", "name": "Bash", "input": { "command": "git diff" } },
+        { "type": "text", "text": "Partial review before error" }
+      ]
+    }
+  },
+  {
+    "type": "result",
+    "subtype": "error_during_execution",
+    "is_error": true,
+    "duration_ms": 3000,
+    "num_turns": 1,
+    "errors": ["rate_limit"],
+    "session_id": "session-error"
+  }
+]
diff --git a/.github/fixtures/claude-execution-success-with-high.json b/.github/fixtures/claude-execution-success-with-high.json
@@ -0,0 +1,17 @@
+[
+  {
+    "type": "system",
+    "subtype": "init",
+    "model": "claude-opus-4-7"
+  },
+  {
+    "type": "result",
+    "subtype": "success",
+    "is_error": false,
+    "duration_ms": 8000,
+    "num_turns": 2,
+    "result": "## Claude Review - Test Scope\n\n[HIGH] contracts/src/foo.cairo:42 - reentrancy: external call before state update\nImpact: an attacker could re-enter and drain.\nFix: move state update before the call.\n\n[LOW] contracts/src/bar.cairo:10 - missing comment\nImpact: minor readability.\nFix: add a doc comment.\n\nSummary: 0 CRITICAL, 1 HIGH, 0 MEDIUM, 1 LOW, 0 INFO",
+    "total_cost_usd": 0.04,
+    "session_id": "session-high"
+  }
+]
diff --git a/.github/fixtures/claude-execution-success.json b/.github/fixtures/claude-execution-success.json
@@ -0,0 +1,27 @@
+[
+  {
+    "type": "system",
+    "subtype": "init",
+    "model": "claude-opus-4-7"
+  },
+  {
+    "type": "assistant",
+    "message": {
+      "role": "assistant",
+      "content": [
+        { "type": "text", "text": "Looking at the diff..." },
+        { "type": "tool_use", "id": "tool_1", "name": "Bash", "input": { "command": "git diff" } }
+      ]
+    }
+  },
+  {
+    "type": "result",
+    "subtype": "success",
+    "is_error": false,
+    "duration_ms": 12500,
+    "num_turns": 3,
+    "result": "## Claude Review - Test Scope\n\nrun=1 attempt=1 sha=abc scope=test\n\nNo issues found.\n\nSummary: 0 CRITICAL, 0 HIGH, 0 MEDIUM, 0 LOW, 0 INFO",
+    "total_cost_usd": 0.05,
+    "session_id": "session-success"
+  }
+]
diff --git a/.github/scripts/extract-claude-review.sh b/.github/scripts/extract-claude-review.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+#
+# Extract the final review text from a Claude Code action `execution_file`
+# and write it to REVIEW_OUT_FILE. On any failure, write a heading-prefixed
+# fallback message and exit 1 (with a `::error::` annotation for GitHub
+# Actions log surfacing).
+#
+# Inputs (env):
+#   EXECUTION_FILE   path to the action's execution_file
+#                    (a JSON array of SDKMessage objects per
+#                    base-action/src/run-claude-sdk.ts)
+#   HEADING          markdown heading to prefix the fallback message with
+#                    when extraction fails
+#   REVIEW_OUT_FILE  output path (default: /tmp/review.txt)
+#
+# Exit codes:
+#   0  review extracted successfully (REVIEW_OUT_FILE has the model output)
+#   1  no extractable review (REVIEW_OUT_FILE has heading + fallback message)
+#   2  invalid invocation (HEADING not set)
+
+set -eo pipefail
+
+REVIEW_OUT_FILE="${REVIEW_OUT_FILE:-/tmp/review.txt}"
+
+if [ -z "${HEADING:-}" ]; then
+  echo "extract-claude-review: HEADING env var is required" >&2
+  exit 2
+fi
+
+write_fallback() {
+  printf '%s\n\nNo review output was produced.\n' "$HEADING" > "$REVIEW_OUT_FILE"
+}
+
+if [ -z "${EXECUTION_FILE:-}" ] || [ ! -f "$EXECUTION_FILE" ]; then
+  write_fallback
+  echo "::error::Claude action did not produce an execution file"
+  exit 1
+fi
+
+# Primary path: SDKResultSuccess has `.result: string` with the final text.
+# `[…] | last` collects matching messages into an array and takes the last
+# (defensive against any future change that emits multiple result messages).
+REVIEW=$(jq -r '[.[] | select(.type == "result")] | last | .result // empty' \
+  "$EXECUTION_FILE" 2>/dev/null || true)
+
+# Fallback: SDKResultError has no `.result` field. Concatenate every
+# assistant text block so reviewers still see partial work in error cases.
+if [ -z "$REVIEW" ]; then
+  REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' \
+    "$EXECUTION_FILE" 2>/dev/null || true)
-  REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' \
-    "$EXECUTION_FILE" 2>/dev/null || true)
+  REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text" and (.text | type == "string")) | .text] | join("\n\n")' \
+    "$EXECUTION_FILE" 2>/dev/null || true)
-  REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' \
-    "$EXECUTION_FILE" 2>/dev/null || true)
+  REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text" and (.text | type == "string")) | .text] | join("\n\n")' \
+    "$EXECUTION_FILE" 2>/dev/null || true)
+fi
+
+if [ -z "$REVIEW" ]; then
+  write_fallback
+  echo "::error::Claude review produced no extractable output"
+  exit 1
+fi
+
+printf '%s\n' "$REVIEW" > "$REVIEW_OUT_FILE"
diff --git a/.github/scripts/test-extract-claude-review.sh b/.github/scripts/test-extract-claude-review.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+#
+# Local test runner for extract-claude-review.sh.
+#
+# Iterates over fixture files under .github/fixtures/ and asserts the
+# script's exit code and the content of REVIEW_OUT_FILE for each scenario
+# we care about end-to-end:
+#
+#   1. Multi-line .result success — full body preserved (catches the
+#      `tail -1` regression that would silently drop [HIGH] markers).
+#   2. .result containing [HIGH] — survives extraction so the downstream
+#      `grep -qE '\[(CRITICAL|HIGH)\]'` blocking check still fires.
+#   3. SDKResultError (no .result, has assistant text) — falls back to
+#      assistant text concatenation.
+#   4. Empty array — exit 1 with heading-prefixed fallback.
+#   5. Malformed JSON — exit 1 with heading-prefixed fallback (catches
+#      jq exit 5 propagating through `bash -eo pipefail`).
+#   6. Missing file — exit 1 with heading-prefixed fallback.
+#
+# Run from the repo root:
+#   bash .github/scripts/test-extract-claude-review.sh
+
+set -eo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+FIXTURES_DIR="$REPO_ROOT/.github/fixtures"
+EXTRACT="$SCRIPT_DIR/extract-claude-review.sh"
+
+if [ ! -x "$EXTRACT" ]; then
+  echo "extract-claude-review.sh not found or not executable at $EXTRACT" >&2
+  exit 2
+fi
+
+PASS=0
+FAIL=0
+TMPDIR_ROOT="$(mktemp -d)"
+trap 'rm -rf "$TMPDIR_ROOT"' EXIT
+
+# run_case <label> <expected_exit> <execution_file> <pattern> [<pattern>...]
+#   Runs extract-claude-review.sh against the given fixture and asserts:
+#     - exit code matches expected
+#     - every <pattern> appears at least once in REVIEW_OUT_FILE (grep -E)
+run_case() {
+  local label="$1" expected_exit="$2" execution_file="$3"
+  shift 3
+  local patterns=("$@")
+  local out
+  out="$TMPDIR_ROOT/$(printf '%s' "$label" | tr ' /[]' '____').out"
+
+  local exit_code=0
+  EXECUTION_FILE="$execution_file" \
+  HEADING="## Claude Review - Test Scope" \
+  REVIEW_OUT_FILE="$out" \
+    bash "$EXTRACT" >/dev/null 2>&1 || exit_code=$?
+
+  local result="PASS" reason=""
+  if [ "$exit_code" != "$expected_exit" ]; then
+    result="FAIL"
+    reason="exit=$exit_code expected=$expected_exit"
+  else
+    for pattern in "${patterns[@]}"; do
+      if ! grep -qE "$pattern" "$out"; then
+        result="FAIL"
+        reason="output missing pattern '$pattern'"
+        break
+      fi
+    done
+  fi
+
+  if [ "$result" = "PASS" ]; then
+    PASS=$((PASS + 1))
+    printf '  [PASS] %s\n' "$label"
+  else
+    FAIL=$((FAIL + 1))
+    printf '  [FAIL] %s — %s\n' "$label" "$reason"
+    printf '         output (first 5 lines):\n'
+    head -n 5 "$out" 2>/dev/null | sed 's/^/         | /'
+  fi
+}
+
+echo "Running extract-claude-review.sh fixture tests"
+echo
+
+run_case "success: multi-line .result preserves heading + body + summary" \
+  0 \
+  "$FIXTURES_DIR/claude-execution-success.json" \
+  '^## Claude Review - Test Scope$' \
+  '^Summary: 0 CRITICAL, 0 HIGH'
+
+run_case "success-with-HIGH: bracketed [HIGH] marker survives extraction" \
+  0 \
+  "$FIXTURES_DIR/claude-execution-success-with-high.json" \
+  '\[HIGH\] contracts/src/foo\.cairo:42'
+
+run_case "error: SDKResultError falls back to assistant text concat" \
+  0 \
+  "$FIXTURES_DIR/claude-execution-error.json" \
+  'Starting review of the contracts diff' \
+  'Partial review before error'
+
+run_case "empty array: exits 1 with heading + fallback" \
+  1 \
+  "$FIXTURES_DIR/claude-execution-empty.json" \
+  '^## Claude Review - Test Scope$' \
+  '^No review output was produced\.$'
+
+run_case "malformed JSON: exits 1 with heading + fallback" \
+  1 \
+  "$FIXTURES_DIR/claude-execution-bad.json" \
+  '^## Claude Review - Test Scope$' \
+  '^No review output was produced\.$'
+
+run_case "missing file: exits 1 with heading + fallback" \
+  1 \
+  "$FIXTURES_DIR/does-not-exist.json" \
+  '^## Claude Review - Test Scope$' \
+  '^No review output was produced\.$'
+
+echo
+echo "Result: $PASS passed, $FAIL failed"
+[ "$FAIL" -eq 0 ]
diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml
@@ -65,6 +65,7 @@ jobs:
               - ".github/workflows/pr-ci.yml"
               - ".github/prompts/**"
               - ".github/actions/**"
+              - ".github/scripts/**"
 
       # True iff the PR touches a file outside contracts/, client/, indexer/, api/.
       - name: Compute general_review
@@ -502,23 +503,7 @@ jobs:
         env:
           EXECUTION_FILE: ${{ steps.claude.outputs.execution_file }}
           HEADING: "## Claude Review - Cairo/Starknet Contract Review"
-        run: |
-          if [ -z "$EXECUTION_FILE" ] || [ ! -f "$EXECUTION_FILE" ]; then
-            printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
-            echo "::error::Claude action did not produce an execution file"
-            exit 1
-          fi
-
-          REVIEW=$(jq -r '[.[] | select(.type == "result")] | last | .result // empty' "$EXECUTION_FILE" 2>/dev/null || true)
-          if [ -z "$REVIEW" ]; then
-            REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' "$EXECUTION_FILE" 2>/dev/null || true)
-          fi
-          if [ -z "$REVIEW" ]; then
-            printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
-            echo "::error::Claude review produced no extractable output"
-            exit 1
-          fi
-          printf '%s\n' "$REVIEW" > /tmp/review.txt
+        run: bash .github/scripts/extract-claude-review.sh
 
       - name: Post review comment
         if: always()
@@ -741,23 +726,7 @@ jobs:
         env:
           EXECUTION_FILE: ${{ steps.claude.outputs.execution_file }}
           HEADING: "## Claude Review - React/Frontend Review"
-        run: |
-          if [ -z "$EXECUTION_FILE" ] || [ ! -f "$EXECUTION_FILE" ]; then
-            printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
-            echo "::error::Claude action did not produce an execution file"
-            exit 1
-          fi
-
-          REVIEW=$(jq -r '[.[] | select(.type == "result")] | last | .result // empty' "$EXECUTION_FILE" 2>/dev/null || true)
-          if [ -z "$REVIEW" ]; then
-            REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' "$EXECUTION_FILE" 2>/dev/null || true)
-          fi
-          if [ -z "$REVIEW" ]; then
-            printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
-            echo "::error::Claude review produced no extractable output"
-            exit 1
-          fi
-          printf '%s\n' "$REVIEW" > /tmp/review.txt
+        run: bash .github/scripts/extract-claude-review.sh
 
       - name: Post review comment
         if: always()
@@ -980,23 +949,7 @@ jobs:
         env:
           EXECUTION_FILE: ${{ steps.claude.outputs.execution_file }}
           HEADING: "## Claude Review - Indexer/API Review"
-        run: |
-          if [ -z "$EXECUTION_FILE" ] || [ ! -f "$EXECUTION_FILE" ]; then
-            printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
-            echo "::error::Claude action did not produce an execution file"
-            exit 1
-          fi
-
-          REVIEW=$(jq -r '[.[] | select(.type == "result")] | last | .result // empty' "$EXECUTION_FILE" 2>/dev/null || true)
-          if [ -z "$REVIEW" ]; then
-            REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' "$EXECUTION_FILE" 2>/dev/null || true)
-          fi
-          if [ -z "$REVIEW" ]; then
-            printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
-            echo "::error::Claude review produced no extractable output"
-            exit 1
-          fi
-          printf '%s\n' "$REVIEW" > /tmp/review.txt
+        run: bash .github/scripts/extract-claude-review.sh
 
       - name: Post review comment
         if: always()
@@ -1220,23 +1173,7 @@ jobs:
         env:
           EXECUTION_FILE: ${{ steps.claude.outputs.execution_file }}
           HEADING: "## Claude Review - General Engineering Review"
-        run: |
-          if [ -z "$EXECUTION_FILE" ] || [ ! -f "$EXECUTION_FILE" ]; then
-            printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
-            echo "::error::Claude action did not produce an execution file"
-            exit 1
-          fi
-
-          REVIEW=$(jq -r '[.[] | select(.type == "result")] | last | .result // empty' "$EXECUTION_FILE" 2>/dev/null || true)
-          if [ -z "$REVIEW" ]; then
-            REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' "$EXECUTION_FILE" 2>/dev/null || true)
-          fi
-          if [ -z "$REVIEW" ]; then
-            printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
-            echo "::error::Claude review produced no extractable output"
-            exit 1
-          fi
-          printf '%s\n' "$REVIEW" > /tmp/review.txt
+        run: bash .github/scripts/extract-claude-review.sh
 
       - name: Post review comment
         if: always()