Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/fixtures/claude-execution-bad.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
not valid json
1 change: 1 addition & 0 deletions .github/fixtures/claude-execution-empty.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
35 changes: 35 additions & 0 deletions .github/fixtures/claude-execution-error.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
[
{
"type": "system",
"subtype": "init",
"model": "claude-opus-4-7"
},
{
"type": "assistant",
"message": {
"role": "assistant",
"content": [
{ "type": "text", "text": "Starting review of the contracts diff..." }
]
}
},
{
"type": "assistant",
"message": {
"role": "assistant",
"content": [
{ "type": "tool_use", "id": "tool_1", "name": "Bash", "input": { "command": "git diff" } },
{ "type": "text", "text": "Partial review before error" }
]
}
},
{
"type": "result",
"subtype": "error_during_execution",
"is_error": true,
"duration_ms": 3000,
"num_turns": 1,
"errors": ["rate_limit"],
"session_id": "session-error"
}
]
17 changes: 17 additions & 0 deletions .github/fixtures/claude-execution-success-with-high.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[
{
"type": "system",
"subtype": "init",
"model": "claude-opus-4-7"
},
{
"type": "result",
"subtype": "success",
"is_error": false,
"duration_ms": 8000,
"num_turns": 2,
"result": "## Claude Review - Test Scope\n\n[HIGH] contracts/src/foo.cairo:42 - reentrancy: external call before state update\nImpact: an attacker could re-enter and drain.\nFix: move state update before the call.\n\n[LOW] contracts/src/bar.cairo:10 - missing comment\nImpact: minor readability.\nFix: add a doc comment.\n\nSummary: 0 CRITICAL, 1 HIGH, 0 MEDIUM, 1 LOW, 0 INFO",
"total_cost_usd": 0.04,
"session_id": "session-high"
}
]
27 changes: 27 additions & 0 deletions .github/fixtures/claude-execution-success.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
[
{
"type": "system",
"subtype": "init",
"model": "claude-opus-4-7"
},
{
"type": "assistant",
"message": {
"role": "assistant",
"content": [
{ "type": "text", "text": "Looking at the diff..." },
{ "type": "tool_use", "id": "tool_1", "name": "Bash", "input": { "command": "git diff" } }
]
}
},
{
"type": "result",
"subtype": "success",
"is_error": false,
"duration_ms": 12500,
"num_turns": 3,
"result": "## Claude Review - Test Scope\n\nrun=1 attempt=1 sha=abc scope=test\n\nNo issues found.\n\nSummary: 0 CRITICAL, 0 HIGH, 0 MEDIUM, 0 LOW, 0 INFO",
"total_cost_usd": 0.05,
"session_id": "session-success"
}
]
59 changes: 59 additions & 0 deletions .github/scripts/extract-claude-review.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env bash
#
# Extract the final review text from a Claude Code action `execution_file`
# and write it to REVIEW_OUT_FILE. On any failure, write a heading-prefixed
# fallback message and exit 1 (with a `::error::` annotation for GitHub
# Actions log surfacing).
#
# Inputs (env):
# EXECUTION_FILE path to the action's execution_file
# (a JSON array of SDKMessage objects per
# base-action/src/run-claude-sdk.ts)
# HEADING markdown heading to prefix the fallback message with
# when extraction fails
# REVIEW_OUT_FILE output path (default: /tmp/review.txt)
#
# Exit codes:
# 0 review extracted successfully (REVIEW_OUT_FILE has the model output)
# 1 no extractable review (REVIEW_OUT_FILE has heading + fallback message)
# 2 invalid invocation (HEADING not set)

set -eo pipefail

REVIEW_OUT_FILE="${REVIEW_OUT_FILE:-/tmp/review.txt}"

if [ -z "${HEADING:-}" ]; then
echo "extract-claude-review: HEADING env var is required" >&2
exit 2
fi

write_fallback() {
printf '%s\n\nNo review output was produced.\n' "$HEADING" > "$REVIEW_OUT_FILE"
}

if [ -z "${EXECUTION_FILE:-}" ] || [ ! -f "$EXECUTION_FILE" ]; then
write_fallback
echo "::error::Claude action did not produce an execution file"
exit 1
fi

# Primary path: SDKResultSuccess has `.result: string` with the final text.
# `[…] | last` collects matching messages into an array and takes the last
# (defensive against any future change that emits multiple result messages).
REVIEW=$(jq -r '[.[] | select(.type == "result")] | last | .result // empty' \
"$EXECUTION_FILE" 2>/dev/null || true)

# Fallback: SDKResultError has no `.result` field. Concatenate every
# assistant text block so reviewers still see partial work in error cases.
if [ -z "$REVIEW" ]; then
REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' \
"$EXECUTION_FILE" 2>/dev/null || true)
Comment on lines +49 to +50
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The jq filter for assistant text concatenation could fail if the .text field is not a string (e.g., if it's null or an object), as join requires an array of strings. While the || true and 2>/dev/null prevent the script from exiting, it would result in an empty REVIEW and trigger the generic fallback message even if some text was extractable. Adding a type check makes the extraction more robust.

Suggested change
REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' \
"$EXECUTION_FILE" 2>/dev/null || true)
REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text" and (.text | type == "string")) | .text] | join("\n\n")' \
"$EXECUTION_FILE" 2>/dev/null || true)

fi

if [ -z "$REVIEW" ]; then
write_fallback
echo "::error::Claude review produced no extractable output"
exit 1
fi

printf '%s\n' "$REVIEW" > "$REVIEW_OUT_FILE"
122 changes: 122 additions & 0 deletions .github/scripts/test-extract-claude-review.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#!/usr/bin/env bash
#
# Local test runner for extract-claude-review.sh.
#
# Iterates over fixture files under .github/fixtures/ and asserts the
# script's exit code and the content of REVIEW_OUT_FILE for each scenario
# we care about end-to-end:
#
# 1. Multi-line .result success — full body preserved (catches the
# `tail -1` regression that would silently drop [HIGH] markers).
# 2. .result containing [HIGH] — survives extraction so the downstream
# `grep -qE '\[(CRITICAL|HIGH)\]'` blocking check still fires.
# 3. SDKResultError (no .result, has assistant text) — falls back to
# assistant text concatenation.
# 4. Empty array — exit 1 with heading-prefixed fallback.
# 5. Malformed JSON — exit 1 with heading-prefixed fallback (catches
# jq exit 5 propagating through `bash -eo pipefail`).
# 6. Missing file — exit 1 with heading-prefixed fallback.
#
# Run from the repo root:
# bash .github/scripts/test-extract-claude-review.sh

set -eo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
FIXTURES_DIR="$REPO_ROOT/.github/fixtures"
EXTRACT="$SCRIPT_DIR/extract-claude-review.sh"

if [ ! -x "$EXTRACT" ]; then
echo "extract-claude-review.sh not found or not executable at $EXTRACT" >&2
Comment on lines +30 to +31
exit 2
fi

PASS=0
FAIL=0
TMPDIR_ROOT="$(mktemp -d)"
trap 'rm -rf "$TMPDIR_ROOT"' EXIT

# run_case <label> <expected_exit> <execution_file> <pattern> [<pattern>...]
# Runs extract-claude-review.sh against the given fixture and asserts:
# - exit code matches expected
# - every <pattern> appears at least once in REVIEW_OUT_FILE (grep -E)
run_case() {
local label="$1" expected_exit="$2" execution_file="$3"
shift 3
local patterns=("$@")
local out
out="$TMPDIR_ROOT/$(printf '%s' "$label" | tr ' /[]' '____').out"

local exit_code=0
EXECUTION_FILE="$execution_file" \
HEADING="## Claude Review - Test Scope" \
REVIEW_OUT_FILE="$out" \
bash "$EXTRACT" >/dev/null 2>&1 || exit_code=$?

local result="PASS" reason=""
if [ "$exit_code" != "$expected_exit" ]; then
result="FAIL"
reason="exit=$exit_code expected=$expected_exit"
else
for pattern in "${patterns[@]}"; do
if ! grep -qE "$pattern" "$out"; then
result="FAIL"
reason="output missing pattern '$pattern'"
break
fi
done
fi

if [ "$result" = "PASS" ]; then
PASS=$((PASS + 1))
printf ' [PASS] %s\n' "$label"
else
FAIL=$((FAIL + 1))
printf ' [FAIL] %s — %s\n' "$label" "$reason"
printf ' output (first 5 lines):\n'
head -n 5 "$out" 2>/dev/null | sed 's/^/ | /'
fi
Comment on lines +51 to +79
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The test runner currently suppresses all output from the extraction script (>/dev/null 2>&1). If a test fails due to a system error (e.g., jq missing or a syntax error in the script), the reason is hidden. Capturing stderr to a temporary file and displaying it on failure would significantly improve debuggability.

  local err_out
  err_out="$TMPDIR_ROOT/$(printf '%s' "$label" | tr ' /[]' '____').err"

  local exit_code=0
  EXECUTION_FILE="$execution_file" \
  HEADING="## Claude Review - Test Scope" \
  REVIEW_OUT_FILE="$out" \
    bash "$EXTRACT" >/dev/null 2>"$err_out" || exit_code=$?

  local result="PASS" reason=""
  if [ "$exit_code" != "$expected_exit" ]; then
    result="FAIL"
    reason="exit=$exit_code expected=$expected_exit"
  else
    for pattern in "${patterns[@]}"; do
      if ! grep -qE "$pattern" "$out"; then
        result="FAIL"
        reason="output missing pattern '$pattern'"
        break
      fi
    done
  fi

  if [ "$result" = "PASS" ]; then
    PASS=$((PASS + 1))
    printf '  [PASS] %s\n' "$label"
  else
    FAIL=$((FAIL + 1))
    printf '  [FAIL] %s — %s\n' "$label" "$reason"
    if [ -s "$err_out" ]; then
      printf '         stderr:\n'
      sed 's/^/         | /' "$err_out"
    fi
    printf '         output (first 5 lines):\n'
    head -n 5 "$out" 2>/dev/null | sed 's/^/         | /'
  fi

}

echo "Running extract-claude-review.sh fixture tests"
echo

run_case "success: multi-line .result preserves heading + body + summary" \
0 \
"$FIXTURES_DIR/claude-execution-success.json" \
'^## Claude Review - Test Scope$' \
'^Summary: 0 CRITICAL, 0 HIGH'

run_case "success-with-HIGH: bracketed [HIGH] marker survives extraction" \
0 \
"$FIXTURES_DIR/claude-execution-success-with-high.json" \
'\[HIGH\] contracts/src/foo\.cairo:42'

run_case "error: SDKResultError falls back to assistant text concat" \
0 \
"$FIXTURES_DIR/claude-execution-error.json" \
'Starting review of the contracts diff' \
'Partial review before error'

run_case "empty array: exits 1 with heading + fallback" \
1 \
"$FIXTURES_DIR/claude-execution-empty.json" \
'^## Claude Review - Test Scope$' \
'^No review output was produced\.$'

run_case "malformed JSON: exits 1 with heading + fallback" \
1 \
"$FIXTURES_DIR/claude-execution-bad.json" \
'^## Claude Review - Test Scope$' \
'^No review output was produced\.$'

run_case "missing file: exits 1 with heading + fallback" \
1 \
"$FIXTURES_DIR/does-not-exist.json" \
'^## Claude Review - Test Scope$' \
'^No review output was produced\.$'

echo
echo "Result: $PASS passed, $FAIL failed"
[ "$FAIL" -eq 0 ]
73 changes: 5 additions & 68 deletions .github/workflows/pr-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ jobs:
- ".github/workflows/pr-ci.yml"
- ".github/prompts/**"
- ".github/actions/**"
- ".github/scripts/**"

# True iff the PR touches a file outside contracts/, client/, indexer/, api/.
- name: Compute general_review
Expand Down Expand Up @@ -502,23 +503,7 @@ jobs:
env:
EXECUTION_FILE: ${{ steps.claude.outputs.execution_file }}
HEADING: "## Claude Review - Cairo/Starknet Contract Review"
run: |
if [ -z "$EXECUTION_FILE" ] || [ ! -f "$EXECUTION_FILE" ]; then
printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
echo "::error::Claude action did not produce an execution file"
exit 1
fi

REVIEW=$(jq -r '[.[] | select(.type == "result")] | last | .result // empty' "$EXECUTION_FILE" 2>/dev/null || true)
if [ -z "$REVIEW" ]; then
REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' "$EXECUTION_FILE" 2>/dev/null || true)
fi
if [ -z "$REVIEW" ]; then
printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
echo "::error::Claude review produced no extractable output"
exit 1
fi
printf '%s\n' "$REVIEW" > /tmp/review.txt
run: bash .github/scripts/extract-claude-review.sh

- name: Post review comment
if: always()
Expand Down Expand Up @@ -741,23 +726,7 @@ jobs:
env:
EXECUTION_FILE: ${{ steps.claude.outputs.execution_file }}
HEADING: "## Claude Review - React/Frontend Review"
run: |
if [ -z "$EXECUTION_FILE" ] || [ ! -f "$EXECUTION_FILE" ]; then
printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
echo "::error::Claude action did not produce an execution file"
exit 1
fi

REVIEW=$(jq -r '[.[] | select(.type == "result")] | last | .result // empty' "$EXECUTION_FILE" 2>/dev/null || true)
if [ -z "$REVIEW" ]; then
REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' "$EXECUTION_FILE" 2>/dev/null || true)
fi
if [ -z "$REVIEW" ]; then
printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
echo "::error::Claude review produced no extractable output"
exit 1
fi
printf '%s\n' "$REVIEW" > /tmp/review.txt
run: bash .github/scripts/extract-claude-review.sh

- name: Post review comment
if: always()
Expand Down Expand Up @@ -980,23 +949,7 @@ jobs:
env:
EXECUTION_FILE: ${{ steps.claude.outputs.execution_file }}
HEADING: "## Claude Review - Indexer/API Review"
run: |
if [ -z "$EXECUTION_FILE" ] || [ ! -f "$EXECUTION_FILE" ]; then
printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
echo "::error::Claude action did not produce an execution file"
exit 1
fi

REVIEW=$(jq -r '[.[] | select(.type == "result")] | last | .result // empty' "$EXECUTION_FILE" 2>/dev/null || true)
if [ -z "$REVIEW" ]; then
REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' "$EXECUTION_FILE" 2>/dev/null || true)
fi
if [ -z "$REVIEW" ]; then
printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
echo "::error::Claude review produced no extractable output"
exit 1
fi
printf '%s\n' "$REVIEW" > /tmp/review.txt
run: bash .github/scripts/extract-claude-review.sh

- name: Post review comment
if: always()
Expand Down Expand Up @@ -1220,23 +1173,7 @@ jobs:
env:
EXECUTION_FILE: ${{ steps.claude.outputs.execution_file }}
HEADING: "## Claude Review - General Engineering Review"
run: |
if [ -z "$EXECUTION_FILE" ] || [ ! -f "$EXECUTION_FILE" ]; then
printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
echo "::error::Claude action did not produce an execution file"
exit 1
fi

REVIEW=$(jq -r '[.[] | select(.type == "result")] | last | .result // empty' "$EXECUTION_FILE" 2>/dev/null || true)
if [ -z "$REVIEW" ]; then
REVIEW=$(jq -r '[.[] | select(.type == "assistant") | .message.content[]? | select(.type == "text") | .text] | join("\n\n")' "$EXECUTION_FILE" 2>/dev/null || true)
fi
if [ -z "$REVIEW" ]; then
printf '%s\n\nNo review output was produced.\n' "$HEADING" > /tmp/review.txt
echo "::error::Claude review produced no extractable output"
exit 1
fi
printf '%s\n' "$REVIEW" > /tmp/review.txt
run: bash .github/scripts/extract-claude-review.sh

- name: Post review comment
if: always()
Expand Down
Loading