Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 25 additions & 13 deletions .github/workflows/vet-skill.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@ name: AI Security Review

on:
pull_request:
paths:
- 'skills/**'

jobs:
read-skill:
runs-on: ubuntu-latest
outputs:
content: ${{ steps.skill.outputs.content }}
has_skills: ${{ steps.changed.outputs.any_changed }}
steps:
- uses: actions/checkout@v4

Expand All @@ -21,15 +20,18 @@ jobs:

- name: Read skill content
id: skill
if: steps.changed.outputs.any_changed == 'true'
run: |
DELIMITER="SKILL_CONTENT_$(openssl rand -hex 16)"
CONTENT=$(cat ${{ steps.changed.outputs.all_changed_files }} | head -c 10000)
echo "content<<EOF" >> $GITHUB_OUTPUT
echo "content<<$DELIMITER" >> $GITHUB_OUTPUT
echo "$CONTENT" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
echo "$DELIMITER" >> $GITHUB_OUTPUT

# Agent 1: Claude Opus 4.6 - Anthropic's frontier model
claude-opus-review:
needs: read-skill
if: needs.read-skill.outputs.has_skills == 'true'
runs-on: ubuntu-latest
outputs:
verdict: ${{ steps.analyze.outputs.verdict }}
Expand Down Expand Up @@ -59,10 +61,11 @@ jobs:
CLEAN_JSON=$(echo "$TEXT" | sed 's/^```json//g' | sed 's/^```//g' | sed 's/```$//g' | tr -d '\n' | sed 's/^[[:space:]]*//g')

VERDICT=$(echo "$CLEAN_JSON" | jq -r '.verdict // "ERROR"' 2>/dev/null || echo "ERROR")
DELIMITER="CLAUDE_RESP_$(openssl rand -hex 16)"
echo "verdict=$VERDICT" >> $GITHUB_OUTPUT
echo "response<<EOF" >> $GITHUB_OUTPUT
echo "response<<$DELIMITER" >> $GITHUB_OUTPUT
echo "$CLEAN_JSON" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
echo "$DELIMITER" >> $GITHUB_OUTPUT

# Fail the job if verdict is FAIL (makes it visible in GitHub UI)
if [ "$VERDICT" = "FAIL" ]; then
Expand All @@ -73,6 +76,7 @@ jobs:
# Agent 2: GPT-5.2 - OpenAI's frontier model
openai-review:
needs: read-skill
if: needs.read-skill.outputs.has_skills == 'true'
runs-on: ubuntu-latest
outputs:
verdict: ${{ steps.analyze.outputs.verdict }}
Expand Down Expand Up @@ -103,20 +107,22 @@ jobs:
ERROR=$(echo "$RESPONSE" | jq -r '.error.message // empty')
if [ -n "$ERROR" ]; then
echo "API Error: $ERROR"
DELIMITER="OAI_ERR_$(openssl rand -hex 16)"
echo "verdict=ERROR" >> $GITHUB_OUTPUT
echo "response<<EOF" >> $GITHUB_OUTPUT
echo "response<<$DELIMITER" >> $GITHUB_OUTPUT
echo "{\"verdict\": \"ERROR\", \"issues\": [\"API Error: $ERROR\"], \"reasoning\": \"Failed to call OpenAI API\"}" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
echo "$DELIMITER" >> $GITHUB_OUTPUT
exit 0
fi

CONTENT=$(echo "$RESPONSE" | jq -r '.choices[0].message.content // "{}"')
VERDICT=$(echo "$CONTENT" | jq -r '.verdict // "ERROR"' 2>/dev/null || echo "ERROR")

DELIMITER="OAI_RESP_$(openssl rand -hex 16)"
echo "verdict=$VERDICT" >> $GITHUB_OUTPUT
echo "response<<EOF" >> $GITHUB_OUTPUT
echo "response<<$DELIMITER" >> $GITHUB_OUTPUT
echo "$CONTENT" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
echo "$DELIMITER" >> $GITHUB_OUTPUT

# Fail the job if verdict is FAIL (makes it visible in GitHub UI)
if [ "$VERDICT" = "FAIL" ]; then
Expand All @@ -126,11 +132,16 @@ jobs:

# Aggregate results and post comment
aggregate:
needs: [claude-opus-review, openai-review]
if: always() # Run even if AI review jobs failed (so we can post comment)
needs: [read-skill, claude-opus-review, openai-review]
if: always() # Run even if AI review jobs were skipped or failed
runs-on: ubuntu-latest
steps:
- name: Skip if no skills changed
if: needs.read-skill.outputs.has_skills != 'true'
run: echo "No skill files changed, skipping AI review"

- name: Aggregate Verdicts
if: needs.read-skill.outputs.has_skills == 'true'
id: aggregate
run: |
VERDICTS="${{ needs.claude-opus-review.outputs.verdict }},${{ needs.openai-review.outputs.verdict }}"
Expand All @@ -149,6 +160,7 @@ jobs:
fi

- name: Post Review Comment
if: needs.read-skill.outputs.has_skills == 'true'
uses: actions/github-script@v7
env:
CLAUDE_VERDICT: ${{ needs.claude-opus-review.outputs.verdict }}
Expand Down Expand Up @@ -198,5 +210,5 @@ jobs:
});

- name: Fail if any FAIL verdict
if: steps.aggregate.outputs.final == 'FAIL'
if: needs.read-skill.outputs.has_skills == 'true' && steps.aggregate.outputs.final == 'FAIL'
run: exit 1