# chore: test matrix #43

# Skill Validation workflow.
#
# Triggered by a newly created comment on a pull request whose body contains
# `/test`. Three jobs run in sequence:
#   prepare     - parses the comment and generates the evaluation matrix
#   evaluate    - runs tests/evaluator.py once per matrix entry
#   consolidate - merges the per-entry results and posts them on the PR
#
# Security: the comment body (and everything derived from it) is untrusted.
# It is never interpolated into a `run:` script with `${{ }}`; it is passed
# through environment variables and expanded by the shell as data only.
#
# NOTE(review): any user able to comment can trigger this workflow, which
# consumes OLLAMA_API_KEY - consider gating on
# github.event.comment.author_association if that is not intended.
# NOTE(review): actions/checkout is used without a `ref`; for issue_comment
# events this presumably checks out the default branch rather than the PR
# head - confirm that is what the evaluation should run against.
name: Skill Validation

on:
  issue_comment:
    types: [created]

permissions:
  pull-requests: write
  contents: read

jobs:
  # Builds the job matrix consumed by `evaluate`.
  prepare:
    runs-on: ubuntu-latest
    # issue_comment also fires for plain issues; only react to PR comments
    # that contain the /test command.
    if: github.event.issue.pull_request && contains(github.event.comment.body, '/test')
    outputs:
      matrix: ${{ steps.matrix.outputs.result }}
    steps:
      - uses: actions/checkout@v4

      - uses: astral-sh/setup-uv@v5

      - name: Parse test command
        id: command
        env:
          # Untrusted input: hand it to the shell as a variable, never as
          # script text.
          COMMENT_BODY: ${{ github.event.comment.body }}
        run: |
          SKILLS=$(python3 ci/parse_test_command.py "$COMMENT_BODY")
          echo "skills=$SKILLS" >> "$GITHUB_OUTPUT"
          if [ -n "$SKILLS" ]; then
            echo "Override skills: $SKILLS"
          else
            echo "Auto-detecting changed skills..."
          fi

      - name: Generate matrix
        id: matrix
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PR_NUMBER: ${{ github.event.issue.number }}
          # Derived from the comment body, so treated as untrusted as well.
          SKILLS: ${{ steps.command.outputs.skills }}
        run: |
          # Split the skill list on whitespace into separate arguments without
          # letting the shell evaluate or glob-expand any of it. An empty list
          # yields no extra arguments.
          read -r -a SKILL_ARGS <<< "$SKILLS"
          python3 ci/generate_matrix.py "$PR_NUMBER" "${SKILL_ARGS[@]}" > /tmp/matrix.json
          # Multi-line output value, written with the heredoc-style delimiter
          # syntax of GITHUB_OUTPUT.
          echo "result<<EOF" >> "$GITHUB_OUTPUT"
          cat /tmp/matrix.json >> "$GITHUB_OUTPUT"
          echo "EOF" >> "$GITHUB_OUTPUT"
          python3 -c "import json; m=json.load(open('/tmp/matrix.json')); print(f'Generated {len(m[\"include\"])} jobs')"

  # Runs the evaluator once per matrix entry produced by `prepare`.
  evaluate:
    needs: prepare
    runs-on: ubuntu-latest
    strategy:
      # Let every matrix entry finish even if another one fails.
      fail-fast: false
      max-parallel: 2
      matrix: ${{ fromJson(needs.prepare.outputs.matrix) }}
    steps:
      - uses: actions/checkout@v4

      # Node and the Copilot CLI are only installed for the copilot provider.
      - uses: actions/setup-node@v4
        if: matrix.provider == 'copilot'
        with:
          node-version: '20'

      - name: Install Copilot CLI
        if: matrix.provider == 'copilot'
        run: npm install -g @github/copilot

      - uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true

      - name: Set artifact name
        id: artifact
        env:
          MATRIX_PROVIDER: ${{ matrix.provider }}
          MATRIX_MODEL: ${{ matrix.model }}
          MATRIX_SKILL: ${{ matrix.skill || 'all' }}
        run: |
          ARTIFACT_NAME="results-${MATRIX_PROVIDER}-${MATRIX_MODEL}-${MATRIX_SKILL}"
          # Colons are not allowed in artifact names; keep the `--` mapping.
          ARTIFACT_NAME="${ARTIFACT_NAME//:/--}"
          # The remaining characters rejected by upload-artifact
          # (" < > | * ? \ /) are each replaced with a single dash.
          ARTIFACT_NAME=$(printf '%s' "$ARTIFACT_NAME" | tr '"<>|*?\\/' '-')
          echo "name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT"

      - name: Evaluate ${{ matrix.display_name }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          COPILOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }}
          # Matrix values can originate from the PR comment, so they are
          # passed as data instead of being pasted into the script.
          MATRIX_PROVIDER: ${{ matrix.provider }}
          MATRIX_MODEL: ${{ matrix.model }}
          MATRIX_EXTRA_ARGS: ${{ matrix.extra_args }}
          MATRIX_SKILL: ${{ matrix.skill }}
        run: |
          # Build the command as an argv array and execute it directly; no
          # `eval`, so no value is ever re-parsed as shell code.
          CMD=(uv run --project tests --frozen tests/evaluator.py)
          CMD+=(--provider "$MATRIX_PROVIDER")
          CMD+=(--model "$MATRIX_MODEL")
          CMD+=(--judge --verbose --report --threshold 50)
          if [ -n "$MATRIX_EXTRA_ARGS" ]; then
            # extra_args is split on whitespace only; quotes inside it are
            # not interpreted.
            read -r -a EXTRA_ARGS <<< "$MATRIX_EXTRA_ARGS"
            CMD+=("${EXTRA_ARGS[@]}")
          fi
          if [ -n "$MATRIX_SKILL" ]; then
            CMD+=(--skill "$MATRIX_SKILL")
          else
            CMD+=(--all)
          fi
          echo "Running: ${CMD[*]}"
          "${CMD[@]}"

      - name: Upload results
        # Upload whatever was produced even when the evaluation step failed.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: ${{ steps.artifact.outputs.name }}
          path: tests/results/
          retention-days: 1

  # Collects all result artifacts and posts a single sticky comment on the PR.
  consolidate:
    needs: evaluate
    runs-on: ubuntu-latest
    # Run after failed evaluations too, but not when `evaluate` never ran
    # (for example a comment without /test, which skips `prepare`).
    if: always() && needs.evaluate.result != 'skipped'
    steps:
      - uses: actions/checkout@v4

      - uses: actions/download-artifact@v4
        with:
          path: tests/results/

      - name: Consolidate results
        run: python3 ci/consolidate_results.py

      - name: Post to PR
        uses: marocchino/sticky-pull-request-comment@v2
        # Only post when a comment.md file exists in the workspace.
        if: hashFiles('comment.md') != ''
        with:
          number: ${{ github.event.issue.number }}
          path: comment.md