chore: test matrix #43

Workflow file for this run

.github/workflows/skill-validation.yml at c601b33

	# Workflow identity, trigger, and token permissions for on-demand skill validation.
	name: Skill Validation

	# Fires when a new comment is created; the jobs below decide whether the
	# comment actually requests a test run.
	on:
	  issue_comment:
	    types:
	      - created

	# Scopes granted to the workflow's GITHUB_TOKEN.
	permissions:
	  contents: read
	  pull-requests: write

	jobs:
	# Parses the triggering comment and emits the job matrix for the evaluate job.
	prepare:
	  runs-on: ubuntu-latest
	  # Only react to comments on pull requests whose body contains '/test'.
	  if: github.event.issue.pull_request && contains(github.event.comment.body, '/test')
	  outputs:
	    # JSON matrix; the evaluate job decodes it with fromJson().
	    matrix: ${{ steps.matrix.outputs.result }}

	  steps:
	    - uses: actions/checkout@v4
	    - uses: astral-sh/setup-uv@v5

	    - name: Parse test command
	      id: command
	      env:
	        # The comment body is attacker-controlled. Passing it through the
	        # environment keeps it data; expanding ${{ ... }} inside `run` would
	        # paste it into the script text and allow shell injection.
	        COMMENT_BODY: ${{ github.event.comment.body }}
	      run: |
	        SKILLS=$(python3 ci/parse_test_command.py "$COMMENT_BODY")
	        echo "skills=$SKILLS" >> "$GITHUB_OUTPUT"
	        if [ -n "$SKILLS" ]; then
	          echo "Override skills: $SKILLS"
	        else
	          echo "Auto-detecting changed skills..."
	        fi

	    - name: Generate matrix
	      id: matrix
	      env:
	        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	        PR_NUMBER: ${{ github.event.issue.number }}
	        # Derived from the comment body, so it is treated as untrusted as well.
	        SKILLS: ${{ steps.command.outputs.skills }}
	      run: |
	        # $SKILLS is deliberately unquoted: each whitespace-separated entry
	        # becomes its own argument, and an empty value passes no argument.
	        # shellcheck disable=SC2086
	        python3 ci/generate_matrix.py "$PR_NUMBER" $SKILLS > /tmp/matrix.json

	        # Multi-line output syntax: name<<DELIM ... DELIM
	        echo "result<<EOF" >> "$GITHUB_OUTPUT"
	        cat /tmp/matrix.json >> "$GITHUB_OUTPUT"
	        echo "EOF" >> "$GITHUB_OUTPUT"

	        python3 -c "import json; m=json.load(open('/tmp/matrix.json')); print(f'Generated {len(m[\"include\"])} jobs')"

	# Runs tests/evaluator.py once per matrix entry and uploads tests/results/.
	evaluate:
	  needs: prepare
	  runs-on: ubuntu-latest
	  strategy:
	    # Let every matrix entry finish even if another one fails.
	    fail-fast: false
	    max-parallel: 2
	    matrix: ${{ fromJson(needs.prepare.outputs.matrix) }}

	  steps:
	    - uses: actions/checkout@v4

	    # Node and the Copilot CLI are only installed for the 'copilot' provider.
	    - uses: actions/setup-node@v4
	      if: matrix.provider == 'copilot'
	      with:
	        node-version: '20'

	    - name: Install Copilot CLI
	      if: matrix.provider == 'copilot'
	      run: npm install -g @github/copilot

	    - uses: astral-sh/setup-uv@v5
	      with:
	        enable-cache: true

	    - name: Set artifact name
	      id: artifact
	      env:
	        # Matrix values reach the shell as environment data rather than being
	        # expanded into the script text.
	        MATRIX_PROVIDER: ${{ matrix.provider }}
	        MATRIX_MODEL: ${{ matrix.model }}
	        MATRIX_SKILL: ${{ matrix.skill || 'all' }}
	      run: |
	        ARTIFACT_NAME="results-${MATRIX_PROVIDER}-${MATRIX_MODEL}-${MATRIX_SKILL}"
	        # Replace every ':' with '--' in the artifact name.
	        ARTIFACT_NAME="${ARTIFACT_NAME//:/--}"
	        echo "name=$ARTIFACT_NAME" >> "$GITHUB_OUTPUT"

	    - name: Evaluate ${{ matrix.display_name }}
	      env:
	        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	        COPILOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
	        OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }}
	        MATRIX_PROVIDER: ${{ matrix.provider }}
	        MATRIX_MODEL: ${{ matrix.model }}
	        MATRIX_SKILL: ${{ matrix.skill }}
	        MATRIX_EXTRA_ARGS: ${{ matrix.extra_args }}
	      run: |
	        # Build the command as an argv array and execute it directly. The
	        # previous string + eval form re-parsed matrix values as shell code.
	        CMD=(uv run --project tests --frozen tests/evaluator.py)
	        CMD+=(--provider "$MATRIX_PROVIDER")
	        CMD+=(--model "$MATRIX_MODEL")
	        CMD+=(--judge --verbose --report --threshold 50)

	        if [ -n "$MATRIX_EXTRA_ARGS" ]; then
	          # Split on whitespace only; shell quoting inside extra_args is no
	          # longer interpreted. NOTE(review): confirm no matrix entry relies
	          # on quoted extra arguments.
	          read -r -a EXTRA <<< "$MATRIX_EXTRA_ARGS"
	          CMD+=("${EXTRA[@]}")
	        fi

	        if [ -n "$MATRIX_SKILL" ]; then
	          CMD+=(--skill "$MATRIX_SKILL")
	        else
	          CMD+=(--all)
	        fi

	        echo "Running: ${CMD[*]}"
	        "${CMD[@]}"

	    - name: Upload results
	      # Upload whatever was produced, even when the evaluation step failed.
	      if: always()
	      uses: actions/upload-artifact@v4
	      with:
	        name: ${{ steps.artifact.outputs.name }}
	        path: tests/results/
	        retention-days: 1

	# Collects every evaluate artifact and posts the consolidated report to the PR.
	consolidate:
	  needs: evaluate
	  runs-on: ubuntu-latest
	  # Run even if some evaluate entries failed, but not when evaluate was
	  # skipped. A bare always() would also start this job for every comment
	  # that never requested a test run.
	  if: always() && needs.evaluate.result != 'skipped'

	  steps:
	    - uses: actions/checkout@v4

	    # No artifact name is given, so all artifacts of this run are downloaded.
	    - uses: actions/download-artifact@v4
	      with:
	        path: tests/results/

	    - name: Consolidate results
	      run: python3 ci/consolidate_results.py

	    - name: Post to PR
	      uses: marocchino/sticky-pull-request-comment@v2
	      # Skip posting when no comment.md exists in the workspace.
	      if: hashFiles('comment.md') != ''
	      with:
	        number: ${{ github.event.issue.number }}
	        path: comment.md

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

chore: test matrix #43

Workflow file

chore: test matrix #43

Uh oh!

Workflow file for this run