Skip to content

Benchmark Dashboard

Benchmark Dashboard #3

name: Benchmark Dashboard
# Manual trigger only: the workflow runs when dispatched with the inputs below.
on:
  workflow_dispatch:
    inputs:
      # Restricted to the listed choices.
      provider:
        description: "Provider (ollama, copilot, gemini)"
        required: true
        type: choice
        options:
          - ollama
          - copilot
          - gemini
      # Free-form string; treat as untrusted text wherever it is consumed.
      model:
        description: "Model name (e.g., qwen-coder-next:cloud)"
        required: true
        type: string
      # Optional free-form string.
      skill:
        description: "Optional specific skill to test"
        required: false
        type: string
jobs:
  # Checks out `main`, runs tests/evaluator.py for the selected provider/model,
  # then invokes ci/publish_benchmarks.py against the benchmark-history branch.
  # NOTE(review): the `skill` workflow input is not referenced by any step
  # here - wire it into the evaluator call or drop the input.
  # NOTE(review): if ci/publish_benchmarks.py pushes to benchmark-history, this
  # job presumably needs `permissions: contents: write` as well - confirm.
  run-benchmark:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          ref: main
          path: workspace
      - uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true
      - name: Run benchmark
        working-directory: workspace
        env:
          OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }}
          # Inputs go through the environment instead of being expanded into
          # the script text, so a crafted value cannot inject shell commands.
          BENCH_PROVIDER: ${{ inputs.provider }}
          BENCH_MODEL: ${{ inputs.model }}
        run: |
          # Install dependencies
          uv sync --project tests
          # Run benchmark
          uv run --project tests tests/evaluator.py \
            --provider "$BENCH_PROVIDER" \
            --model "$BENCH_MODEL" \
            --judge \
            --verbose \
            --report
          # Rename artifact for clarity
          if [ -d tests/results ]; then
            ARTIFACT_NAME="benchmark-${BENCH_PROVIDER}-${BENCH_MODEL}-$(date +%Y%m%d-%H%M%S)"
            mv tests/results "tests/${ARTIFACT_NAME}"
          fi
          # Create docs/benchmarks if it doesn't exist for publish_benchmarks.py
          mkdir -p docs/benchmarks
      # NOTE(review): two steps share the name "Generate dashboard"; they differ
      # only in the --no-benchmark flag. Confirm both invocations are intended.
      - name: Generate dashboard
        working-directory: workspace
        env:
          BENCH_PROVIDER: ${{ inputs.provider }}
          BENCH_MODEL: ${{ inputs.model }}
        run: |
          uv run --project tests python3 ci/publish_benchmarks.py \
            --provider "$BENCH_PROVIDER" \
            --model "$BENCH_MODEL" \
            --branch benchmark-history \
            --no-benchmark
      - name: Generate dashboard
        working-directory: workspace
        env:
          BENCH_PROVIDER: ${{ inputs.provider }}
          BENCH_MODEL: ${{ inputs.model }}
        run: |
          uv run --project tests python3 ci/publish_benchmarks.py \
            --provider "$BENCH_PROVIDER" \
            --model "$BENCH_MODEL" \
            --branch benchmark-history
  # Copies published benchmark files from the benchmark-history checkout into
  # the `main` checkout's docs/ tree, commits them, and pushes the result.
  deploy-pages:
    needs: run-benchmark
    runs-on: ubuntu-latest
    # The final step runs `git push`, which requires a token with write access
    # to repository contents.
    permissions:
      contents: write
    steps:
      - name: Checkout workspace
        uses: actions/checkout@v4
        with:
          ref: main
          path: workspace
      - name: Checkout benchmark data
        uses: actions/checkout@v4
        with:
          ref: benchmark-history
          path: benchmark-data
      - name: Copy results to docs
        # Each copy is best-effort: a missing source file is silently ignored.
        run: |
          mkdir -p workspace/docs/benchmarks
          cp benchmark-data/docs/benchmarks.json workspace/docs/benchmarks.json 2>/dev/null || true
          cp benchmark-data/docs/index.html workspace/docs/index.html 2>/dev/null || true
          # Also copy individual benchmark results if they exist
          cp -r benchmark-data/docs/benchmarks/*.json workspace/docs/benchmarks/ 2>/dev/null || true
      - name: Commit and push updates
        working-directory: workspace
        # NOTE(review): HEAD here is the `main` checkout plus one new commit,
        # yet it is pushed to benchmark-history. Unless benchmark-history is an
        # ancestor of main the push will likely be rejected as non-fast-forward
        # - confirm the intended target branch.
        run: |
          git config user.name "GitHub Actions"
          git config user.email "actions@github.com"
          git add docs/
          git commit -m "Update benchmark data" || echo "No changes to commit"
          git push origin HEAD:benchmark-history