141 changes: 141 additions & 0 deletions .github/workflows/benchmark-manual.yml
@@ -0,0 +1,141 @@
name: benchmark-manual

on:
  workflow_dispatch:
    inputs:
      frameworks:
        description: "Comma-separated frameworks (modkit,nestjs,baseline,wire,fx,do)"
        required: true
        default: "modkit,nestjs"
        type: string
      runs:
        description: "Benchmark runs per framework (1-10)"
        required: true
        default: "3"
        type: string
      benchmark_requests:
        description: "Benchmark requests per run (50-1000)"
        required: true
        default: "300"
        type: string

concurrency:
  group: benchmark-manual-${{ github.ref }}-${{ github.event.inputs.frameworks }}
  cancel-in-progress: true

jobs:
  benchmark:
    name: Manual bounded benchmark run
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-go@v5
        with:
          go-version-file: go.mod
      - name: Install benchmark quality tools
        run: |
          sudo apt-get update
          sudo apt-get install -y hyperfine
          go install golang.org/x/perf/cmd/benchstat@latest
          echo "$(go env GOPATH)/bin" >> "$GITHUB_PATH"
      - name: Validate and normalize workflow inputs
        id: normalize
        shell: bash
        env:
          INPUT_FRAMEWORKS: ${{ github.event.inputs.frameworks }}
          INPUT_RUNS: ${{ github.event.inputs.runs }}
          INPUT_BENCHMARK_REQUESTS: ${{ github.event.inputs.benchmark_requests }}
        run: |
          set -euo pipefail
          allowed="modkit nestjs baseline wire fx do"
          framework_csv="$(printf '%s' "$INPUT_FRAMEWORKS" | tr -d '[:space:]')"
          if [[ -z "$framework_csv" ]]; then
            echo "frameworks input must not be empty" >&2
            exit 1
          fi

          IFS=',' read -r -a raw_frameworks <<< "$framework_csv"
          if [[ ${#raw_frameworks[@]} -eq 0 || ${#raw_frameworks[@]} -gt 6 ]]; then
            echo "frameworks input must contain 1-6 entries" >&2
            exit 1
          fi

          normalized=()
          seen_frameworks=","
          for framework in "${raw_frameworks[@]}"; do
            case " $allowed " in
              *" $framework "*)
                if [[ "$seen_frameworks" == *",$framework,"* ]]; then
                  echo "duplicate framework not allowed: $framework" >&2
                  exit 1
                fi
                normalized+=("$framework")
                seen_frameworks+="$framework,"
                ;;
              *)
                echo "unsupported framework: $framework" >&2
                exit 1
                ;;
            esac
          done

          if ! [[ "$INPUT_RUNS" =~ ^[0-9]+$ ]]; then
            echo "runs must be an integer" >&2
            exit 1
          fi
          if ! [[ "$INPUT_BENCHMARK_REQUESTS" =~ ^[0-9]+$ ]]; then
            echo "benchmark_requests must be an integer" >&2
            exit 1
          fi

          BENCH_RUNS="$INPUT_RUNS"
          BENCH_REQUESTS="$INPUT_BENCHMARK_REQUESTS"

          if (( BENCH_RUNS < 1 || BENCH_RUNS > 10 )); then
            echo "runs must be between 1 and 10" >&2
            exit 1
          fi
          if (( BENCH_REQUESTS < 50 || BENCH_REQUESTS > 1000 )); then
            echo "benchmark_requests must be between 50 and 1000" >&2
            exit 1
          fi

          {
            echo "frameworks=$(IFS=,; echo "${normalized[*]}")"
            echo "bench_runs=$BENCH_RUNS"
            echo "bench_requests=$BENCH_REQUESTS"
          } >> "$GITHUB_OUTPUT"
      - name: Run bounded benchmarks
        shell: bash
        env:
          BENCH_ENGINE: hyperfine
        run: |
          set -euo pipefail
          python3 scripts/environment-manifest.py collect-fingerprint --out results/latest/environment.fingerprint.json
          IFS=',' read -r -a frameworks <<< "${{ steps.normalize.outputs.frameworks }}"
          for framework in "${frameworks[@]}"; do
            BENCHMARK_METADATA_MANAGED=1 \
            BENCHMARK_RUNS="${{ steps.normalize.outputs.bench_runs }}" \
            BENCHMARK_REQUESTS="${{ steps.normalize.outputs.bench_requests }}" \
            bash scripts/run-single.sh "$framework"
          done
          python3 scripts/validate-result-schemas.py raw-check --raw-dir results/latest/raw
          python3 scripts/environment-manifest.py write-manifest --raw-dir results/latest/raw --fingerprint results/latest/environment.fingerprint.json --out results/latest/environment.manifest.json
      - name: Generate report and policy checks
        run: |
          python3 scripts/generate-report.py
          make benchmark-schema-validate
          make ci-benchmark-quality-check
      - name: Upload manual benchmark artifacts
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-manual-results
          path: |
            results/latest/raw
            results/latest/summary.json
            results/latest/report.md
            results/latest/benchmark-quality-summary.json
            results/latest/environment.fingerprint.json
            results/latest/environment.manifest.json
          retention-days: 14
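For reference, a manual dispatch of this workflow from the GitHub CLI could look like the sketch below. It is illustrative only: it assumes an authenticated `gh` session against this repository, the input names match the `workflow_dispatch` inputs defined above, and out-of-range values are rejected by the normalize step rather than by `gh`.

```bash
# Sketch: dispatch a bounded manual benchmark run (values stay inside the documented bounds).
gh workflow run benchmark-manual \
  -f frameworks=modkit,baseline \
  -f runs=5 \
  -f benchmark_requests=500
```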
9 changes: 9 additions & 0 deletions .github/workflows/ci.yml
@@ -6,6 +6,10 @@ on:
      - main
  pull_request:

concurrency:
  group: ci-${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  pr-title:
    name: Validate PR title
@@ -35,6 +39,10 @@ jobs:
  scripts:
    name: Script smoke tests (skipped targets expected)
    runs-on: ubuntu-latest
    timeout-minutes: 25
    concurrency:
      group: benchmark-smoke-${{ github.workflow }}-${{ github.ref }}
      cancel-in-progress: true
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-go@v5
@@ -59,3 +67,4 @@ jobs:
        with:
          name: benchmark-quality-summary
          path: results/latest/benchmark-quality-summary.json
          retention-days: 14
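With these concurrency groups in place, a superseded run on the same ref is cancelled as soon as a newer one starts. One hedged way to observe this, assuming the GitHub CLI is installed, is to list recent runs for the current branch and look for the `cancelled` status:

```bash
# Sketch: inspect recent CI runs for the current branch; superseded runs should show as cancelled.
gh run list --workflow ci.yml --branch "$(git branch --show-current)" --limit 10
```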
1 change: 1 addition & 0 deletions METHODOLOGY.md
@@ -36,6 +36,7 @@
- thresholds and required metrics are defined in `stats-policy.yaml`
- `make ci-benchmark-quality-check` enforces policy locally and in CI
- benchstat comparisons are evaluated against policy baseline framework (`baseline` by default)
- manual CI benchmark runs use bounded workflow inputs (`frameworks` subset, `runs` 1..10, `benchmark_requests` 50..1000)

## Reporting

11 changes: 10 additions & 1 deletion Makefile
@@ -5,7 +5,7 @@ GOPATH ?= $(shell $(GO) env GOPATH)
GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover
MODULES = $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;)

.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check
.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check workflow-inputs-check

benchmark:
	bash scripts/run-all.sh
@@ -99,3 +99,12 @@ benchmark-benchstat-check:

ci-benchmark-quality-check:
	$(PYTHON) scripts/benchmark-quality-check.py ci-check

workflow-concurrency-check:
	$(PYTHON) scripts/workflow-policy-check.py concurrency-check

workflow-budget-check:
	$(PYTHON) scripts/workflow-policy-check.py budget-check

workflow-inputs-check:
	$(PYTHON) scripts/workflow-policy-check.py inputs-check
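The three new targets are thin wrappers around `scripts/workflow-policy-check.py`; a quick local sanity check before pushing workflow changes might look like:

```bash
# Run the new workflow policy gates locally (each target invokes scripts/workflow-policy-check.py).
make workflow-concurrency-check
make workflow-budget-check
make workflow-inputs-check
```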
3 changes: 3 additions & 0 deletions README.md
@@ -27,6 +27,9 @@ make ci-benchmark-quality-check

Benchmark/report flow enforces schema validation for raw and summary artifacts before quality gates.

A manual bounded benchmark workflow is available in GitHub Actions as `benchmark-manual`.
See `docs/guides/benchmark-workflow.md` for input bounds and execution details.

Use OSS measurement engine (optional):

```bash
16 changes: 16 additions & 0 deletions docs/guides/benchmark-workflow.md
@@ -25,6 +25,16 @@ make benchmark-nestjs

Per-target runs also emit `results/latest/environment.fingerprint.json` and `results/latest/environment.manifest.json`.

## Manual bounded CI run

Use GitHub Actions workflow `benchmark-manual` with bounded `workflow_dispatch` inputs:

- `frameworks`: comma-separated subset of `modkit,nestjs,baseline,wire,fx,do`
- `runs`: integer in range `1..10`
- `benchmark_requests`: integer in range `50..1000`

Runs that exceed bounds are rejected before benchmark execution.

Optional OSS measurement engine:

```bash
@@ -77,3 +87,9 @@ Quality thresholds and required metrics are versioned in `stats-policy.yaml`.
- run from a clean working tree when possible
- keep runtime versions stable
- include host and Docker metadata in report notes

## CI budget policy

- benchmark smoke job timeout budget: 25 minutes
- benchmark quality summary artifact retention: 14 days
- expected CI compute envelope: one benchmark smoke run per ref due to concurrency cancellation; superseded runs are canceled before full benchmark execution
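To reproduce one bounded iteration locally, a minimal sketch is to export the same environment variables the `Run bounded benchmarks` step sets (values shown are the workflow defaults) and invoke `scripts/run-single.sh` directly; behaviour outside CI may still depend on your local toolchain:

```bash
# Sketch: one bounded benchmark run for a single framework, mirroring the manual CI job.
BENCHMARK_METADATA_MANAGED=1 \
BENCHMARK_RUNS=3 \
BENCHMARK_REQUESTS=300 \
bash scripts/run-single.sh modkit
```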
109 changes: 109 additions & 0 deletions scripts/workflow-policy-check.py
@@ -0,0 +1,109 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
from pathlib import Path


ROOT = Path(__file__).resolve().parent.parent
CI_WORKFLOW = ROOT / ".github" / "workflows" / "ci.yml"
MANUAL_WORKFLOW = ROOT / ".github" / "workflows" / "benchmark-manual.yml"


def read_text(path: Path) -> str:
    if not path.exists():
        raise SystemExit(f"Workflow file not found: {path}")
    return path.read_text(encoding="utf-8")


def assert_contains(text: str, needle: str, err: str) -> None:
    if needle not in text:
        raise SystemExit(err)


def check_concurrency() -> None:
    text = read_text(CI_WORKFLOW)
    assert_contains(
        text,
        "concurrency:\n  group: ci-${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: true",
        "workflow-concurrency-check failed: top-level workflow concurrency with cancel-in-progress=true is required",
    )
    assert_contains(
        text,
        "  scripts:\n    name: Script smoke tests (skipped targets expected)",
        "workflow-concurrency-check failed: scripts benchmark smoke job is missing",
    )
    assert_contains(
        text,
        "      group: benchmark-smoke-${{ github.workflow }}-${{ github.ref }}",
        "workflow-concurrency-check failed: scripts job benchmark concurrency group is missing",
    )
    assert_contains(
        text,
        "      cancel-in-progress: true",
        "workflow-concurrency-check failed: scripts job cancel-in-progress=true is required",
    )
    print("workflow-concurrency-check: validated workflow and benchmark job concurrency controls")


def check_budget() -> None:
    text = read_text(CI_WORKFLOW)
    assert_contains(
        text,
        "  scripts:\n    name: Script smoke tests (skipped targets expected)\n    runs-on: ubuntu-latest\n    timeout-minutes: 25",
        "workflow-budget-check failed: scripts job timeout-minutes budget must be set to 25",
    )
    assert_contains(
        text,
        "      - name: Upload benchmark quality summary\n        uses: actions/upload-artifact@v4\n        with:\n          name: benchmark-quality-summary\n          path: results/latest/benchmark-quality-summary.json\n          retention-days: 14",
        "workflow-budget-check failed: benchmark-quality-summary artifact retention-days must be set",
    )
    print("workflow-budget-check: validated timeout budget and artifact retention policy")


def check_inputs() -> None:
    text = read_text(MANUAL_WORKFLOW)
    assert_contains(
        text,
        "workflow_dispatch:",
        "workflow-inputs-check failed: benchmark-manual workflow_dispatch is required",
    )
    for key in ("frameworks", "runs", "benchmark_requests"):
        assert_contains(
            text,
            f"      {key}:",
            f"workflow-inputs-check failed: missing workflow_dispatch input '{key}'",
        )
    for token in (
        "INPUT_FRAMEWORKS",
        "INPUT_RUNS",
        "INPUT_BENCHMARK_REQUESTS",
        "BENCH_RUNS",
        "BENCH_REQUESTS",
        "runs must be between 1 and 10",
        "benchmark_requests must be between 50 and 1000",
        "duplicate framework not allowed",
    ):
        assert_contains(
            text,
            token,
            f"workflow-inputs-check failed: missing bounded input token '{token}'",
        )
    print("workflow-inputs-check: validated bounded manual workflow inputs")


def main() -> None:
    parser = argparse.ArgumentParser(description="Validate benchmark workflow safety policies")
    parser.add_argument("cmd", choices=["concurrency-check", "budget-check", "inputs-check"])
    args = parser.parse_args()

    if args.cmd == "concurrency-check":
        check_concurrency()
    elif args.cmd == "budget-check":
        check_budget()
    elif args.cmd == "inputs-check":
        check_inputs()


if __name__ == "__main__":
    main()
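The script is normally reached through the Makefile targets above, but each subcommand can also be run directly. On success it prints a single confirmation line; on failure it exits non-zero with a `workflow-*-check failed:` message:

```bash
# Sketch: run each policy check directly; output lines match the script's print statements.
python3 scripts/workflow-policy-check.py concurrency-check
python3 scripts/workflow-policy-check.py budget-check
python3 scripts/workflow-policy-check.py inputs-check
```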