diff --git a/.github/workflows/benchmark-manual.yml b/.github/workflows/benchmark-manual.yml
new file mode 100644
index 0000000..1b7a5e0
--- /dev/null
+++ b/.github/workflows/benchmark-manual.yml
@@ -0,0 +1,149 @@
+name: benchmark-manual
+
+# Manually dispatched benchmark run. Inputs arrive as free-form strings, so the
+# "normalize" step validates them against fixed bounds before anything runs.
+on:
+  workflow_dispatch:
+    inputs:
+      frameworks:
+        description: "Comma-separated frameworks (modkit,nestjs,baseline,wire,fx,do)"
+        required: true
+        default: "modkit,nestjs"
+        type: string
+      runs:
+        description: "Benchmark runs per framework (1-10)"
+        required: true
+        default: "3"
+        type: string
+      benchmark_requests:
+        description: "Benchmark requests per run (50-1000)"
+        required: true
+        default: "300"
+        type: string
+
+concurrency:
+  group: benchmark-manual-${{ github.ref }}-${{ github.event.inputs.frameworks }}
+  cancel-in-progress: true
+
+jobs:
+  benchmark:
+    name: Manual bounded benchmark run
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    steps:
+      - uses: actions/checkout@v5
+      - uses: actions/setup-go@v5
+        with:
+          go-version-file: go.mod
+      - name: Install benchmark quality tools
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y hyperfine
+          go install golang.org/x/perf/cmd/benchstat@latest
+          echo "$(go env GOPATH)/bin" >> "$GITHUB_PATH"
+      - name: Validate and normalize workflow inputs
+        id: normalize
+        shell: bash
+        env:
+          INPUT_FRAMEWORKS: ${{ github.event.inputs.frameworks }}
+          INPUT_RUNS: ${{ github.event.inputs.runs }}
+          INPUT_BENCHMARK_REQUESTS: ${{ github.event.inputs.benchmark_requests }}
+        run: |
+          set -euo pipefail
+          allowed="modkit nestjs baseline wire fx do"
+          framework_csv="$(printf '%s' "$INPUT_FRAMEWORKS" | tr -d '[:space:]')"
+          if [[ -z "$framework_csv" ]]; then
+            echo "frameworks input must not be empty" >&2
+            exit 1
+          fi
+
+          IFS=',' read -r -a raw_frameworks <<< "$framework_csv"
+          if [[ ${#raw_frameworks[@]} -eq 0 || ${#raw_frameworks[@]} -gt 6 ]]; then
+            echo "frameworks input must contain 1-6 entries" >&2
+            exit 1
+          fi
+
+          normalized=()
+          seen_frameworks=","
+          for framework in "${raw_frameworks[@]}"; do
+            case " $allowed " in
+              *" $framework "*)
+                if [[ "$seen_frameworks" == *",$framework,"* ]]; then
+                  echo "duplicate framework not allowed: $framework" >&2
+                  exit 1
+                fi
+                normalized+=("$framework")
+                seen_frameworks+="$framework,"
+                ;;
+              *)
+                echo "unsupported framework: $framework" >&2
+                exit 1
+                ;;
+            esac
+          done
+
+          if ! [[ "$INPUT_RUNS" =~ ^[0-9]+$ ]]; then
+            echo "runs must be an integer" >&2
+            exit 1
+          fi
+          if ! [[ "$INPUT_BENCHMARK_REQUESTS" =~ ^[0-9]+$ ]]; then
+            echo "benchmark_requests must be an integer" >&2
+            exit 1
+          fi
+
+          # Force base 10: bash arithmetic reads a leading 0 as octal, so "012"
+          # would compare as 10 and "08" would raise an error inside (( )).
+          BENCH_RUNS=$((10#$INPUT_RUNS))
+          BENCH_REQUESTS=$((10#$INPUT_BENCHMARK_REQUESTS))
+
+          if (( BENCH_RUNS < 1 || BENCH_RUNS > 10 )); then
+            echo "runs must be between 1 and 10" >&2
+            exit 1
+          fi
+          if (( BENCH_REQUESTS < 50 || BENCH_REQUESTS > 1000 )); then
+            echo "benchmark_requests must be between 50 and 1000" >&2
+            exit 1
+          fi
+
+          {
+            echo "frameworks=$(IFS=,; echo "${normalized[*]}")"
+            echo "bench_runs=$BENCH_RUNS"
+            echo "bench_requests=$BENCH_REQUESTS"
+          } >> "$GITHUB_OUTPUT"
+      - name: Run bounded benchmarks
+        shell: bash
+        env:
+          BENCH_ENGINE: hyperfine
+          # Step outputs are passed via env instead of being expanded into the script text.
+          NORMALIZED_FRAMEWORKS: ${{ steps.normalize.outputs.frameworks }}
+          NORMALIZED_RUNS: ${{ steps.normalize.outputs.bench_runs }}
+          NORMALIZED_REQUESTS: ${{ steps.normalize.outputs.bench_requests }}
+        run: |
+          set -euo pipefail
+          python3 scripts/environment-manifest.py collect-fingerprint --out results/latest/environment.fingerprint.json
+          IFS=',' read -r -a frameworks <<< "$NORMALIZED_FRAMEWORKS"
+          for framework in "${frameworks[@]}"; do
+            BENCHMARK_METADATA_MANAGED=1 \
+            BENCHMARK_RUNS="$NORMALIZED_RUNS" \
+            BENCHMARK_REQUESTS="$NORMALIZED_REQUESTS" \
+            bash scripts/run-single.sh "$framework"
+          done
+          python3 scripts/validate-result-schemas.py raw-check --raw-dir results/latest/raw
+          python3 scripts/environment-manifest.py write-manifest --raw-dir results/latest/raw --fingerprint results/latest/environment.fingerprint.json --out results/latest/environment.manifest.json
+      - name: Generate report and policy checks
+        run: |
+          python3 scripts/generate-report.py
+          make benchmark-schema-validate
+          make ci-benchmark-quality-check
+      - name: Upload manual benchmark artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-manual-results
+          path: |
+            results/latest/raw
+            results/latest/summary.json
+            results/latest/report.md
+            results/latest/benchmark-quality-summary.json
+            results/latest/environment.fingerprint.json
+            results/latest/environment.manifest.json
+          retention-days: 14
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d49713d..fd37a70 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -6,6 +6,10 @@ on:
       - main
   pull_request:
 
+concurrency:
+  group: ci-${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
 jobs:
   pr-title:
     name: Validate PR title
@@ -35,6 +39,10 @@ jobs:
   scripts:
     name: Script smoke tests (skipped targets expected)
     runs-on: ubuntu-latest
+    timeout-minutes: 25
+    concurrency:
+      group: benchmark-smoke-${{ github.workflow }}-${{ github.ref }}
+      cancel-in-progress: true
     steps:
       - uses: actions/checkout@v5
       - uses: actions/setup-go@v5
@@ -59,3 +67,4 @@ jobs:
         with:
           name: benchmark-quality-summary
           path: results/latest/benchmark-quality-summary.json
+          retention-days: 14
diff --git a/METHODOLOGY.md b/METHODOLOGY.md
index 77df593..3efddca 100644
--- a/METHODOLOGY.md
+++ b/METHODOLOGY.md
@@ -36,6 +36,7 @@
 - thresholds and required metrics are defined in `stats-policy.yaml`
 - `make ci-benchmark-quality-check` enforces policy locally and in CI
 - benchstat comparisons are evaluated against policy baseline framework (`baseline` by default)
+- manual CI benchmark runs use bounded workflow inputs (`frameworks` subset, `runs` 1..10, `benchmark_requests` 50..1000)
 
 ## Reporting
 
diff --git a/Makefile b/Makefile
index f6b7f72..4584c5b 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ GOPATH ?= $(shell $(GO) env GOPATH)
 GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover
 MODULES = $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;)
 
-.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check
+.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check workflow-concurrency-check workflow-budget-check workflow-inputs-check
 
 benchmark:
 	bash scripts/run-all.sh
@@ -99,3 +99,12 @@ benchmark-benchstat-check:
 
 ci-benchmark-quality-check:
 	$(PYTHON) scripts/benchmark-quality-check.py ci-check
+
+workflow-concurrency-check:
+	$(PYTHON) scripts/workflow-policy-check.py concurrency-check
+
+workflow-budget-check:
+	$(PYTHON) scripts/workflow-policy-check.py budget-check
+
+workflow-inputs-check:
+	$(PYTHON) scripts/workflow-policy-check.py inputs-check
diff --git a/README.md b/README.md
index d407be4..b5d1201 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,9 @@ make ci-benchmark-quality-check
 
 Benchmark/report flow enforces schema validation for raw and summary artifacts before quality gates.
 
+Manual bounded benchmark workflow is available in GitHub Actions as `benchmark-manual`.
+See `docs/guides/benchmark-workflow.md` for input bounds and execution details.
+
 Use OSS measurement engine (optional):
 
 ```bash
diff --git a/docs/guides/benchmark-workflow.md b/docs/guides/benchmark-workflow.md
index 216828b..2519eb6 100644
--- a/docs/guides/benchmark-workflow.md
+++ b/docs/guides/benchmark-workflow.md
@@ -25,6 +25,16 @@ make benchmark-nestjs
 
 Per-target runs also emit `results/latest/environment.fingerprint.json` and `results/latest/environment.manifest.json`.
 
+## Manual bounded CI run
+
+Use GitHub Actions workflow `benchmark-manual` with bounded `workflow_dispatch` inputs:
+
+- `frameworks`: comma-separated subset of `modkit,nestjs,baseline,wire,fx,do`
+- `runs`: integer in range `1..10`
+- `benchmark_requests`: integer in range `50..1000`
+
+Runs that exceed bounds are rejected before benchmark execution.
+
 Optional OSS measurement engine:
 
 ```bash
@@ -77,3 +87,9 @@ Quality thresholds and required metrics are versioned in `stats-policy.yaml`.
 - run from a clean working tree when possible
 - keep runtime versions stable
 - include host and Docker metadata in report notes
+
+## CI budget policy
+
+- benchmark smoke job timeout budget: 25 minutes
+- benchmark quality summary artifact retention: 14 days
+- expected CI compute envelope: one benchmark smoke run per ref due to concurrency cancellation; superseded runs are canceled before full benchmark execution
diff --git a/scripts/workflow-policy-check.py b/scripts/workflow-policy-check.py
new file mode 100644
index 0000000..bc652c0
--- /dev/null
+++ b/scripts/workflow-policy-check.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+"""Validate benchmark workflow safety policies by matching workflow file text."""
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parent.parent
+CI_WORKFLOW = ROOT / ".github" / "workflows" / "ci.yml"
+MANUAL_WORKFLOW = ROOT / ".github" / "workflows" / "benchmark-manual.yml"
+
+
+def read_text(path: Path) -> str:
+    """Return the UTF-8 text of a workflow file, exiting if it is not a regular file."""
+    if not path.is_file():
+        raise SystemExit(f"Workflow file not found: {path}")
+    return path.read_text(encoding="utf-8")
+
+
+def assert_contains(text: str, needle: str, err: str) -> None:
+    """Exit with ``err`` unless ``needle`` occurs verbatim in ``text``."""
+    if needle not in text:
+        raise SystemExit(err)
+
+
+def check_concurrency() -> None:
+    """Require workflow-level and scripts-job concurrency cancellation in ci.yml."""
+    text = read_text(CI_WORKFLOW)
+    assert_contains(
+        text,
+        "concurrency:\n  group: ci-${{ github.workflow }}-${{ github.ref }}\n  cancel-in-progress: true",
+        "workflow-concurrency-check failed: top-level workflow concurrency with cancel-in-progress=true is required",
+    )
+    assert_contains(
+        text,
+        "  scripts:\n    name: Script smoke tests (skipped targets expected)",
+        "workflow-concurrency-check failed: scripts benchmark smoke job is missing",
+    )
+    assert_contains(
+        text,
+        "      group: benchmark-smoke-${{ github.workflow }}-${{ github.ref }}",
+        "workflow-concurrency-check failed: scripts job benchmark concurrency group is missing",
+    )
+    # Anchor the flag to the benchmark-smoke group so that a cancel-in-progress
+    # setting on some other job cannot satisfy this check.
+    assert_contains(
+        text,
+        "      group: benchmark-smoke-${{ github.workflow }}-${{ github.ref }}\n      cancel-in-progress: true",
+        "workflow-concurrency-check failed: scripts job cancel-in-progress=true is required",
+    )
+    print("workflow-concurrency-check: validated workflow and benchmark job concurrency controls")
+
+
+def check_budget() -> None:
+    """Require the scripts job timeout and the quality-summary artifact retention in ci.yml."""
+    text = read_text(CI_WORKFLOW)
+    assert_contains(
+        text,
+        "  scripts:\n    name: Script smoke tests (skipped targets expected)\n    runs-on: ubuntu-latest\n    timeout-minutes: 25",
+        "workflow-budget-check failed: scripts job timeout-minutes budget must be set to 25",
+    )
+    assert_contains(
+        text,
+        "      - name: Upload benchmark quality summary\n        uses: actions/upload-artifact@v4\n        with:\n          name: benchmark-quality-summary\n          path: results/latest/benchmark-quality-summary.json\n          retention-days: 14",
+        "workflow-budget-check failed: benchmark-quality-summary artifact retention-days must be set",
+    )
+    print("workflow-budget-check: validated timeout budget and artifact retention policy")
+
+
+def check_inputs() -> None:
+    """Require the bounded workflow_dispatch inputs and their validation tokens in benchmark-manual.yml."""
+    text = read_text(MANUAL_WORKFLOW)
+    assert_contains(
+        text,
+        "workflow_dispatch:",
+        "workflow-inputs-check failed: benchmark-manual workflow_dispatch is required",
+    )
+    for key in ("frameworks", "runs", "benchmark_requests"):
+        assert_contains(
+            text,
+            f"      {key}:",
+            f"workflow-inputs-check failed: missing workflow_dispatch input '{key}'",
+        )
+    for token in (
+        "INPUT_FRAMEWORKS",
+        "INPUT_RUNS",
+        "INPUT_BENCHMARK_REQUESTS",
+        "BENCH_RUNS",
+        "BENCH_REQUESTS",
+        "runs must be between 1 and 10",
+        "benchmark_requests must be between 50 and 1000",
+        "duplicate framework not allowed",
+    ):
+        assert_contains(
+            text,
+            token,
+            f"workflow-inputs-check failed: missing bounded input token '{token}'",
+        )
+    print("workflow-inputs-check: validated bounded manual workflow inputs")
+
+
+def main() -> None:
+    """Parse the sub-command name and run the matching policy check."""
+    checks = {
+        "concurrency-check": check_concurrency,
+        "budget-check": check_budget,
+        "inputs-check": check_inputs,
+    }
+    parser = argparse.ArgumentParser(description="Validate benchmark workflow safety policies")
+    parser.add_argument("cmd", choices=list(checks))
+    args = parser.parse_args()
+    checks[args.cmd]()
+
+
+if __name__ == "__main__":
+    main()