diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a1f2a95..d49713d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -50,6 +50,8 @@ jobs: run: bash scripts/run-all.sh - name: Generate report from raw results run: python3 scripts/generate-report.py + - name: Validate benchmark result schemas + run: make benchmark-schema-validate - name: Run statistical quality gate run: make ci-benchmark-quality-check - name: Upload benchmark quality summary diff --git a/.gitignore b/.gitignore index 280124e..92cb619 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,6 @@ coverage.out # Python cache/bytecode **/__pycache__/ *.py[cod] + +# Local worktrees +.worktrees/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a5289d9..98201f6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -23,15 +23,28 @@ Run these before opening a PR: ```bash go test ./... +make test-coverage TARGET=http://localhost:3001 bash scripts/parity-check.sh ``` +For patch coverage against `origin/main`: + +```bash +make test-patch-coverage +``` + If you changed scripts, also run shell linting if available: ```bash shellcheck scripts/*.sh ``` +If you changed benchmark artifacts or report generation, also run schema validation: + +```bash +make benchmark-schema-validate +``` + ## Pull request process 1. Create a branch from `main`. diff --git a/Makefile b/Makefile index 0755546..f6b7f72 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,11 @@ -.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check +SHELL := /bin/sh +PYTHON ?= python3 +GO ?= go +GOPATH ?= $(shell $(GO) env GOPATH) +GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover +MODULES = $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;) + +.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check benchmark: bash scripts/run-all.sh @@ -22,10 +29,36 @@ benchmark-do: bash scripts/run-single.sh do report: - python3 scripts/generate-report.py + $(PYTHON) scripts/generate-report.py test: - go test ./... + $(GO) test ./... + +test-coverage: + @mkdir -p .coverage + @echo "mode: atomic" > .coverage/coverage.out + @for mod in $(MODULES); do \ + echo "Testing coverage for module: $$mod"; \ + (cd $$mod && $(GO) test -coverprofile=profile.out -covermode=atomic ./...) || exit 1; \ + if [ -f $$mod/profile.out ]; then \ + tail -n +2 $$mod/profile.out >> .coverage/coverage.out; \ + rm $$mod/profile.out; \ + fi; \ + done + @printf "\nTotal Coverage:\n" + @$(GO) tool cover -func=.coverage/coverage.out | grep "total:" + +test-patch-coverage: tools test-coverage + @echo "Comparing against origin/main..." 
+ @git diff -U0 --no-color origin/main...HEAD > .coverage/diff.patch + @$(GO_PATCH_COVER) .coverage/coverage.out .coverage/diff.patch > .coverage/patch_coverage.out + @echo "Patch Coverage Report:" + @cat .coverage/patch_coverage.out + +tools: + @echo "Installing development tools..." + @$(GO) install github.com/seriousben/go-patch-cover/cmd/go-patch-cover@latest + @echo "Done: go-patch-cover installed" parity-check: TARGET="$(PARITY_TARGET)" bash scripts/parity-check.sh @@ -37,22 +70,32 @@ parity-check-nestjs: TARGET=http://localhost:3002 bash scripts/parity-check.sh benchmark-fingerprint-check: - python3 scripts/environment-manifest.py check-fingerprint --file results/latest/environment.fingerprint.json + $(PYTHON) scripts/environment-manifest.py check-fingerprint --file results/latest/environment.fingerprint.json benchmark-limits-check: - python3 scripts/environment-manifest.py check-limits --compose docker-compose.yml + $(PYTHON) scripts/environment-manifest.py check-limits --compose docker-compose.yml benchmark-manifest-check: - python3 scripts/environment-manifest.py check-manifest --file results/latest/environment.manifest.json + $(PYTHON) scripts/environment-manifest.py check-manifest --file results/latest/environment.manifest.json + +benchmark-raw-schema-check: + $(PYTHON) scripts/validate-result-schemas.py raw-check + +benchmark-summary-schema-check: + $(PYTHON) scripts/validate-result-schemas.py summary-check + +benchmark-schema-validate: + $(MAKE) benchmark-raw-schema-check + $(MAKE) benchmark-summary-schema-check benchmark-stats-check: - python3 scripts/benchmark-quality-check.py stats-check + $(PYTHON) scripts/benchmark-quality-check.py stats-check benchmark-variance-check: - python3 scripts/benchmark-quality-check.py variance-check + $(PYTHON) scripts/benchmark-quality-check.py variance-check benchmark-benchstat-check: - python3 scripts/benchmark-quality-check.py benchstat-check + $(PYTHON) scripts/benchmark-quality-check.py benchstat-check ci-benchmark-quality-check: - python3 scripts/benchmark-quality-check.py ci-check + $(PYTHON) scripts/benchmark-quality-check.py ci-check diff --git a/README.md b/README.md index a0865ec..d407be4 100644 --- a/README.md +++ b/README.md @@ -21,9 +21,12 @@ Run benchmark orchestration and generate a report: ```bash make benchmark make report +make benchmark-schema-validate make ci-benchmark-quality-check ``` +Benchmark/report flow enforces schema validation for raw and summary artifacts before quality gates. + Use OSS measurement engine (optional): ```bash @@ -36,6 +39,7 @@ BENCH_ENGINE=hyperfine make benchmark - Python 3 - hyperfine (optional benchmark engine) - benchstat (`go install golang.org/x/perf/cmd/benchstat@latest`) +- go-patch-cover (`go install github.com/seriousben/go-patch-cover/cmd/go-patch-cover@latest`, for `make test-patch-coverage`) ## Repository layout diff --git a/docs/guides/benchmark-workflow.md b/docs/guides/benchmark-workflow.md index 8485d4c..216828b 100644 --- a/docs/guides/benchmark-workflow.md +++ b/docs/guides/benchmark-workflow.md @@ -13,6 +13,7 @@ ```bash make benchmark make report +make benchmark-schema-validate ``` ## Per-target run @@ -56,10 +57,13 @@ Benchmark scripts must run parity first for each target. 
If parity fails, skip b - `results/latest/report.md` - markdown report - `results/latest/benchmark-quality-summary.json` - policy quality gate output - `results/latest/tooling/benchstat/*.txt` - benchstat comparison outputs +- `schemas/benchmark-raw-v1.schema.json` - raw benchmark artifact contract +- `schemas/benchmark-summary-v1.schema.json` - summary artifact contract ## Quality checks ```bash +make benchmark-schema-validate make benchmark-stats-check make benchmark-variance-check make benchmark-benchstat-check diff --git a/schemas/benchmark-raw-v1.schema.json b/schemas/benchmark-raw-v1.schema.json new file mode 100644 index 0000000..0c08480 --- /dev/null +++ b/schemas/benchmark-raw-v1.schema.json @@ -0,0 +1,110 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://go-modkit.dev/schemas/benchmark-raw-v1.schema.json", + "title": "Benchmark Raw Result v1", + "type": "object", + "required": [ + "schema_version", + "framework", + "target", + "status" + ], + "additionalProperties": true, + "properties": { + "schema_version": { + "const": "raw-v1" + }, + "framework": { + "type": "string", + "minLength": 1 + }, + "target": { + "type": "string", + "minLength": 1 + }, + "status": { + "type": "string", + "enum": [ + "ok", + "skipped" + ] + }, + "reason": { + "type": "string" + }, + "parity": { + "type": "string" + }, + "engine": { + "type": "string" + }, + "metric_units": { + "type": "object" + }, + "benchmark": { + "type": "object", + "properties": { + "run_stats": { + "type": "array" + }, + "median": { + "type": "object" + } + } + }, + "docker": { + "type": "object" + }, + "resources_normalized": { + "type": "object" + } + }, + "allOf": [ + { + "if": { + "properties": { + "status": { + "const": "ok" + } + }, + "required": [ + "status" + ] + }, + "then": { + "required": [ + "parity", + "engine", + "metric_units", + "benchmark", + "resources_normalized" + ], + "properties": { + "benchmark": { + "required": [ + "run_stats", + "median" + ] + } + } + } + }, + { + "if": { + "properties": { + "status": { + "const": "skipped" + } + }, + "required": [ + "status" + ] + }, + "then": { + "required": [ + "reason" + ] + } + } + ] +} diff --git a/schemas/benchmark-summary-v1.schema.json b/schemas/benchmark-summary-v1.schema.json new file mode 100644 index 0000000..aa98a05 --- /dev/null +++ b/schemas/benchmark-summary-v1.schema.json @@ -0,0 +1,132 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://go-modkit.dev/schemas/benchmark-summary-v1.schema.json", + "title": "Benchmark Summary v1", + "type": "object", + "required": [ + "schema_version", + "generated_at", + "total_targets", + "successful_targets", + "skipped_targets", + "targets" + ], + "additionalProperties": true, + "properties": { + "schema_version": { + "const": "summary-v1" + }, + "generated_at": { + "type": "string", + "minLength": 1 + }, + "total_targets": { + "type": "integer", + "minimum": 0 + }, + "successful_targets": { + "type": "integer", + "minimum": 0 + }, + "skipped_targets": { + "type": "integer", + "minimum": 0 + }, + "targets": { + "type": "array", + "items": { + "type": "object", + "required": [ + "framework", + "status", + "target", + "provenance" + ], + "additionalProperties": true, + "properties": { + "framework": { + "type": "string", + "minLength": 1 + }, + "status": { + "type": "string", + "enum": [ + "ok", + "skipped" + ] + }, + "target": { + "type": "string", + "minLength": 1 + }, + "reason": { + "type": "string" + }, + "median": { + "type": "object" + }, + 
"uncertainty": { + "type": "object" + }, + "provenance": { + "type": "object", + "required": [ + "raw_source" + ], + "properties": { + "raw_source": { + "type": "string", + "minLength": 1 + }, + "manifest": { + "type": "string", + "minLength": 1 + } + } + } + } + } + } + }, + "allOf": [ + { + "if": { + "properties": { + "targets": { + "type": "array" + } + }, + "required": [ + "targets" + ] + }, + "then": { + "properties": { + "targets": { + "items": { + "allOf": [ + { + "if": { + "properties": { + "status": { + "const": "ok" + } + }, + "required": [ + "status" + ] + }, + "then": { + "required": [ + "uncertainty" + ] + } + } + ] + } + } + } + } + } + ] +} diff --git a/scripts/benchmark-measure.py b/scripts/benchmark-measure.py index 166e815..cb23599 100644 --- a/scripts/benchmark-measure.py +++ b/scripts/benchmark-measure.py @@ -241,6 +241,7 @@ def main(): if not run_stats: payload = { + "schema_version": "raw-v1", "framework": args.framework, "target": args.target, "status": "skipped", @@ -279,6 +280,7 @@ def main(): docker_stats = collect_docker_stats(args.framework) payload = { + "schema_version": "raw-v1", "framework": args.framework, "target": args.target, "status": "ok", diff --git a/scripts/generate-report.py b/scripts/generate-report.py index 3da31ba..faeb914 100755 --- a/scripts/generate-report.py +++ b/scripts/generate-report.py @@ -1,5 +1,7 @@ #!/usr/bin/env python3 import json +import subprocess +import sys from datetime import datetime, timezone from pathlib import Path @@ -11,6 +13,13 @@ REPORT_PATH = RESULTS_LATEST / "report.md" +def run_schema_check(command): + completed = subprocess.run(command, capture_output=True, text=True, check=False) + if completed.returncode != 0: + message = completed.stderr.strip() or completed.stdout.strip() or "schema validation failed" + raise SystemExit(message) + + def load_raw_files(): if not RAW_DIR.exists(): return [] @@ -18,7 +27,9 @@ def load_raw_files(): for path in sorted(RAW_DIR.glob("*.json")): try: with path.open("r", encoding="utf-8") as f: - rows.append(json.load(f)) + payload = json.load(f) + payload["_source_file"] = path.name + rows.append(payload) except json.JSONDecodeError as exc: print(f"Warning: skipping malformed JSON {path}: {exc}") return rows @@ -27,6 +38,7 @@ def load_raw_files(): def build_summary(rows): generated_at = datetime.now(timezone.utc).isoformat() summary = { + "schema_version": "summary-v1", "generated_at": generated_at, "total_targets": len(rows), "successful_targets": sum(1 for r in rows if r.get("status") == "ok"), @@ -39,8 +51,18 @@ def build_summary(rows): "status": row.get("status"), "target": row.get("target"), "reason": row.get("reason"), + "provenance": { + "raw_source": f"results/latest/raw/{row.get('_source_file', 'unknown')}" + }, } bench = row.get("benchmark") or {} + quality = (bench.get("quality") or {}).get("variance") or {} + if quality: + target["uncertainty"] = { + "rps_cv": quality.get("rps_cv"), + "latency_ms_p95_cv": quality.get("latency_ms_p95_cv"), + "latency_ms_p99_cv": quality.get("latency_ms_p99_cv"), + } median = bench.get("median") or {} if median: target["median"] = { @@ -104,9 +126,11 @@ def write_report(summary): def main(): + run_schema_check([sys.executable, "scripts/validate-result-schemas.py", "raw-check"]) rows = load_raw_files() summary = build_summary(rows) write_summary(summary) + run_schema_check([sys.executable, "scripts/validate-result-schemas.py", "summary-check"]) write_report(summary) print(f"Wrote: {SUMMARY_PATH}") print(f"Wrote: {REPORT_PATH}") diff --git 
a/scripts/run-all.sh b/scripts/run-all.sh index 52f6e4f..8cbebf6 100755 --- a/scripts/run-all.sh +++ b/scripts/run-all.sh @@ -71,6 +71,8 @@ for framework in "${frameworks[@]}"; do BENCHMARK_METADATA_MANAGED=1 bash scripts/run-single.sh "$framework" done +python3 scripts/validate-result-schemas.py raw-check --raw-dir "$raw_dir" + python3 scripts/environment-manifest.py write-manifest --raw-dir "$raw_dir" --fingerprint "$fingerprint_file" --out "$manifest_file" echo "Raw benchmark files generated in: $raw_dir" diff --git a/scripts/run-single.sh b/scripts/run-single.sh index 2e86241..cbfd7cf 100755 --- a/scripts/run-single.sh +++ b/scripts/run-single.sh @@ -96,6 +96,7 @@ if ! curl -fsS "$target/health" >/dev/null 2>&1; then import json, sys framework, target, out_file = sys.argv[1], sys.argv[2], sys.argv[3] payload = { + "schema_version": "raw-v1", "framework": framework, "target": target, "status": "skipped", @@ -116,6 +117,7 @@ else import json, sys framework, target, out_file = sys.argv[1], sys.argv[2], sys.argv[3] payload = { + "schema_version": "raw-v1", "framework": framework, "target": target, "status": "skipped", diff --git a/scripts/validate-result-schemas.py b/scripts/validate-result-schemas.py new file mode 100644 index 0000000..658ae58 --- /dev/null +++ b/scripts/validate-result-schemas.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +import argparse +import json +from pathlib import Path + + +ROOT = Path(__file__).resolve().parent.parent +RAW_DIR = ROOT / "results" / "latest" / "raw" +RAW_SCHEMA = ROOT / "schemas" / "benchmark-raw-v1.schema.json" +SUMMARY_FILE = ROOT / "results" / "latest" / "summary.json" +SUMMARY_SCHEMA = ROOT / "schemas" / "benchmark-summary-v1.schema.json" + + +def load_json(path): + try: + with path.open("r", encoding="utf-8") as handle: + return json.load(handle) + except FileNotFoundError as exc: + raise SystemExit(f"File not found: {path}") from exc + except json.JSONDecodeError as exc: + raise SystemExit(f"Malformed JSON in {path}: {exc.msg} at line {exc.lineno}, column {exc.colno}") from exc + + +def validate_raw_row(path, payload, schema_version): + required = ("schema_version", "framework", "target", "status") + for field in required: + if field not in payload: + raise SystemExit(f"Raw schema validation failed for {path}: missing {field}") + + if payload.get("schema_version") != schema_version: + raise SystemExit( + f"Raw schema validation failed for {path}: schema_version={payload.get('schema_version')!r}, expected {schema_version!r}" + ) + + status = payload.get("status") + if status not in ("ok", "skipped"): + raise SystemExit(f"Raw schema validation failed for {path}: status={status!r} must be 'ok' or 'skipped'") + + if not isinstance(payload.get("framework"), str) or not payload.get("framework"): + raise SystemExit(f"Raw schema validation failed for {path}: framework must be non-empty string") + if not isinstance(payload.get("target"), str) or not payload.get("target"): + raise SystemExit(f"Raw schema validation failed for {path}: target must be non-empty string") + + if status == "skipped": + reason = payload.get("reason") + if not isinstance(reason, str) or not reason: + raise SystemExit(f"Raw schema validation failed for {path}: skipped rows require non-empty reason") + return + + for field in ("parity", "engine", "metric_units", "benchmark", "resources_normalized"): + if field not in payload: + raise SystemExit(f"Raw schema validation failed for {path}: missing {field}") + + benchmark = payload.get("benchmark") + if not isinstance(benchmark, 
dict): + raise SystemExit(f"Raw schema validation failed for {path}: benchmark must be object") + + for metric_field in ("run_stats", "median"): + if metric_field not in benchmark: + raise SystemExit(f"Raw schema validation failed for {path}: benchmark.{metric_field} is required") + + +def validate_raw(raw_dir, schema_path): + schema = load_json(schema_path) + schema_version = (schema.get("properties") or {}).get("schema_version", {}).get("const") + if not isinstance(schema_version, str) or not schema_version: + raise SystemExit(f"Raw schema file missing properties.schema_version.const: {schema_path}") + + files = sorted(raw_dir.glob("*.json")) + if not files: + raise SystemExit(f"No raw benchmark files found in: {raw_dir}") + + for path in files: + payload = load_json(path) + validate_raw_row(path, payload, schema_version) + + print(f"benchmark-raw-schema-check: validated {len(files)} raw artifact(s)") + + +def validate_summary(summary_file, schema_path): + schema = load_json(schema_path) + schema_version = (schema.get("properties") or {}).get("schema_version", {}).get("const") + if not isinstance(schema_version, str) or not schema_version: + raise SystemExit(f"Summary schema file missing properties.schema_version.const: {schema_path}") + + if not summary_file.exists(): + raise SystemExit(f"Summary file not found: {summary_file}") + + payload = load_json(summary_file) + required = ( + "schema_version", + "generated_at", + "total_targets", + "successful_targets", + "skipped_targets", + "targets", + ) + for field in required: + if field not in payload: + raise SystemExit(f"Summary schema validation failed for {summary_file}: missing {field}") + + if payload.get("schema_version") != schema_version: + raise SystemExit( + f"Summary schema validation failed for {summary_file}: schema_version={payload.get('schema_version')!r}, expected {schema_version!r}" + ) + + targets = payload.get("targets") + if not isinstance(targets, list): + raise SystemExit(f"Summary schema validation failed for {summary_file}: targets must be array") + + for idx, target in enumerate(targets): + if not isinstance(target, dict): + raise SystemExit(f"Summary schema validation failed for {summary_file}: targets[{idx}] must be object") + for field in ("framework", "status", "target", "provenance"): + if field not in target: + raise SystemExit(f"Summary schema validation failed for {summary_file}: targets[{idx}] missing {field}") + provenance = target.get("provenance") + if not isinstance(provenance, dict) or not provenance.get("raw_source"): + raise SystemExit( + f"Summary schema validation failed for {summary_file}: targets[{idx}].provenance.raw_source is required" + ) + + status = target.get("status") + if status == "ok" and "uncertainty" not in target: + raise SystemExit( + f"Summary schema validation failed for {summary_file}: targets[{idx}] missing uncertainty for status=ok" + ) + + print("benchmark-summary-schema-check: validated summary artifact") + + +def parse_args(): + parser = argparse.ArgumentParser(description="Validate benchmark result schemas") + parser.add_argument("cmd", choices=["raw-check", "summary-check"]) + parser.add_argument("--raw-dir", type=Path, default=RAW_DIR) + parser.add_argument("--raw-schema", type=Path, default=RAW_SCHEMA) + parser.add_argument("--summary-file", type=Path, default=SUMMARY_FILE) + parser.add_argument("--summary-schema", type=Path, default=SUMMARY_SCHEMA) + return parser.parse_args() + + +def main(): + args = parse_args() + if args.cmd == "raw-check": + 
validate_raw(args.raw_dir, args.raw_schema) + return + if args.cmd == "summary-check": + validate_summary(args.summary_file, args.summary_schema) + return + raise SystemExit(f"Unknown command: {args.cmd}") + + +if __name__ == "__main__": + main()
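
As a quick local sanity check of the new validator, a minimal sketch along these lines exercises both the `raw-check` and `summary-check` subcommands introduced above, using their `--raw-dir` and `--summary-file` flags against temporary files. It assumes the patch is applied and the script is run from the repository root; the `modkit` framework label, target URL, timestamp, and file names below are illustrative placeholders, not part of the patch:

```python
#!/usr/bin/env python3
"""Illustrative smoke test for scripts/validate-result-schemas.py.

Assumes this patch is applied and the script is run from the repository root;
all payload values below are placeholders chosen to satisfy the v1 schemas.
"""
import json
import subprocess
import sys
import tempfile
from pathlib import Path

VALIDATOR = "scripts/validate-result-schemas.py"


def run_check(*argv: str) -> None:
    # Invoke the validator the same way the Make targets do and surface its output.
    completed = subprocess.run(
        [sys.executable, VALIDATOR, *argv], capture_output=True, text=True
    )
    print(completed.stdout.strip() or completed.stderr.strip())
    if completed.returncode != 0:
        sys.exit(completed.returncode)


with tempfile.TemporaryDirectory() as tmp:
    tmp_path = Path(tmp)

    # Minimal "skipped" raw artifact: raw-v1 only requires schema_version,
    # framework, target, status, and (for skipped rows) a non-empty reason.
    raw_dir = tmp_path / "raw"
    raw_dir.mkdir()
    (raw_dir / "modkit.json").write_text(
        json.dumps(
            {
                "schema_version": "raw-v1",
                "framework": "modkit",
                "target": "http://localhost:3001",
                "status": "skipped",
                "reason": "illustrative example only",
            }
        ),
        encoding="utf-8",
    )
    run_check("raw-check", "--raw-dir", str(raw_dir))

    # Minimal summary: summary-v1 requires the counters, the targets array, and
    # per-target provenance.raw_source; "ok" targets would also need uncertainty.
    summary_file = tmp_path / "summary.json"
    summary_file.write_text(
        json.dumps(
            {
                "schema_version": "summary-v1",
                "generated_at": "2024-01-01T00:00:00+00:00",
                "total_targets": 1,
                "successful_targets": 0,
                "skipped_targets": 1,
                "targets": [
                    {
                        "framework": "modkit",
                        "status": "skipped",
                        "target": "http://localhost:3001",
                        "reason": "illustrative example only",
                        "provenance": {"raw_source": "results/latest/raw/modkit.json"},
                    }
                ],
            }
        ),
        encoding="utf-8",
    )
    run_check("summary-check", "--summary-file", str(summary_file))
```

Dropping the temporary paths falls back to the script's defaults under `results/latest/`, which is what `make benchmark-schema-validate` relies on.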