Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ jobs:
run: bash scripts/run-all.sh
- name: Generate report from raw results
run: python3 scripts/generate-report.py
- name: Validate benchmark result schemas
run: make benchmark-schema-validate
- name: Run statistical quality gate
run: make ci-benchmark-quality-check
- name: Upload benchmark quality summary
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@ coverage.out
# Python cache/bytecode
**/__pycache__/
*.py[cod]

# Local worktrees
.worktrees/
13 changes: 13 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,28 @@ Run these before opening a PR:

```bash
go test ./...
make test-coverage
TARGET=http://localhost:3001 bash scripts/parity-check.sh
```

For patch coverage against `origin/main`:

```bash
make test-patch-coverage
```

If you changed scripts, also run shell linting if available:

```bash
shellcheck scripts/*.sh
```

If you changed benchmark artifacts or report generation, also run schema validation:

```bash
make benchmark-schema-validate
```

## Pull request process

1. Create a branch from `main`.
Expand Down
63 changes: 53 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check
# Shell and tool configuration — every knob is overridable from the command
# line, e.g. `make PYTHON=python3.12 GO=go1.22`.
SHELL := /bin/sh
PYTHON ?= python3
GO ?= go
GOPATH ?= $(shell $(GO) env GOPATH)
GO_PATCH_COVER ?= $(GOPATH)/bin/go-patch-cover
# All Go modules in the repo (directories containing a go.mod), excluding
# hidden and vendor directories. Use `:=` so the `find` runs exactly once at
# parse time instead of being re-executed on every reference to $(MODULES).
MODULES := $(shell find . -type f -name "go.mod" -not -path "*/.*/*" -not -path "*/vendor/*" -exec dirname {} \;)

.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test test-coverage test-patch-coverage tools parity-check parity-check-modkit parity-check-nestjs benchmark-fingerprint-check benchmark-limits-check benchmark-manifest-check benchmark-raw-schema-check benchmark-summary-schema-check benchmark-schema-validate benchmark-stats-check benchmark-variance-check benchmark-benchstat-check ci-benchmark-quality-check

benchmark:
bash scripts/run-all.sh
Expand All @@ -22,10 +29,36 @@ benchmark-do:
bash scripts/run-single.sh do

report:
python3 scripts/generate-report.py
$(PYTHON) scripts/generate-report.py

test:
go test ./...
$(GO) test ./...

# Aggregate test coverage across every Go module in $(MODULES) into a single
# merged profile at .coverage/coverage.out, then print the total percentage.
test-coverage:
	@mkdir -p .coverage
# Seed the merged profile with one mode header; the per-module headers are
# stripped below (tail -n +2) so the combined file remains a valid profile.
	@echo "mode: atomic" > .coverage/coverage.out
	@for mod in $(MODULES); do \
		echo "Testing coverage for module: $$mod"; \
		(cd $$mod && $(GO) test -coverprofile=profile.out -covermode=atomic ./...) || exit 1; \
		if [ -f $$mod/profile.out ]; then \
			tail -n +2 $$mod/profile.out >> .coverage/coverage.out; \
			rm $$mod/profile.out; \
		fi; \
	done
	@printf "\nTotal Coverage:\n"
# grep isolates the "total:" summary line from the per-function listing.
	@$(GO) tool cover -func=.coverage/coverage.out | grep "total:"

# Report coverage for only the lines changed relative to origin/main.
# Installs go-patch-cover (via `tools`) and rebuilds the merged coverage
# profile first. NOTE(review): assumes origin/main has been fetched locally.
test-patch-coverage: tools test-coverage
	@echo "Comparing against origin/main..."
# Three-dot form: diff of this branch's changes since it forked from origin/main.
	@git diff -U0 --no-color origin/main...HEAD > .coverage/diff.patch
	@$(GO_PATCH_COVER) .coverage/coverage.out .coverage/diff.patch > .coverage/patch_coverage.out
	@echo "Patch Coverage Report:"
	@cat .coverage/patch_coverage.out

# Install development tooling (go-patch-cover) into $(GOPATH)/bin.
# NOTE(review): installs @latest, so results are not pinned — consider a fixed
# version tag if reproducible tooling matters.
tools:
	@echo "Installing development tools..."
	@$(GO) install github.com/seriousben/go-patch-cover/cmd/go-patch-cover@latest
	@echo "Done: go-patch-cover installed"

parity-check:
TARGET="$(PARITY_TARGET)" bash scripts/parity-check.sh
Expand All @@ -37,22 +70,32 @@ parity-check-nestjs:
TARGET=http://localhost:3002 bash scripts/parity-check.sh

benchmark-fingerprint-check:
python3 scripts/environment-manifest.py check-fingerprint --file results/latest/environment.fingerprint.json
$(PYTHON) scripts/environment-manifest.py check-fingerprint --file results/latest/environment.fingerprint.json

benchmark-limits-check:
python3 scripts/environment-manifest.py check-limits --compose docker-compose.yml
$(PYTHON) scripts/environment-manifest.py check-limits --compose docker-compose.yml

benchmark-manifest-check:
python3 scripts/environment-manifest.py check-manifest --file results/latest/environment.manifest.json
$(PYTHON) scripts/environment-manifest.py check-manifest --file results/latest/environment.manifest.json

# Validate raw benchmark result artifacts against the raw schema contract.
benchmark-raw-schema-check:
	$(PYTHON) scripts/validate-result-schemas.py raw-check

# Validate summary benchmark artifacts against the summary schema contract.
benchmark-summary-schema-check:
	$(PYTHON) scripts/validate-result-schemas.py summary-check

# Run both schema validations (raw + summary). Expressed as prerequisites of
# two phony targets rather than recursive `$(MAKE)` invocations, so the checks
# participate in the normal dependency graph, avoid extra sub-make processes,
# and can run in parallel under `make -j`; sequential behaviour is unchanged.
benchmark-schema-validate: benchmark-raw-schema-check benchmark-summary-schema-check

benchmark-stats-check:
python3 scripts/benchmark-quality-check.py stats-check
$(PYTHON) scripts/benchmark-quality-check.py stats-check

benchmark-variance-check:
python3 scripts/benchmark-quality-check.py variance-check
$(PYTHON) scripts/benchmark-quality-check.py variance-check

benchmark-benchstat-check:
python3 scripts/benchmark-quality-check.py benchstat-check
$(PYTHON) scripts/benchmark-quality-check.py benchstat-check

ci-benchmark-quality-check:
python3 scripts/benchmark-quality-check.py ci-check
$(PYTHON) scripts/benchmark-quality-check.py ci-check
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@ Run benchmark orchestration and generate a report:
```bash
make benchmark
make report
make benchmark-schema-validate
make ci-benchmark-quality-check
```

The benchmark/report flow enforces schema validation of both raw and summary artifacts before running the quality gates.

Use OSS measurement engine (optional):

```bash
Expand All @@ -36,6 +39,7 @@ BENCH_ENGINE=hyperfine make benchmark
- Python 3
- hyperfine (optional benchmark engine)
- benchstat (`go install golang.org/x/perf/cmd/benchstat@latest`)
- go-patch-cover (`go install github.com/seriousben/go-patch-cover/cmd/go-patch-cover@latest`, for `make test-patch-coverage`)

## Repository layout

Expand Down
4 changes: 4 additions & 0 deletions docs/guides/benchmark-workflow.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
```bash
make benchmark
make report
make benchmark-schema-validate
```

## Per-target run
Expand Down Expand Up @@ -56,10 +57,13 @@ Benchmark scripts must run parity first for each target. If parity fails, skip b
- `results/latest/report.md` - markdown report
- `results/latest/benchmark-quality-summary.json` - policy quality gate output
- `results/latest/tooling/benchstat/*.txt` - benchstat comparison outputs
- `schemas/benchmark-raw-v1.schema.json` - raw benchmark artifact contract
- `schemas/benchmark-summary-v1.schema.json` - summary artifact contract

## Quality checks

```bash
make benchmark-schema-validate
make benchmark-stats-check
make benchmark-variance-check
make benchmark-benchstat-check
Expand Down
110 changes: 110 additions & 0 deletions schemas/benchmark-raw-v1.schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://go-modkit.dev/schemas/benchmark-raw-v1.schema.json",
"title": "Benchmark Raw Result v1",
"type": "object",
"required": [
"schema_version",
"framework",
"target",
"status"
],
"additionalProperties": true,
"properties": {
"schema_version": {
"const": "raw-v1"
},
"framework": {
"type": "string",
"minLength": 1
},
"target": {
"type": "string",
"minLength": 1
},
"status": {
"type": "string",
"enum": [
"ok",
"skipped"
]
},
"reason": {
"type": "string"
},
"parity": {
"type": "string"
},
"engine": {
"type": "string"
},
"metric_units": {
"type": "object"
},
"benchmark": {
"type": "object",
"properties": {
"run_stats": {
"type": "array"
},
"median": {
"type": "object"
}
}
},
"docker": {
"type": "object"
},
"resources_normalized": {
"type": "object"
}
},
"allOf": [
{
"if": {
"properties": {
"status": {
"const": "ok"
}
},
"required": [
"status"
]
},
"then": {
"required": [
"parity",
"engine",
"metric_units",
"benchmark",
"resources_normalized"
],
"properties": {
"benchmark": {
"required": [
"run_stats",
"median"
]
}
}
}
},
{
"if": {
"properties": {
"status": {
"const": "skipped"
}
},
"required": [
"status"
]
},
"then": {
"required": [
"reason"
]
}
}
]
}
Loading
Loading