diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..c0088b9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,34 @@ +--- +name: Bug report +about: Report a reproducible problem +labels: bug, triage +--- + +## Summary + +## Steps to reproduce + +1. +2. +3. + +## Expected behavior + +## Actual behavior + +```text +paste errors/logs +``` + +## Scope + +- [ ] parity runner (`cmd/parity-test`) +- [ ] parity fixtures (`test/fixtures/parity`) +- [ ] benchmark scripts (`scripts/`) +- [ ] docs + +## Environment + +- Go version: +- OS: +- Command used: diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000..859dd15 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,8 @@ +blank_issues_enabled: false +contact_links: + - name: Questions and Discussions + url: https://github.com/go-modkit/benchmarks/discussions + about: Use Discussions for usage questions and open-ended design discussion. + - name: Security vulnerabilities + url: https://github.com/go-modkit/benchmarks/security/policy + about: Report vulnerabilities privately. 
diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md new file mode 100644 index 0000000..3f484ef --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -0,0 +1,20 @@ +--- +name: Documentation issue +about: Report missing, outdated, or unclear docs +labels: docs, triage +--- + +## Location + + + +## Issue type + +- [ ] unclear explanation +- [ ] missing information +- [ ] outdated content +- [ ] broken command/example + +## Description + +## Suggested update diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..1891bc2 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,23 @@ +--- +name: Feature request +about: Propose an enhancement +labels: enhancement, triage +--- + +## Problem statement + +## Proposed solution + +## Alternatives considered + +## Acceptance criteria + +- [ ] +- [ ] + +## Affected areas + +- [ ] parity runner +- [ ] fixtures +- [ ] benchmark orchestration +- [ ] reporting diff --git a/.github/ISSUE_TEMPLATE/rfc.md b/.github/ISSUE_TEMPLATE/rfc.md new file mode 100644 index 0000000..33ef108 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/rfc.md @@ -0,0 +1,19 @@ +--- +name: RFC / design proposal +about: Propose a significant design change +labels: rfc, discussion +--- + +## Summary + +## Motivation + +## Detailed design + +## Compatibility and migration + +## Drawbacks + +## Alternatives + +## Open questions diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 0000000..b9cfb06 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,34 @@ +## Type + +- [ ] `feat` - New capability +- [ ] `fix` - Bug fix +- [ ] `docs` - Documentation only +- [ ] `test` - Test changes +- [ ] `chore` - Tooling, CI, maintenance + +## Summary + + + +## Changes + +- + +## Validation + +```bash +go test ./... 
+``` + + + +## Checklist + +- [ ] Code and docs follow project conventions +- [ ] Tests updated/added for behavior changes +- [ ] Parity contract/fixtures updated if API behavior changed +- [ ] Related design/docs updated if matcher semantics changed + +## Resolves + +Resolves # diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..8131893 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,46 @@ +name: ci + +on: + push: + branches: + - main + pull_request: + +jobs: + pr-title: + name: Validate PR title + if: github.event_name == 'pull_request' + runs-on: ubuntu-latest + steps: + - uses: amannn/action-semantic-pull-request@v5 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + test: + name: Go tests + coverage + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + - name: Run unit tests with coverage + run: go test ./... -coverprofile=coverage.out -covermode=atomic + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v5 + with: + files: ./coverage.out + fail_ci_if_error: false + + scripts: + name: Script smoke tests (skipped targets expected) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod + - name: Run benchmark script smoke + run: bash scripts/run-all.sh + - name: Generate report from raw results + run: python3 scripts/generate-report.py diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..a9d9880 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,26 @@ +name: codeql + +on: + push: + branches: + - main + pull_request: + +jobs: + analyze: + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + strategy: + fail-fast: false + matrix: + language: ["go"] + steps: + - uses: actions/checkout@v5 + - uses: github/codeql-action/init@v3 + with: + 
languages: ${{ matrix.language }} + - uses: github/codeql-action/autobuild@v3 + - uses: github/codeql-action/analyze@v3 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a481402 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +# Generated benchmark artifacts +results/latest/raw/*.json +results/latest/summary.json +results/latest/report.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..924294e --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,82 @@ +# PROJECT KNOWLEDGE BASE + +**Generated:** 2026-02-07T17:40:00+02:00 +**Commit:** 46f84cd +**Branch:** feat/benchmark-repo-hardening + +## OVERVIEW +Benchmark harness for API parity and performance comparison across framework implementations. +Correctness is enforced first (parity), then benchmark scripts generate raw metrics and reports. + +## STRUCTURE +```text +benchmarks/ +├── cmd/parity-test/ # Go parity runner + matcher/fixture tests +├── test/fixtures/parity/ # Parity seed + endpoint scenarios +├── scripts/ # Parity-gated benchmark and report pipeline +├── docs/ # Architecture + operational guides +├── .github/workflows/ # CI + CodeQL +├── apps/ # Placeholder for framework app implementations +├── results/latest/ # Generated raw metrics, summary, report +├── Makefile +└── docker-compose.yml +``` + +## WHERE TO LOOK + +| Task | Location | Notes | +|------|----------|-------| +| Run parity checks | `Makefile`, `scripts/parity-check.sh` | `PARITY_TARGET=... 
make parity-check` is canonical | +| Extend parity runner | `cmd/parity-test/main.go`, `cmd/parity-test/main_test.go` | Matcher semantics + fixture validation tests | +| Add/adjust contract cases | `test/fixtures/parity/scenarios/*.json` | Endpoint-grouped fixtures (`health`, `users-*`) | +| Change baseline test state | `test/fixtures/parity/seed.json` | Posted before scenarios when seed endpoint configured | +| Benchmark orchestration | `scripts/run-single.sh`, `scripts/run-all.sh` | Per-target parity gate then benchmark output emit | +| Reporting | `scripts/generate-report.py`, `results/latest/` | Builds `summary.json` and `report.md` from raw JSON | +| CI policy | `.github/workflows/ci.yml`, `.github/workflows/codeql.yml` | Semantic PR title check + Go tests + script smoke + CodeQL | + + +## CODE MAP +LSP project views unavailable in this environment (`no views`). +Use direct file map; Go entrypoint remains `cmd/parity-test/main.go` (`func main()`). + +## CONVENTIONS +- Benchmark scripts are parity-gated per target: benchmark is skipped when health or parity fails. +- Raw benchmark outputs are one JSON file per framework under `results/latest/raw/`. +- Report generation is deterministic from raw artifacts (`summary.json`, `report.md`). +- CI runs three tracks: PR title semantics, Go tests, script/report smoke. + +## ANTI-PATTERNS (THIS PROJECT) +- Do not benchmark before parity passes for the target implementation. +- Do not change matcher token semantics without updating fixture expectations and design doc. +- Do not treat generated files in `results/latest/` as hand-authored source-of-truth. + +## UNIQUE STYLES +- Parity contract is fixture-first; runner logic is intentionally generic and target-agnostic. +- Scenarios stay endpoint-grouped (`users-*`, `health`) instead of a single aggregate fixture file. +- Benchmark scripts degrade gracefully by writing `skipped` records when targets are unavailable. 
+ +## COMMANDS +```bash +# Run full benchmark orchestration +make benchmark + +# Generate benchmark report +make report + +# Run unit tests +make test + +# Run parity checks against a specific service +make parity-check-modkit +make parity-check-nestjs + +# Generic parity invocation (set target URL) +PARITY_TARGET=http://localhost:3001 make parity-check + +# Direct parity CLI invocation +go run ./cmd/parity-test -target http://localhost:3001 -fixtures test/fixtures/parity +``` + +## NOTES +- `apps/` is still a placeholder in this branch. +- `results/latest/` is generated output; contents vary between runs and environments. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..ef1434d --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,26 @@ +# Code of Conduct + +## Our pledge + +We are committed to a harassment-free experience for everyone. + +## Expected behavior + +- be respectful and constructive +- assume good intent +- accept and give feedback professionally + +## Unacceptable behavior + +- harassment, insults, or discrimination +- doxxing or sharing private information without consent +- disruptive or hostile conduct + +## Enforcement + +Project maintainers may remove or reject content that violates this policy. + +## Attribution + +Adapted from Contributor Covenant, version 2.1: +https://www.contributor-covenant.org/version/2/1/code_of_conduct.html diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..a5289d9 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,57 @@ +# Contributing to benchmarks + +Thanks for contributing. + +## Scope + +This repository validates API parity and runs framework benchmarks. 
Keep changes focused on one of these areas: + +- parity contract (`test/fixtures/parity`) +- parity runner (`cmd/parity-test`) +- benchmark orchestration (`scripts`, `Makefile`, `docker-compose.yml`) +- docs (`docs/`, `README.md`, `METHODOLOGY.md`) + +## Prerequisites + +- Go 1.25.7+ +- Docker + Docker Compose (for local service runs) +- GNU Make + +## Local validation + +Run these before opening a PR: + +```bash +go test ./... +TARGET=http://localhost:3001 bash scripts/parity-check.sh +``` + +If you changed scripts, also run shell linting if available: + +```bash +shellcheck scripts/*.sh +``` + +## Pull request process + +1. Create a branch from `main`. +2. Keep changes atomic and add/update tests when behavior changes. +3. Run local validation commands. +4. Fill out `.github/pull_request_template.md`. +5. Link relevant issues with `Resolves #`. + +## Contract rules + +- Do not benchmark a framework before parity passes for that target. +- Do not change matcher semantics (`@any_number`, `@is_iso8601`) without updating fixtures and design docs. +- Keep fixture files endpoint-scoped (`users-*`, `health`) instead of creating a single large fixture file. + +## Commit style + +Use Conventional Commits when possible: + +- `feat:` new functionality +- `fix:` bug fix +- `docs:` documentation only +- `test:` tests only +- `chore:` tooling/build/CI diff --git a/METHODOLOGY.md b/METHODOLOGY.md index fabc6eb..7c3055b 100644 --- a/METHODOLOGY.md +++ b/METHODOLOGY.md @@ -1,18 +1,43 @@ # Methodology -## Fair Comparison -- Identical endpoints and response formats -- SQLite in-memory for all frameworks -- Same load test parameters - -## Environment -- Docker + Docker Compose -- wrk -- Python 3 - -## Benchmark Parameters -- wrk: -t12 -c400 -d30s -- Warmup: 1000 requests - -## Notes -This is a stub. Expand with hardware/versions and interpretation guidance. 
+## Fairness principles + +- all implementations must expose equivalent API behavior validated by parity tests +- identical endpoint semantics: status, headers, and body contract +- identical seed state before parity and benchmark runs +- identical load profile for each benchmark target + +## Benchmark gate + +1. Start target service +2. Wait for readiness endpoint +3. Run parity checks against target +4. Only if parity passes, run load benchmark +5. Persist raw metrics and summary artifacts + +## Runtime environment + +- Docker + Docker Compose for service orchestration +- Go parity runner (`cmd/parity-test`) +- shell scripts in `scripts/` for orchestration +- Python 3 report generator (`scripts/generate-report.py`) + +## Baseline benchmark profile + +- warmup requests: 1000 +- request threads: 12 +- concurrent connections: 400 +- run duration: 30s +- runs per target: 3 (median reported) + +## Reporting + +- raw run outputs: `results/latest/raw/` +- normalized summary: `results/latest/summary.json` +- markdown report: `results/latest/report.md` + +## Interpretation guidance + +- treat parity failures as correctness blockers, not performance regressions +- compare medians first, then inspect distribution variance +- annotate environment drift (host type, CPU, memory, Docker version) in report notes diff --git a/Makefile b/Makefile index 629ff86..8e258ae 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report parity-check parity-check-modkit parity-check-nestjs +.PHONY: benchmark benchmark-modkit benchmark-nestjs benchmark-baseline benchmark-wire benchmark-fx benchmark-do report test parity-check parity-check-modkit parity-check-nestjs benchmark: bash scripts/run-all.sh @@ -24,6 +24,9 @@ benchmark-do: report: python3 scripts/generate-report.py +test: + go test ./... 
+ parity-check: TARGET="$(PARITY_TARGET)" bash scripts/parity-check.sh diff --git a/README.md b/README.md index 9e81e19..4549be8 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,54 @@ -# modkit Benchmarks +# benchmarks -## Goal -A reproducible benchmark suite comparing modkit with NestJS and Go DI alternatives. +Benchmark harness for framework parity and performance comparisons. + +## What this repository does + +- validates API behavior parity across implementations before performance runs +- stores declarative parity fixtures and seed data +- provides benchmark orchestration scripts and report generation ## Quickstart +Run parity against a local target: + +```bash +PARITY_TARGET=http://localhost:3001 make parity-check +``` + +Run benchmark orchestration and generate a report: + ```bash make benchmark make report ``` -## Repository Layout -- apps/: framework implementations -- scripts/: benchmark orchestration -- results/: latest + archived outputs +## Repository layout + +```text +benchmarks/ +|- cmd/parity-test/ # Go parity CLI +|- test/fixtures/parity/ # seed + scenario contract fixtures +|- scripts/ # benchmark/parity orchestration +|- docs/ # design and operational guides +|- apps/ # framework app implementations (placeholder) +`- results/ # benchmark outputs (placeholder) +``` + +## Core policies + +- parity is a gate: do not benchmark a target that fails parity +- fixture contract is source-of-truth for expected API behavior +- matcher changes require fixture updates and design doc updates + +## Documentation + +- `docs/design/002-api-parity-contract.md` - parity contract rationale +- `docs/architecture.md` - repository architecture and execution flow +- `docs/guides/parity-contract.md` - fixture and matcher semantics +- `docs/guides/adding-scenarios.md` - how to add parity scenarios +- `docs/guides/benchmark-workflow.md` - benchmark and reporting flow + +## Contributing -## Latest Results -See results/latest/report.md after running benchmarks. 
+See `CONTRIBUTING.md` for contribution process and validation commands. diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..bcde849 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,24 @@ +# Security Policy + +## Reporting a vulnerability + +Do not open public issues for security vulnerabilities. + +Use GitHub Security Advisories for private disclosure: + +- https://github.com/go-modkit/benchmarks/security/advisories/new + +If private advisories are unavailable, open a discussion asking maintainers for a private contact channel without disclosing exploit details. + +## What to include + +- impacted component(s) +- reproduction steps +- potential impact +- suggested mitigation (optional) + +## Disclosure policy + +- We acknowledge reports as soon as possible. +- We prioritize fixes by severity and exploitability. +- We publish fixes and advisories after remediation is available. diff --git a/cmd/parity-test/AGENTS.md b/cmd/parity-test/AGENTS.md new file mode 100644 index 0000000..3fcd178 --- /dev/null +++ b/cmd/parity-test/AGENTS.md @@ -0,0 +1,45 @@ +# PROJECT KNOWLEDGE BASE - cmd/parity-test + +**Generated:** 2026-02-07T17:40:00+02:00 +**Commit:** 46f84cd +**Branch:** feat/benchmark-repo-hardening + +## OVERVIEW +Go parity runner CLI that validates HTTP contract behavior against fixture scenarios. 
+ +## WHERE TO LOOK + +| Task | Location | Notes | +|------|----------|-------| +| CLI flags / execution flow | `cmd/parity-test/main.go` (`func main`) | Target URL, fixture dir, seed endpoint, timeout wiring | +| Seed bootstrap behavior | `cmd/parity-test/main.go` (`seedTarget`) | Optional pre-run POST to seed endpoint | +| Scenario request execution | `cmd/parity-test/main.go` (`runScenario`) | Request construction, status/header/body assertion | +| Body matcher semantics | `cmd/parity-test/main.go` (`compareValue`, `matchStringValue`) | Recursive object/array checks + token matching | +| Matcher primitives | `cmd/parity-test/main.go` (`isNumber`, `isISO8601`) | Type/format checks used by tokens | +| Unit coverage for matcher + fixtures | `cmd/parity-test/main_test.go` | Token behavior, recursive compare, fixture shape guards | + + +## CONVENTIONS +- Exit behavior is strict: missing target/fixtures or any failed scenario returns non-zero. +- Scenario files are loaded from `/scenarios/*.json` and processed in sorted order. +- Header checks verify expected keys exist and values match token-aware string matching. +- Body checks are partial-object style: expected keys must exist; extra keys in actual object are tolerated. +- Test suite treats fixture health as contract integrity (JSON validity, names, status/path requirements). + +## ANTI-PATTERNS (THIS DIRECTORY) +- Do not silently swallow request/JSON parse errors; keep explicit failure messages. +- Do not turn seed failures into hard exits by default; current contract treats them as warnings. +- Do not add service-specific logic (framework names, per-app branches); runner must stay target-agnostic. +- Do not loosen matcher tests when adding tokens; add explicit positive + negative cases. 
+ +## COMMANDS +```bash +# Run parity CLI directly +go run ./cmd/parity-test -target http://localhost:3001 -fixtures test/fixtures/parity + +# Same via make wrapper +PARITY_TARGET=http://localhost:3001 make parity-check +``` + +## NOTES +- Runtime remains intentionally small; tests carry most semantic guardrails. diff --git a/cmd/parity-test/main_test.go b/cmd/parity-test/main_test.go new file mode 100644 index 0000000..d6c8226 --- /dev/null +++ b/cmd/parity-test/main_test.go @@ -0,0 +1,129 @@ +package main + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" +) + +func TestMatchStringValue_Tokens(t *testing.T) { + t.Parallel() + + if !matchStringValue("@any_number", 42.0) { + t.Fatal("expected @any_number to match float64") + } + if !matchStringValue("@any_number", "42") { + t.Fatal("expected @any_number to match numeric string") + } + if matchStringValue("@any_number", "abc") { + t.Fatal("expected @any_number to reject non-numeric string") + } + + if !matchStringValue("@is_iso8601", "2025-01-01T00:00:00Z") { + t.Fatal("expected @is_iso8601 to match RFC3339") + } + if matchStringValue("@is_iso8601", "2025/01/01") { + t.Fatal("expected @is_iso8601 to reject non-ISO string") + } +} + +func TestMatchStringValue_EmbeddedTokens(t *testing.T) { + t.Parallel() + + if !matchStringValue("/users/@any_number", "/users/123") { + t.Fatal("expected embedded @any_number token to match") + } + if matchStringValue("/users/@any_number", "/users/abc") { + t.Fatal("expected embedded @any_number token to reject non-number") + } + + if !matchStringValue("created:@is_iso8601", "created:2025-01-01T00:00:00.000Z") { + t.Fatal("expected embedded @is_iso8601 token to match") + } +} + +func TestCompareValue_RecursiveObjectAndArray(t *testing.T) { + t.Parallel() + + expected := map[string]interface{}{ + "id": "@any_number", + "meta": map[string]interface{}{ + "createdAt": "@is_iso8601", + }, + "items": []interface{}{map[string]interface{}{"value": "@any_number"}}, + } + + actual 
:= map[string]interface{}{ + "id": 1.0, + "meta": map[string]interface{}{ + "createdAt": "2025-01-01T00:00:00Z", + }, + "items": []interface{}{map[string]interface{}{"value": 12.0}}, + "extra": "allowed", + } + + ok, msg := compareValue(expected, actual) + if !ok { + t.Fatalf("expected compareValue to pass, got msg=%s", msg) + } +} + +func TestCompareValue_ArrayLengthMismatch(t *testing.T) { + t.Parallel() + + ok, _ := compareValue([]interface{}{1.0, 2.0}, []interface{}{1.0}) + if ok { + t.Fatal("expected compareValue to fail on array length mismatch") + } +} + +func TestParityFixtures_AreWellFormed(t *testing.T) { + t.Parallel() + + scenarioDir := filepath.Join("..", "..", "test", "fixtures", "parity", "scenarios") + entries, err := os.ReadDir(scenarioDir) + if err != nil { + t.Fatalf("failed to read scenario dir: %v", err) + } + if len(entries) == 0 { + t.Fatal("expected at least one scenario file") + } + + for _, entry := range entries { + if entry.IsDir() || filepath.Ext(entry.Name()) != ".json" { + continue + } + path := filepath.Join(scenarioDir, entry.Name()) + data, readErr := os.ReadFile(path) + if readErr != nil { + t.Fatalf("failed reading %s: %v", path, readErr) + } + + var scenarios []Scenario + if unmarshalErr := json.Unmarshal(data, &scenarios); unmarshalErr != nil { + t.Fatalf("invalid scenario JSON %s: %v", path, unmarshalErr) + } + if len(scenarios) == 0 { + t.Fatalf("expected at least one scenario in %s", path) + } + + seen := make(map[string]bool) + for _, s := range scenarios { + if s.Name == "" { + t.Fatalf("scenario with empty name in %s", path) + } + if seen[s.Name] { + t.Fatalf("duplicate scenario name %q in %s", s.Name, path) + } + seen[s.Name] = true + + if s.Request.Path == "" { + t.Fatalf("scenario %q has empty request.path in %s", s.Name, path) + } + if s.Response.Status <= 0 { + t.Fatalf("scenario %q has invalid response.status in %s", s.Name, path) + } + } + } +} diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 
0000000..385a719 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,16 @@ +ignore: + - "**/cmd/**/main.go" + +coverage: + status: + project: + default: + target: auto + threshold: 1% + patch: + default: + target: 80% + +comment: + layout: "header,diff,flags,files" + behavior: default diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..b7e1103 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,48 @@ +# Architecture + +## Overview + +The repository has two core planes: + +- correctness plane: parity runner + fixture contract +- performance plane: benchmark orchestration + reporting + +Correctness runs before performance. + +## Components + +```text +cmd/parity-test/ parity CLI runtime +test/fixtures/parity/ seed + scenario contract files +scripts/ benchmark/parity orchestration wrappers +results/latest/ benchmark outputs and generated report +``` + +## Parity flow + +1. Load `seed.json` (if configured) +2. Discover `scenarios/*.json` +3. Execute each scenario request +4. Compare status, headers, and body recursively +5. Aggregate failures and exit non-zero on any mismatch + +## Matcher semantics + +- `@any_number`: accepts numeric values (number types or numeric strings) +- `@is_iso8601`: accepts RFC3339/RFC3339Nano timestamps +- tokens can appear as full values or interpolated inside strings + +## Benchmark flow + +1. Launch target services +2. Run parity checks per target +3. Run load benchmarks for parity-passing targets +4. Save raw outputs +5. Build `summary.json` +6. 
Generate `report.md` + +## Failure model + +- parity failures do not stop fixture file iteration; they aggregate and fail at the end +- benchmark runs should short-circuit per target if parity fails +- report generation should tolerate partial target results and mark skipped targets diff --git a/docs/guides/AGENTS.md b/docs/guides/AGENTS.md new file mode 100644 index 0000000..8811c99 --- /dev/null +++ b/docs/guides/AGENTS.md @@ -0,0 +1,38 @@ +# PROJECT KNOWLEDGE BASE - docs/guides + +**Generated:** 2026-02-07T17:40:00+02:00 +**Commit:** 46f84cd +**Branch:** feat/benchmark-repo-hardening + +## OVERVIEW +Operator-facing guides for parity contract usage, scenario authoring, and benchmark workflow. + +## WHERE TO LOOK + +| Task | Location | Notes | +|------|----------|-------| +| Understand fixture semantics | `docs/guides/parity-contract.md` | Request/response schema and matcher tokens | +| Add or modify scenarios safely | `docs/guides/adding-scenarios.md` | Endpoint-grouped workflow + checklist | +| Run benchmark pipeline | `docs/guides/benchmark-workflow.md` | Standard, per-target, and artifact expectations | + + +## CONVENTIONS +- Keep guides procedural and command-oriented. +- Reference canonical files/commands instead of duplicating full implementation details. +- Prefer policy statements that match executable behavior (parity gate, skip records). + +## ANTI-PATTERNS +- Do not restate root README sections verbatim. +- Do not document behavior that scripts or CLI do not implement. +- Do not embed environment-specific benchmark numbers in guide files. + +## COMMANDS +```bash +PARITY_TARGET=http://localhost:3001 make parity-check +make benchmark +make report +``` + +## NOTES +- Keep this directory focused on operator workflows; conceptual rationale stays in `docs/design/` and `docs/architecture.md`. +- Update guides when script interfaces or required environment variables change. 
diff --git a/docs/guides/adding-scenarios.md b/docs/guides/adding-scenarios.md new file mode 100644 index 0000000..df1c336 --- /dev/null +++ b/docs/guides/adding-scenarios.md @@ -0,0 +1,40 @@ +# Adding Parity Scenarios + +## 1) Pick the fixture file + +Group by endpoint domain: + +- `health.json` +- `users-create.json` +- `users-read.json` +- `users-update.json` +- `users-delete.json` + +If needed, add a new endpoint-grouped file under `test/fixtures/parity/scenarios/`. + +## 2) Add scenario entries + +Add a JSON object with: + +- unique `name` +- minimal `request` +- explicit expected `response` + +Prefer deterministic expectations. Use matcher tokens only for dynamic fields. + +## 3) Update seed state if required + +If the scenario depends on baseline data, update `test/fixtures/parity/seed.json`. + +## 4) Run locally + +```bash +PARITY_TARGET=http://localhost:3001 make parity-check +``` + +## 5) Review checklist + +- scenario name is descriptive +- endpoint-grouped file organization preserved +- no implementation-specific assumptions leaked into contract +- docs/design updated if contract semantics changed diff --git a/docs/guides/benchmark-workflow.md b/docs/guides/benchmark-workflow.md new file mode 100644 index 0000000..8cce7d6 --- /dev/null +++ b/docs/guides/benchmark-workflow.md @@ -0,0 +1,36 @@ +# Benchmark Workflow + +## Prerequisites + +- targets available locally or via Docker Compose +- parity contract fixtures up to date + +## Standard run + +```bash +make benchmark +make report +``` + +## Per-target run + +```bash +make benchmark-modkit +make benchmark-nestjs +``` + +## Parity gate + +Benchmark scripts must run parity first for each target. If parity fails, skip benchmark for that target and record the skip reason. 
+ +## Artifacts + +- `results/latest/raw/*.json` - raw benchmark outputs +- `results/latest/summary.json` - normalized summary +- `results/latest/report.md` - markdown report + +## Reproducibility notes + +- run from a clean working tree when possible +- keep runtime versions stable +- include host and Docker metadata in report notes diff --git a/docs/guides/parity-contract.md b/docs/guides/parity-contract.md new file mode 100644 index 0000000..ea3c142 --- /dev/null +++ b/docs/guides/parity-contract.md @@ -0,0 +1,47 @@ +# Parity Contract Guide + +## Contract location + +- fixtures: `test/fixtures/parity/` +- design intent: `docs/design/002-api-parity-contract.md` + +## Fixture format + +Each scenario file is a JSON array: + +```json +[ + { + "name": "scenario name", + "request": {"method": "GET", "path": "/health"}, + "response": {"status": 200, "body": {"status": "ok"}} + } +] +``` + +## Request block + +- `method` (optional; defaults to `GET`) +- `path` (required) +- `headers` (optional) +- `body` (optional) + +## Response block + +- `status` (required, exact) +- `headers` (optional, expected headers must exist) +- `body` (optional, recursive comparison) + +## Matchers + +- `@any_number` +- `@is_iso8601` + +Both can be full values or embedded in strings. + +## Change policy + +- changing matcher semantics requires: + - parity runner updates + - fixture updates + - design doc update diff --git a/scripts/AGENTS.md b/scripts/AGENTS.md new file mode 100644 index 0000000..408d9ce --- /dev/null +++ b/scripts/AGENTS.md @@ -0,0 +1,37 @@ +# PROJECT KNOWLEDGE BASE - scripts + +**Generated:** 2026-02-07T17:40:00+02:00 +**Commit:** 46f84cd +**Branch:** feat/benchmark-repo-hardening + +## OVERVIEW +Operational scripts for parity-gated benchmarking and report generation. 
+ +## WHERE TO LOOK +| Task | Location | Notes | +|------|----------|-------| +| Direct parity invocation wrapper | `scripts/parity-check.sh` | Calls Go parity CLI with target/fixtures/env wiring | +| Run one framework benchmark | `scripts/run-single.sh` | Health probe -> parity gate -> benchmark or skip JSON | +| Run full framework matrix | `scripts/run-all.sh` | Iterates `modkit,nestjs,baseline,wire,fx,do` | +| Generate summary/report | `scripts/generate-report.py` | Reads `results/latest/raw/*.json` and writes summary/report | + +## CONVENTIONS +- Shell scripts use `set -euo pipefail` and fail fast. +- Each framework emits exactly one raw JSON artifact under `results/latest/raw/`. +- Unavailable targets are not treated as hard failures; scripts write `status: skipped` records. +- `run-single.sh` must execute parity before any load sampling. +- Report generation is deterministic from raw JSON only. + +## ANTI-PATTERNS +- Do not benchmark a target directly without invoking parity gate logic. +- Do not silently discard skip/failure context; encode a reason in raw JSON. +- Do not bake framework-specific behavior into generic report generation. +- Do not write to paths outside `results/latest/` from these scripts. 
+
+## COMMANDS
+```bash
+bash scripts/parity-check.sh
+bash scripts/run-single.sh modkit
+bash scripts/run-all.sh
+python3 scripts/generate-report.py
+```
diff --git a/scripts/generate-report.py b/scripts/generate-report.py
new file mode 100755
index 0000000..2073dde
--- /dev/null
+++ b/scripts/generate-report.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+import json
+from datetime import datetime, timezone
+from pathlib import Path
+
+
+ROOT = Path(__file__).resolve().parent.parent
+RESULTS_LATEST = ROOT / "results" / "latest"
+RAW_DIR = RESULTS_LATEST / "raw"
+SUMMARY_PATH = RESULTS_LATEST / "summary.json"
+REPORT_PATH = RESULTS_LATEST / "report.md"
+
+
+def load_raw_files():
+    if not RAW_DIR.exists():
+        return []
+    rows = []
+    for path in sorted(RAW_DIR.glob("*.json")):
+        try:
+            with path.open("r", encoding="utf-8") as f:
+                rows.append(json.load(f))
+        except json.JSONDecodeError as exc:
+            print(f"Warning: skipping malformed JSON {path}: {exc}")
+    return rows
+
+
+def build_summary(rows):
+    generated_at = datetime.now(timezone.utc).isoformat()
+    summary = {
+        "generated_at": generated_at,
+        "total_targets": len(rows),
+        "successful_targets": sum(1 for r in rows if r.get("status") == "ok"),
+        "skipped_targets": sum(1 for r in rows if r.get("status") != "ok"),
+        "targets": [],
+    }
+    for row in rows:
+        target = {
+            "framework": row.get("framework"),
+            "status": row.get("status"),
+            "target": row.get("target"),
+            "reason": row.get("reason"),
+        }
+        bench = row.get("benchmark") or {}
+        median = bench.get("median") or {}
+        if median:
+            target["median"] = {
+                "rps": median.get("rps"),
+                "latency_ms_p50": median.get("latency_ms_p50"),
+                "latency_ms_p95": median.get("latency_ms_p95"),
+            }
+        summary["targets"].append(target)
+    return summary
+
+
+def write_summary(summary):
+    RESULTS_LATEST.mkdir(parents=True, exist_ok=True)
+    with SUMMARY_PATH.open("w", encoding="utf-8") as f:
+        json.dump(summary, f, indent=2)
+
+
+def write_report(summary):
+    lines = [
+        "# Benchmark Report",
+        "",
+        f"Generated: `{summary['generated_at']}`",
+        "",
+        "## Overview",
+        f"- Total targets: {summary['total_targets']}",
+        f"- Successful: {summary['successful_targets']}",
+        f"- Skipped: {summary['skipped_targets']}",
+        "",
+        "## Results",
+        "",
+        "| Framework | Status | Median RPS | P50 Latency (ms) | P95 Latency (ms) | Notes |",
+        "|---|---:|---:|---:|---:|---|",
+    ]
+
+    for t in summary["targets"]:
+        median = t.get("median") or {}
+        rps = f"{median.get('rps', 0):.2f}" if "rps" in median else "-"
+        p50 = f"{median.get('latency_ms_p50', 0):.2f}" if "latency_ms_p50" in median else "-"
+        p95 = f"{median.get('latency_ms_p95', 0):.2f}" if "latency_ms_p95" in median else "-"
+        notes = t.get("reason") or ""
+        lines.append(f"| {t.get('framework','-')} | {t.get('status','-')} | {rps} | {p50} | {p95} | {notes} |")
+
+    lines.extend(
+        [
+            "",
+            "## Raw Artifacts",
+            "",
+            "- Raw JSON: `results/latest/raw/*.json`",
+            "- Summary JSON: `results/latest/summary.json`",
+        ]
+    )
+
+    with REPORT_PATH.open("w", encoding="utf-8") as f:
+        f.write("\n".join(lines) + "\n")
+
+
+def main():
+    rows = load_raw_files()
+    summary = build_summary(rows)
+    write_summary(summary)
+    write_report(summary)
+    print(f"Wrote: {SUMMARY_PATH}")
+    print(f"Wrote: {REPORT_PATH}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/run-all.sh b/scripts/run-all.sh
new file mode 100755
index 0000000..5407faa
--- /dev/null
+++ b/scripts/run-all.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+frameworks=(modkit nestjs baseline wire fx "do")
+raw_dir="${RESULTS_RAW_DIR:-results/latest/raw}"
+mkdir -p "$raw_dir"
+
+for framework in "${frameworks[@]}"; do
+  echo "=== Benchmarking: $framework ==="
+  bash scripts/run-single.sh "$framework"
+done
+
+echo "Raw benchmark files generated in: $raw_dir"
diff --git a/scripts/run-single.sh b/scripts/run-single.sh
new file mode 100755
index 0000000..8bcd6cb
--- /dev/null
+++ b/scripts/run-single.sh
@@ -0,0 +1,188 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+framework="${1:-}"
+if [[ -z "$framework" ]]; then
+  echo "Usage: bash scripts/run-single.sh <framework>" >&2
+  exit 1
+fi
+
+case "$framework" in
+  modkit) target="${TARGET:-http://localhost:3001}" ;;
+  nestjs) target="${TARGET:-http://localhost:3002}" ;;
+  baseline) target="${TARGET:-http://localhost:3003}" ;;
+  wire) target="${TARGET:-http://localhost:3004}" ;;
+  fx) target="${TARGET:-http://localhost:3005}" ;;
+  do) target="${TARGET:-http://localhost:3006}" ;;
+  *)
+    echo "Unknown framework: $framework" >&2
+    exit 1
+    ;;
+esac
+
+raw_dir="${RESULTS_RAW_DIR:-results/latest/raw}"
+mkdir -p "$raw_dir"
+out_file="$raw_dir/${framework}.json"
+
+if ! curl -fsS "$target/health" >/dev/null 2>&1; then
+  python3 - <<'PY' "$framework" "$target" "$out_file"
+import json, sys
+framework, target, out_file = sys.argv[1], sys.argv[2], sys.argv[3]
+payload = {
+    "framework": framework,
+    "target": target,
+    "status": "skipped",
+    "reason": "target health endpoint unavailable",
+}
+with open(out_file, "w", encoding="utf-8") as f:
+    json.dump(payload, f, indent=2)
+print(f"SKIP {framework}: health endpoint unavailable")
+PY
+  exit 0
+fi
+
+if PARITY_TARGET="$target" bash scripts/parity-check.sh >/dev/null; then
+  parity_result="passed"
+else
+  python3 - <<'PY' "$framework" "$target" "$out_file"
+import json, sys
+framework, target, out_file = sys.argv[1], sys.argv[2], sys.argv[3]
+payload = {
+    "framework": framework,
+    "target": target,
+    "status": "skipped",
+    "reason": "parity check failed",
+}
+with open(out_file, "w", encoding="utf-8") as f:
+    json.dump(payload, f, indent=2)
+print(f"SKIP {framework}: parity failed")
+PY
+  exit 0
+fi
+
+warmup_requests="${WARMUP_REQUESTS:-100}"
+benchmark_requests="${BENCHMARK_REQUESTS:-300}"
+runs="${BENCHMARK_RUNS:-3}"
+endpoint="${BENCHMARK_ENDPOINT:-/health}"
+
+python3 - <<'PY' "$framework" "$target" "$endpoint" "$warmup_requests" "$benchmark_requests" "$runs" "$out_file" "$parity_result"
+import json
+import statistics
+import subprocess
+import sys
+import time
+import urllib.error
+import urllib.request
+
+framework, target, endpoint, warmup, requests, runs, out_file, parity_result = sys.argv[1:9]
+warmup = int(warmup)
+requests = int(requests)
+runs = int(runs)
+url = target.rstrip("/") + endpoint
+
+def request_once():
+    start = time.perf_counter()
+    try:
+        with urllib.request.urlopen(url, timeout=5) as r:
+            r.read()
+        return time.perf_counter() - start
+    except urllib.error.HTTPError as exc:
+        raise RuntimeError(f"http_error status={exc.code}") from exc
+    except urllib.error.URLError as exc:
+        raise RuntimeError(f"url_error reason={exc.reason}") from exc
+    except Exception as exc:
+        raise RuntimeError(f"request_error {exc}") from exc
+
+# Warmup: tolerate transient failures.
+for _ in range(warmup):
+    try:
+        request_once()
+    except Exception:
+        pass
+
+run_stats = []
+for _ in range(runs):
+    durations = []
+    for _ in range(requests):
+        try:
+            durations.append(request_once())
+        except Exception:
+            pass
+    if len(durations) < 2:  # statistics.quantiles below needs at least two samples
+        continue
+    total = sum(durations)
+    run_stats.append(
+        {
+            "requests": requests,
+            "duration_seconds": total,
+            "rps": requests / total if total > 0 else 0.0,
+            "latency_ms_p50": statistics.median(durations) * 1000,
+            "latency_ms_p95": statistics.quantiles(durations, n=20)[18] * 1000,
+            "latency_ms_max": max(durations) * 1000,
+        }
+    )
+
+if not run_stats:
+    payload = {
+        "framework": framework,
+        "target": target,
+        "status": "skipped",
+        "reason": "benchmark requests failed",
+        "parity": parity_result,
+    }
+    with open(out_file, "w", encoding="utf-8") as f:
+        json.dump(payload, f, indent=2)
+    print(f"SKIP {framework}: benchmark requests failed")
+    raise SystemExit(0)
+
+median_rps = statistics.median([r["rps"] for r in run_stats])
+median_p50 = statistics.median([r["latency_ms_p50"] for r in run_stats])
+median_p95 = statistics.median([r["latency_ms_p95"] for r in run_stats])
+
+docker_stats = {}
+try:
+    completed = subprocess.run(
+        ["docker", "stats", "--no-stream", "--format", "{{.Name}}|{{.MemUsage}}|{{.CPUPerc}}"],
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+    for line in completed.stdout.splitlines():
+        if not line.strip():
+            continue
+        parts = line.split("|", 2)
+        if len(parts) == 3 and (
+            parts[0] == framework
+            or parts[0].startswith(framework + "-")
+            or parts[0].endswith("-" + framework)
+        ):
+            docker_stats = {"container": parts[0], "memory": parts[1], "cpu": parts[2]}
+            break
+except Exception:
+    pass
+
+payload = {
+    "framework": framework,
+    "target": target,
+    "status": "ok",
+    "parity": parity_result,
+    "benchmark": {
+        "endpoint": endpoint,
+        "warmup_requests": warmup,
+        "requests_per_run": requests,
+        "runs": runs,
+        "run_stats": run_stats,
+        "median": {
+            "rps": median_rps,
+            "latency_ms_p50": median_p50,
+            "latency_ms_p95": median_p95,
+        },
+    },
+    "docker": docker_stats,
+}
+
+with open(out_file, "w", encoding="utf-8") as f:
+    json.dump(payload, f, indent=2)
+
+print(f"OK {framework}: median_rps={median_rps:.2f} p50={median_p50:.2f}ms p95={median_p95:.2f}ms")
+PY
diff --git a/test/fixtures/parity/AGENTS.md b/test/fixtures/parity/AGENTS.md
new file mode 100644
index 0000000..900dfc8
--- /dev/null
+++ b/test/fixtures/parity/AGENTS.md
@@ -0,0 +1,49 @@
+# PROJECT KNOWLEDGE BASE - test/fixtures/parity
+
+**Generated:** 2026-02-07T17:40:00+02:00
+**Commit:** 46f84cd
+**Branch:** feat/benchmark-repo-hardening
+
+## OVERVIEW
+Golden parity contract fixtures: deterministic seed data plus endpoint scenario expectations.
+
+## STRUCTURE
+```text
+test/fixtures/parity/
+├── seed.json              # Preloaded baseline records
+└── scenarios/
+    ├── health.json        # Liveness contract
+    ├── users-create.json  # POST /users success + validation failures
+    ├── users-read.json    # GET list/detail behavior
+    ├── users-update.json  # PATCH/PUT behavior and validation
+    └── users-delete.json  # DELETE behavior
+```
+
+## WHERE TO LOOK
+
+| Task | Location | Notes |
+|------|----------|-------|
+| Change baseline dataset | `test/fixtures/parity/seed.json` | Sent to seed endpoint before scenarios |
+| Add user create cases | `test/fixtures/parity/scenarios/users-create.json` | Includes success and invalid payload paths |
+| Adjust read contracts | `test/fixtures/parity/scenarios/users-read.json` | Collection/detail response expectations |
+| Adjust update contracts | `test/fixtures/parity/scenarios/users-update.json` | Mutation behavior and error shape checks |
+| Adjust delete contracts | `test/fixtures/parity/scenarios/users-delete.json` | Deletion idempotency/not-found behavior |
+| Health contract | `test/fixtures/parity/scenarios/health.json` | Minimal service readiness check |
+
+
+## CONVENTIONS
+- Each file is a JSON array of scenarios with `name`, `request`, and `response` blocks.
+- `request.method` defaults to `GET` when omitted; the runner supplies this default.
+- `response.status` is mandatory and matched exactly.
+- Use matcher tokens where values are dynamic: `@any_number`, `@is_iso8601`.
+- Tokens can appear as full values or embedded inside strings (for example `/users/@any_number`).
+- Fixture schema sanity is tested in `cmd/parity-test/main_test.go` (`TestParityFixtures_AreWellFormed`).
+
+## ANTI-PATTERNS (THIS DIRECTORY)
+- Do not encode implementation-specific IDs/timestamps as fixed literals when token matchers are intended.
+- Do not mix unrelated endpoint domains into one scenario file; keep endpoint-grouped fixture files.
+- Do not change the expected error envelope casually; parity compares the API contract, not internal implementation details.
+
+## NOTES
+- `docs/design/002-api-parity-contract.md` documents the intent behind these fixtures; update both when changing contract semantics.
+- `scripts/run-single.sh` parity-gates benchmark execution using these fixtures before any load run.
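The embedded-token convention (tokens appearing as full values or inside strings such as `/users/@any_number`) can be sketched in a few lines. This is a hypothetical illustration of the matching rule only, not the implementation in `cmd/parity-test`; the regexes chosen for each token are assumptions:

```python
import re

# Hypothetical token-to-regex table; the real runner may use different patterns.
TOKEN_PATTERNS = {
    "@any_number": r"\d+",
    "@is_iso8601": r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})",
}


def matches(expected: str, actual: str) -> bool:
    """Return True if `actual` satisfies `expected`, where `expected` may
    embed matcher tokens (e.g. "/users/@any_number")."""
    # Escape the literal parts, then swap each escaped token for its regex.
    pattern = re.escape(expected)
    for token, regex in TOKEN_PATTERNS.items():
        pattern = pattern.replace(re.escape(token), regex)
    return re.fullmatch(pattern, actual) is not None


print(matches("/users/@any_number", "/users/42"))           # True
print(matches("@is_iso8601", "2026-02-07T17:40:00+02:00"))  # True
print(matches("/users/@any_number", "/users/alice"))        # False
```

Escaping first and substituting tokens afterwards keeps literal characters in the expected string from being misread as regex metacharacters.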