Skip to content

Commit eae49c7

Browse files
committed
Phase 7: add semantic run quality checks and reports
1 parent 0f901c6 commit eae49c7

10 files changed

Lines changed: 547 additions & 14 deletions

CONTRIBUTING.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,5 @@ python -m unittest discover -s tests -p 'test_*.py'
4646
```bash
4747
python tools/verify_run_artifacts.py .task_runs/<run_id>
4848
```
49-
6. Update docs/runbook when workflow or guardrails change.
49+
6. Confirm `quality_report.json` is present in the run directory and that its `status` field is not `FAIL`.
50+
7. Update docs/runbook when workflow or guardrails change.

MIGRATION_PLAN.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ This plan migrates behavioral task code out of the original monorepo into `RPi4_
1111
5. Phase 4: completed (production guardrails, runbook, and contributor boundaries).
1212
6. Phase 5: completed (optional tag-strict production lock and CI automation).
1313
7. Phase 6: completed (artifact integrity validation and verifier CLI).
14+
8. Phase 7: completed (semantic run-quality checks and quality reports).
1415

1516
Inputs agreed during planning:
1617
- Priority: architectural cleanup over direct code copy.
@@ -191,6 +192,11 @@ Validation:
191192
2. Add a CLI verification tool for existing run directories.
192193
3. Add tests covering valid and invalid artifact structures.
193194

195+
### Phase 7: Semantic Run Quality Gates
196+
1. Add protocol-aware semantic quality checks across events and results.
197+
2. Write a `quality_report.json` artifact per run.
198+
3. Treat a `FAIL` quality status as a run failure unless enforcement is explicitly disabled in debug mode (`--no-validate-quality`).
199+
194200
## Parity and Test Strategy
195201
### Deterministic parity
196202
Use fixed seeds and fixed presets to compare:

README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ Behavioral task protocols for RPi4 behavior boxes, separated from hardware suppo
44

55
## Current status
66
Phase 0 scaffolding plus Phase 1/2 baselines are in place, and Phase 3
7-
experimental staging is now wired. Phase 4/5/6 release controls are now available:
7+
experimental staging is now wired. Phase 4/5/6/7 release controls are now available:
88
- Shared protocol contract and runtime modules.
99
- Preflight branch/commit checks.
1010
- User/project namespace under `users/`.
@@ -21,7 +21,10 @@ experimental staging is now wired. Phase 4/5/6 release controls are now availabl
2121
- Shared-checkout operator runbook and contributor ownership guidance.
2222
- CI workflow runs smoke/parity tests on push and pull requests.
2323
- Automatic run-artifact structural validation after each task run.
24-
- Debug-only escape hatch: `--no-validate-artifacts`
24+
- Automatic semantic run-quality checks after each task run.
25+
- Debug-only escape hatches:
26+
- `--no-validate-artifacts`
27+
- `--no-validate-quality`
2528

2629
## Layout
2730
- `protocols/`: maintained shared protocol implementations.

RUNBOOK_SHARED_CHECKOUT.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ Runs are validated automatically after completion. To re-check an existing run:
5959
python tools/verify_run_artifacts.py .task_runs/<run_id>
6060
```
6161
Note: production mode does not allow disabling artifact validation.
62+
Note: production mode does not allow disabling semantic run-quality validation (`--no-validate-quality` is rejected).
6263

6364
## Release cadence recommendation
6465
1. Validate on a staging Pi in debug mode.

run_task.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,12 @@
1111
RunMetadata,
1212
append_event,
1313
create_run_paths,
14+
write_quality_report,
1415
write_result,
1516
write_run_metadata,
1617
)
1718
from runtime.preflight import run_preflight
19+
from runtime.quality_checks import evaluate_run_quality
1820
from runtime.release_policy import DEFAULT_RELEASE_POLICY, ReleasePolicy
1921
from runtime.runner import EXPERIMENTAL_PROTOCOLS, SUPPORTED_PROTOCOLS, run_protocol
2022
from runtime.session_config import build_session_config, load_mouse_info, load_session_template
@@ -88,6 +90,11 @@ def parse_args() -> argparse.Namespace:
8890
action="store_true",
8991
help="Skip run artifact validation (debug-only escape hatch).",
9092
)
93+
parser.add_argument(
94+
"--no-validate-quality",
95+
action="store_true",
96+
help="Skip semantic run quality checks (debug-only escape hatch).",
97+
)
9198
return parser.parse_args()
9299

93100

@@ -120,9 +127,15 @@ def resolve_release_policy(require_release_tag: bool) -> ReleasePolicy:
120127
return replace(DEFAULT_RELEASE_POLICY, require_release_tag_in_production=True)
121128

122129

123-
def validate_runtime_options(run_mode: str, no_validate_artifacts: bool) -> None:
130+
def validate_runtime_options(
131+
run_mode: str,
132+
no_validate_artifacts: bool,
133+
no_validate_quality: bool,
134+
) -> None:
124135
if run_mode == "production" and no_validate_artifacts:
125136
raise ValueError("Artifact validation cannot be disabled in production mode.")
137+
if run_mode == "production" and no_validate_quality:
138+
raise ValueError("Run quality validation cannot be disabled in production mode.")
126139

127140

128141
def main() -> int:
@@ -156,7 +169,11 @@ def main() -> int:
156169

157170
require_confirmation = True if args.run_mode == "production" else (not args.yes)
158171
release_policy = resolve_release_policy(require_release_tag=args.require_release_tag)
159-
validate_runtime_options(run_mode=args.run_mode, no_validate_artifacts=args.no_validate_artifacts)
172+
validate_runtime_options(
173+
run_mode=args.run_mode,
174+
no_validate_artifacts=args.no_validate_artifacts,
175+
no_validate_quality=args.no_validate_quality,
176+
)
160177

161178
git_state = run_preflight(
162179
repo_root=repo_root,
@@ -199,6 +216,17 @@ def emit_event(event_type: str, payload: dict[str, object]) -> None:
199216
joined = "\n".join(f"- {error}" for error in validation_errors)
200217
raise RuntimeError(f"Run artifact validation failed:\n{joined}")
201218

219+
quality_report = evaluate_run_quality(run_paths.run_dir)
220+
write_quality_report(run_paths.quality_report_path, quality_report)
221+
if not args.no_validate_quality and quality_report["status"] == "FAIL":
222+
quality_errors = [
223+
finding["message"]
224+
for finding in quality_report.get("findings", [])
225+
if finding.get("level") == "error"
226+
]
227+
joined = "\n".join(f"- {error}" for error in quality_errors)
228+
raise RuntimeError(f"Run quality validation failed:\n{joined}")
229+
202230
print(f"Run complete: {session.run_id}")
203231
print(f"Protocol: {session.protocol}")
204232
print(f"Output directory: {run_paths.run_dir}")

runtime/logging_schema.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class RunPaths:
3131
metadata_path: Path
3232
events_path: Path
3333
result_path: Path
34+
quality_report_path: Path
3435

3536

3637

@@ -43,6 +44,7 @@ def create_run_paths(output_root: Path, run_id: str) -> RunPaths:
4344
metadata_path=run_dir / "run_metadata.json",
4445
events_path=run_dir / "events.jsonl",
4546
result_path=run_dir / "result.json",
47+
quality_report_path=run_dir / "quality_report.json",
4648
)
4749

4850

@@ -67,3 +69,9 @@ def append_event(path: Path, event_type: str, payload: dict[str, object], timest
6769
def write_result(path: Path, result: dict[str, Any]) -> None:
6870
with path.open("w", encoding="utf-8") as handle:
6971
json.dump(result, handle, indent=2, sort_keys=True)
72+
73+
74+
75+
def write_quality_report(path: Path, report: dict[str, Any]) -> None:
76+
with path.open("w", encoding="utf-8") as handle:
77+
json.dump(report, handle, indent=2, sort_keys=True)

0 commit comments

Comments
 (0)