diff --git a/case/evidence_index.sample.json b/case/evidence_index.sample.json new file mode 100644 index 0000000..4022d2e --- /dev/null +++ b/case/evidence_index.sample.json @@ -0,0 +1,59 @@ +{ + "case_id": "my_real_case", + "generated_at": "2025-11-21T00:00:00Z", + "notes": "Sample evidence index. Replace with real export from validator.", + "evidence": [ + { + "id": "CUST-002", + "title": "Blocked Sunday phone call", + "category": "custody", + "priority": 1, + "description": "Other parent blocked scheduled Sunday phone call with children.", + "sources": { "csv": true, "stickies": true, "timeline": true }, + "timeline_events": [ + { + "date": "2024-03-10", + "label": "Sunday call blocked", + "note": "Call time agreed; call not answered or blocked.", + "source": "timeline" + } + ], + "files": [ + { + "path": "DivorceFiles/CL-xxx_2024-03-10_call-log.pdf", + "hash_sha256": "", + "exhibit_label": null + } + ], + "tags": ["phone_access", "interference", "pattern"] + } + ], + "stickies": [ + { + "id": "STICKY-001", + "evidence_id": "CUST-002", + "date": "2024-03-10", + "note": "Wife blocked Sunday call.", + "theme": "custody", + "priority": 1 + } + ], + "timeline": [ + { + "id": "EVT-2024-03-10-CALL", + "date": "2024-03-10", + "label": "Sunday call blocked", + "category": "custody", + "priority": 1, + "details": "Scheduled Sunday phone contact did not occur.", + "evidence_ids": ["CUST-002"], + "source": "csv+sticky+timeline" + } + ], + "unreferenced_ids": [ + { + "id": "CUST-801", + "reason": "Defined in CSV but not referenced in stickies or timeline." 
+ } + ] +} \ No newline at end of file diff --git a/case/evidence_index.schema.json b/case/evidence_index.schema.json new file mode 100644 index 0000000..e4777be --- /dev/null +++ b/case/evidence_index.schema.json @@ -0,0 +1,173 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "ProSe Evidence Index", + "description": "Canonical evidence index schema for ProSe Case Manager.", + "type": "object", + "required": ["case_id", "evidence"], + "properties": { + "case_id": { + "type": "string", + "description": "Internal identifier for the case." + }, + "generated_at": { + "type": "string", + "format": "date-time", + "description": "Timestamp when this index was generated." + }, + "notes": { + "type": "string", + "description": "Optional human-readable notes about this snapshot." + }, + "evidence": { + "type": "array", + "description": "Master list of evidence items for the case.", + "items": { + "type": "object", + "required": ["id", "title", "category"], + "properties": { + "id": { + "type": "string", + "description": "Evidence ID such as CUST-001 or SAFE-002." + }, + "title": { + "type": "string", + "description": "Short title for this evidence item." + }, + "category": { + "type": "string", + "description": "High-level category (e.g. custody, safety, procedural)." + }, + "priority": { + "type": "number", + "description": "Priority score or tier. Lower = more important." + }, + "description": { + "type": "string", + "description": "Longer factual description of what this evidence shows." 
+ }, + "sources": { + "type": "object", + "description": "Which input systems reference this ID.", + "properties": { + "csv": { "type": "boolean" }, + "stickies": { "type": "boolean" }, + "timeline": { "type": "boolean" } + }, + "additionalProperties": false + }, + "timeline_events": { + "type": "array", + "description": "Specific dated events associated with this evidence.", + "items": { + "type": "object", + "properties": { + "date": { + "type": "string", + "format": "date", + "description": "YYYY-MM-DD" + }, + "label": { + "type": "string", + "description": "Short label for the event." + }, + "note": { + "type": "string", + "description": "Optional extra details about the event." + }, + "source": { + "type": "string", + "description": "Where this event came from (timeline, sticky, etc.)." + } + }, + "additionalProperties": false + } + }, + "files": { + "type": "array", + "description": "Physical or digital files backing this evidence.", + "items": { + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Relative path to file (e.g. DivorceFiles/CL-001_xxx.pdf)." + }, + "hash_sha256": { + "type": "string", + "description": "Optional SHA-256 hash for integrity checks." + }, + "exhibit_label": { + "type": ["string", "null"], + "description": "Court exhibit label once assigned." 
+ } + }, + "additionalProperties": false + } + }, + "tags": { + "type": "array", + "description": "Searchable tags summarizing themes or issues.", + "items": { "type": "string" } + } + }, + "additionalProperties": false + } + }, + "stickies": { + "type": "array", + "description": "Sticky-note style fact snippets linked to evidence IDs.", + "items": { + "type": "object", + "properties": { + "id": { "type": "string" }, + "evidence_id": { "type": "string" }, + "date": { + "type": "string", + "format": "date" + }, + "note": { "type": "string" }, + "theme": { "type": "string" }, + "priority": { "type": "number" } + }, + "additionalProperties": false + } + }, + "timeline": { + "type": "array", + "description": "Chronological events referencing evidence IDs.", + "items": { + "type": "object", + "properties": { + "id": { "type": "string" }, + "date": { + "type": "string", + "format": "date" + }, + "label": { "type": "string" }, + "category": { "type": "string" }, + "priority": { "type": "number" }, + "details": { "type": "string" }, + "evidence_ids": { + "type": "array", + "items": { "type": "string" } + }, + "source": { "type": "string" } + }, + "additionalProperties": false + } + }, + "unreferenced_ids": { + "type": "array", + "description": "Evidence IDs defined but not referenced anywhere yet.", + "items": { + "type": "object", + "properties": { + "id": { "type": "string" }, + "reason": { "type": "string" } + }, + "additionalProperties": false + } + } + }, + "additionalProperties": false +} \ No newline at end of file diff --git a/docs/EVIDENCE_INDEX.md b/docs/EVIDENCE_INDEX.md new file mode 100644 index 0000000..6f3f964 --- /dev/null +++ b/docs/EVIDENCE_INDEX.md @@ -0,0 +1,95 @@ +# ProSe Evidence Index + +The Evidence Index is the **single source of truth** for a case’s structured evidence in ProSe. 
+ +It combines: + +- the master evidence list (`Custody_Mod_Evidence.csv`) +- sticky-note style facts (`sticky_index.json`) +- timeline events (`timeline.csv`) + +into one canonical JSON document that the Case Manager can safely consume. + +--- + +## Goals + +- Ensure every `Evidence_ID` is: + - declared once, + - referenced consistently, + - and never silently “drifts.” +- Provide a stable input for: + - timeline generation, + - affidavit drafting, + - motion templates, + - and audit reports. + +--- + +## Files + +- `case/evidence_index.schema.json` + JSON Schema definition for the index. + +- `case/evidence_index.sample.json` + Example instance of the index, populated with sample data. + +- `engine/agents/evidence_validator.py` + Module that cross-checks the CSV/JSON/timeline source files. + +--- + +## Source Inputs + +The validator expects a case directory (e.g. `my_real_case/`) containing: + +- `Custody_Mod_Evidence.csv` + - Must have a column: `Evidence_ID` +- `sticky_index.json` + - List of objects, each with `evidence_ids: [ ... ]` +- `timeline.csv` + - Column `Evidence_IDs` with `;`-separated IDs + +These three are treated as the “donor truth” that gets normalized into the Evidence Index. + +--- + +## Validation Logic + +For a given case directory: + +1. Load all `Evidence_ID` values from the CSV. +2. Load all `evidence_ids` from stickies. +3. Load all `Evidence_IDs` from the timeline. +4. Compute: + - IDs used in stickies but not in CSV (`unknown_in_stickies`) + - IDs used in timeline but not in CSV (`unknown_in_timeline`) + - IDs in CSV but not referenced anywhere (`unused_evidence`) +5. Return a structured result and human-readable report. + +If there are no unknown IDs in stickies or timeline, status is `OK`. +Otherwise, status is `WARN`. + +--- + +## Case Manager Integration + +The Case Manager can: + +- Call `validate_case(base_path)` from `engine.agents.evidence_validator`. 
"""
Evidence validator for ProSe.

Cross-checks:
- Custody_Mod_Evidence.csv   (master evidence list)
- sticky_index.json          (sticky notes referencing evidence_ids)
- timeline.csv               (timeline events with Evidence_IDs field)

and reports:
- unknown IDs in stickies/timeline
- unused evidence IDs in the CSV

Case Manager integration (carried over from docs/EVIDENCE_INDEX.md):
- Inspect the dict returned by ``validate_case`` to:
  - block or warn before generating court-facing documents,
  - highlight missing or inconsistent IDs,
  - propose follow-up tasks ("map CUST-801 into the timeline").
- Later, the same data can be used to export a full ``evidence_index.json``
  instance that conforms to ``evidence_index.schema.json``.

Next steps:
- Implement an exporter that writes a valid ``evidence_index.json`` from the
  three source files.
- Add tests where:
  - CSV, stickies, and timeline all agree (status ``OK``).
  - Known mismatches are present (status ``WARN``) and are correctly reported.
- Wire this into the Case Manager's "pre-flight check" before drafting
  motions or affidavits.
"""

import csv
import json
import sys
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Dict, List, Optional, Sequence, Set, Union


CUSTODY_CSV_NAME = "Custody_Mod_Evidence.csv"
STICKY_JSON_NAME = "sticky_index.json"
TIMELINE_CSV_NAME = "timeline.csv"

# "utf-8-sig" decodes plain UTF-8 and also strips a leading byte-order mark.
# Without this, a BOM-prefixed CSV exposes its first header as
# "\ufeffEvidence_ID" and every row lookup silently comes back empty.
_TEXT_ENCODING = "utf-8-sig"


@dataclass
class EvidenceValidationResult:
    """Outcome of cross-checking the three source files of one case."""

    # Number of distinct IDs found in each source.
    evidence_count: int
    sticky_count: int
    timeline_count: int
    # Sorted IDs referenced by a source but absent from the master CSV.
    unknown_in_stickies: List[str]
    unknown_in_timeline: List[str]
    # Sorted IDs present in the master CSV but referenced by neither source.
    unused_evidence: List[str]
    status: str  # "OK" or "WARN"


def _clean_id(value: object) -> str:
    """Return *value* as a stripped string; ``None`` becomes ``""``."""
    if value is None:
        return ""
    return str(value).strip()


def _load_evidence_ids(base: Path) -> Set[str]:
    """
    Collect the non-empty ``Evidence_ID`` values from the master CSV.

    A missing file is reported on stdout and yields an empty set.
    """
    path = base / CUSTODY_CSV_NAME
    ids: Set[str] = set()

    if not path.exists():
        print(f"[evidence_validator] Warning: {path} not found")
        return ids

    with path.open(newline="", encoding=_TEXT_ENCODING) as f:
        for row in csv.DictReader(f):
            eid = _clean_id(row.get("Evidence_ID"))
            if eid:
                ids.add(eid)
    return ids


def _load_sticky_ids(base: Path) -> Set[str]:
    """
    Collect every ID referenced under ``evidence_ids`` in the sticky index.

    The file is expected to hold a JSON list of objects. For each object,
    ``evidence_ids`` may be missing or ``null`` (no references), a list of
    IDs, or a single scalar ID. A missing file yields an empty set.

    :raises ValueError: if the top level is not a list or an entry is not an
        object (malformed JSON also surfaces as ``json.JSONDecodeError``,
        which is a ``ValueError`` subclass).
    """
    path = base / STICKY_JSON_NAME
    ids: Set[str] = set()

    if not path.exists():
        return ids

    data = json.loads(path.read_text(encoding=_TEXT_ENCODING))
    if not isinstance(data, list):
        raise ValueError(
            f"{path}: expected a JSON list of sticky objects, "
            f"got {type(data).__name__}"
        )

    for index, sticky in enumerate(data):
        if not isinstance(sticky, dict):
            raise ValueError(
                f"{path}: sticky #{index} is not a JSON object "
                f"(got {type(sticky).__name__})"
            )
        refs = sticky.get("evidence_ids")
        if refs is None:
            continue
        # A bare string must be treated as ONE id; iterating it would yield
        # single characters and invent bogus "unknown" IDs.
        if not isinstance(refs, list):
            refs = [refs]
        for ref in refs:
            eid = _clean_id(ref)
            if eid:
                ids.add(eid)
    return ids


def _load_timeline_ids(base: Path) -> Set[str]:
    """
    Collect every ID from the ``;``-separated ``Evidence_IDs`` column.

    A missing file yields an empty set.
    """
    path = base / TIMELINE_CSV_NAME
    ids: Set[str] = set()

    if not path.exists():
        return ids

    with path.open(newline="", encoding=_TEXT_ENCODING) as f:
        for row in csv.DictReader(f):
            field = row.get("Evidence_IDs") or ""
            # Empty fragments (trailing ";" or ";;") are dropped.
            ids.update(part.strip() for part in field.split(";") if part.strip())
    return ids


def validate_case(base: Union[str, Path]) -> Dict[str, object]:
    """
    Validate evidence links for a given case directory.

    :param base: Path to directory containing Custody_Mod_Evidence.csv,
                 sticky_index.json, timeline.csv
    :return: dict suitable for JSON or further processing, with the keys of
             ``EvidenceValidationResult``. ``status`` is ``"WARN"`` when any
             sticky or timeline ID is missing from the CSV, else ``"OK"``;
             unused CSV IDs alone do not trigger ``"WARN"``.
    :raises ValueError: if sticky_index.json is malformed.
    """
    base = Path(base)

    evidence_ids = _load_evidence_ids(base)
    sticky_ids = _load_sticky_ids(base)
    timeline_ids = _load_timeline_ids(base)

    unknown_in_stickies = sticky_ids - evidence_ids
    unknown_in_timeline = timeline_ids - evidence_ids
    unused_evidence = evidence_ids - sticky_ids - timeline_ids

    status = "WARN" if (unknown_in_stickies or unknown_in_timeline) else "OK"

    result = EvidenceValidationResult(
        evidence_count=len(evidence_ids),
        sticky_count=len(sticky_ids),
        timeline_count=len(timeline_ids),
        unknown_in_stickies=sorted(unknown_in_stickies),
        unknown_in_timeline=sorted(unknown_in_timeline),
        unused_evidence=sorted(unused_evidence),
        status=status,
    )
    return asdict(result)


def print_report(result: Dict[str, object]) -> None:
    """
    Pretty-print validation results to the console.

    :param result: dict as returned by ``validate_case``.
    """
    print(f"Evidence IDs in CSV: {result['evidence_count']}")
    print(f"Referenced in stickies: {result['sticky_count']}")
    print(f"Referenced in timeline: {result['timeline_count']}")

    unknown_in_stickies = result["unknown_in_stickies"]
    unknown_in_timeline = result["unknown_in_timeline"]
    unused_evidence = result["unused_evidence"]

    if unknown_in_stickies:
        print("\n⚠ Unknown Evidence_IDs in stickies (not in CSV):")
        for eid in unknown_in_stickies:
            print(f" - {eid}")

    if unknown_in_timeline:
        print("\n⚠ Unknown Evidence_IDs in timeline (not in CSV):")
        for eid in unknown_in_timeline:
            print(f" - {eid}")

    if unused_evidence:
        print("\nℹ Evidence_IDs in CSV not referenced yet (fine, but FYI):")
        for eid in unused_evidence:
            print(f" - {eid}")

    if not (unknown_in_stickies or unknown_in_timeline):
        print("\n✅ Links look consistent. Nice work.")
    else:
        print("\n⚠ Validation completed with warnings. Review above items.")


def main(argv: Optional[Sequence[str]] = None) -> int:
    """
    CLI entry point: validate one case directory and print the report.

    :param argv: arguments without the program name; defaults to
                 ``sys.argv[1:]``. The first argument is the case directory,
                 falling back to ``my_real_case`` when none is given.
    :return: process exit code (always 0; warnings are only printed).
    """
    args = sys.argv[1:] if argv is None else list(argv)
    base_dir = Path(args[0]) if args else Path("my_real_case")

    print_report(validate_case(base_dir))
    return 0


if __name__ == "__main__":
    # CLI usage: python -m engine.agents.evidence_validator my_real_case
    raise SystemExit(main())


# --- tests/core/test_evidence_validator.py ---------------------------------
# In the repository this test lives in its own file and imports
# ``validate_case`` from ``engine.agents.evidence_validator``.

def test_empty_case_directory(tmp_path: Path) -> None:
    """
    With no CSV/JSON/Timeline files present,
    the validator should not crash and should
    report zero counts and OK status.
    """
    result = validate_case(tmp_path)

    assert result["evidence_count"] == 0
    assert result["sticky_count"] == 0
    assert result["timeline_count"] == 0
    assert result["unknown_in_stickies"] == []
    assert result["unknown_in_timeline"] == []
    assert result["unused_evidence"] == []
    assert result["status"] == "OK"