diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..23b2f03 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,37 @@ +--- +name: Bug Report +about: Report a bug or issue +title: '[BUG] ' +labels: bug +assignees: '' +--- + +## πŸ› Bug Description +A clear and concise description of what the bug is. + +## πŸ“‹ Steps to Reproduce +1. Go to '...' +2. Click on '...' +3. See error + +## βœ… Expected Behavior +What you expected to happen. + +## ❌ Actual Behavior +What actually happened. + +## πŸ–ΌοΈ Screenshots +If applicable, add screenshots to help explain your problem. + +## 🌍 Environment +- OS: [e.g. Windows, macOS, Linux] +- Version: [e.g. 1.0.0] +- Other relevant details + +## πŸ“ Additional Context +Add any other context about the problem here. + +## βœ”οΈ Before Submitting +- [ ] I've searched existing issues to avoid duplicates +- [ ] I've provided clear steps to reproduce +- [ ] I've included relevant environment details diff --git a/.github/ISSUE_TEMPLATE/cleanup_request.md b/.github/ISSUE_TEMPLATE/cleanup_request.md new file mode 100644 index 0000000..791ed03 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/cleanup_request.md @@ -0,0 +1,39 @@ +--- +name: Cleanup Request +about: Report clutter, unused code, or organization issues +title: '[CLEANUP] ' +labels: cleanup, maintenance +assignees: '' +--- + +## 🧹 Cleanup Description +What needs to be cleaned up or reorganized? + +## πŸ“ Location +Where in the repository is the issue? +- Path: +- Files/directories affected: + +## πŸ” Issue Type +- [ ] Backup files (*.bak, *.old, etc.) +- [ ] Temporary files +- [ ] Unused/dead code +- [ ] Disorganized structure +- [ ] Outdated documentation +- [ ] Other (describe below) + +## πŸ’‘ Suggested Action +How should this be addressed? 
+ +## πŸ“Š Impact +- [ ] Low - Minor cleanup +- [ ] Medium - Affects repository organization +- [ ] High - Significant clutter or disorganization + +## πŸ“ Additional Details +Any other information about the cleanup needed. + +## βœ”οΈ Before Submitting +- [ ] I've verified this is actually clutter/needs cleanup +- [ ] I've provided specific location information +- [ ] I've suggested how to address it diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 0000000..e785cb9 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,30 @@ +--- +name: Feature Request +about: Suggest a new feature or enhancement +title: '[FEATURE] ' +labels: enhancement +assignees: '' +--- + +## πŸš€ Feature Description +A clear and concise description of the feature you'd like to see. + +## 🎯 Problem/Motivation +What problem does this feature solve? Why is it needed? + +## πŸ’‘ Proposed Solution +How would you like this feature to work? + +## πŸ”„ Alternatives Considered +Have you considered any alternative solutions or features? + +## πŸ“Š Impact +Who will benefit from this feature? How will it improve the project? + +## πŸ“ Additional Context +Add any other context, mockups, or examples about the feature request. + +## βœ”οΈ Before Submitting +- [ ] I've searched existing issues to avoid duplicates +- [ ] This feature aligns with the project's goals +- [ ] I've clearly described the problem and solution diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md new file mode 100644 index 0000000..448438a --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md @@ -0,0 +1,95 @@ +## πŸ“‹ Pull Request Description + +### What does this PR do? + + +### Why is this change needed? + + +### Related Issues + + +## πŸ§ͺ Testing + +### How has this been tested? 
+ +- [ ] Unit tests added/updated +- [ ] Integration tests added/updated +- [ ] Manual testing completed +- [ ] All tests pass + +### Test Coverage + + +## 🧹 Cleanliness Checklist + +**CRITICAL**: Ensure your PR maintains repository cleanliness standards + +- [ ] **No backup files** (*.bak, *.old, *.backup, etc.) +- [ ] **No temporary files** (*.tmp, temp/, tmp/) +- [ ] **No commented-out code** (delete, don't comment) +- [ ] **No debug statements** (console.log, print, etc.) +- [ ] **No unused imports** or variables +- [ ] **No personal IDE configs** (.vscode/, .idea/, etc.) +- [ ] **No build artifacts** (dist/, build/, compiled files) +- [ ] **No sensitive data** (keys, tokens, passwords) +- [ ] **All files in correct directories** (src/, tests/, docs/, etc.) + +## πŸ“ Code Quality Checklist + +- [ ] Code follows project style guidelines +- [ ] Functions/methods are documented +- [ ] Complex logic has explanatory comments +- [ ] Error handling is appropriate +- [ ] Code is DRY (Don't Repeat Yourself) + +## πŸ“š Documentation + +- [ ] README updated (if needed) +- [ ] Documentation added/updated for new features +- [ ] Examples added/updated (if applicable) +- [ ] CHANGELOG updated (if applicable) + +## πŸ” Review Checklist + +Before requesting review, ensure: + +- [ ] Self-reviewed all changes +- [ ] Commit messages are clear and descriptive +- [ ] Branch is up to date with main +- [ ] No merge conflicts +- [ ] CI/CD checks pass (if applicable) + +## πŸ“Š Impact Assessment + +### Breaking Changes +- [ ] This PR includes breaking changes +- [ ] Migration guide provided (if breaking changes) + +### Performance Impact + + +### Security Considerations + + +## πŸ“Έ Screenshots/Recordings + + +## πŸ’¬ Additional Notes + + +--- + +## βœ… Final Confirmation + +By submitting this PR, I confirm that: + +- [ ] I have read and followed the [CONTRIBUTING.md](../CONTRIBUTING.md) guidelines +- [ ] I have read and accept the [CODE_OF_CONDUCT.md](../CODE_OF_CONDUCT.md) +- [ ] 
My code is clean, tested, and ready for review +- [ ] I have removed all clutter, backup, and temporary files +- [ ] This PR maintains the high standards of the ProSe repository + +--- + +**Ready for review!** πŸš€ diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..14a9d2b --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,29 @@ +name: ProSe CI + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Lint with flake8 + run: | + pip install flake8 + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Run tests + run: | + # Placeholder for running tests + # python -m pytest + echo "No tests to run yet" diff --git a/.github/workflows/cleanliness-check.yml b/.github/workflows/cleanliness-check.yml new file mode 100644 index 0000000..a938869 --- /dev/null +++ b/.github/workflows/cleanliness-check.yml @@ -0,0 +1,123 @@ +name: Repository Cleanliness Check + +on: + pull_request: + branches: [ main ] + push: + branches: [ main ] + workflow_dispatch: + +jobs: + cleanliness-check: + runs-on: ubuntu-latest + name: Check for Clutter and Repository Health + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Check for backup files + run: | + echo "πŸ” Checking for backup files..." + BACKUP_FILES=$(find . -type f \( -name "*.bak" -o -name "*.old" -o -name "*.backup" -o -name "*~" \) ! 
-path "./.git/*" || true) + if [ -n "$BACKUP_FILES" ]; then + echo "❌ Found backup files:" + echo "$BACKUP_FILES" + exit 1 + else + echo "βœ… No backup files found" + fi + + - name: Check for temporary files + run: | + echo "πŸ” Checking for temporary files..." + TEMP_FILES=$(find . -type f \( -name "*.tmp" -o -name "*.temp" \) ! -path "./.git/*" || true) + if [ -n "$TEMP_FILES" ]; then + echo "❌ Found temporary files:" + echo "$TEMP_FILES" + exit 1 + else + echo "βœ… No temporary files found" + fi + + - name: Check for OS-generated files + run: | + echo "πŸ” Checking for OS-generated files..." + OS_FILES=$(find . -type f \( -name ".DS_Store" -o -name "Thumbs.db" -o -name "desktop.ini" \) ! -path "./.git/*" || true) + if [ -n "$OS_FILES" ]; then + echo "❌ Found OS-generated files:" + echo "$OS_FILES" + exit 1 + else + echo "βœ… No OS-generated files found" + fi + + - name: Check for IDE configuration files + run: | + echo "πŸ” Checking for IDE configuration directories..." + IDE_CONFIGS=$(find . -type d \( -name ".vscode" -o -name ".idea" \) ! -path "./.git/*" || true) + if [ -n "$IDE_CONFIGS" ]; then + echo "⚠️ Warning: Found IDE configuration directories:" + echo "$IDE_CONFIGS" + echo "These should typically be in .gitignore" + # Not failing on this, just warning + else + echo "βœ… No IDE configuration directories found" + fi + + - name: Check for backup directories + run: | + echo "πŸ” Checking for backup directories..." + BACKUP_DIRS=$(find . -type d \( -name "backup" -o -name "backups" -o -name "old" -o -name "_old" \) ! -path "./.git/*" || true) + if [ -n "$BACKUP_DIRS" ]; then + echo "❌ Found backup directories:" + echo "$BACKUP_DIRS" + exit 1 + else + echo "βœ… No backup directories found" + fi + + - name: Run repository health audit + run: | + echo "πŸ₯ Running repository health audit..." + chmod +x scripts/audit.sh + bash scripts/audit.sh || true + + - name: Check repository structure + run: | + echo "πŸ“ Checking repository structure..." 
+ + # Check essential directories exist + for dir in "src" "tests" "docs" "examples" "scripts"; do + if [ -d "$dir" ]; then + echo "βœ… $dir/ exists" + else + echo "⚠️ Warning: $dir/ directory missing" + fi + done + + # Check essential files exist + for file in "README.md" ".gitignore" "CONTRIBUTING.md" "CODE_OF_CONDUCT.md"; do + if [ -f "$file" ]; then + echo "βœ… $file exists" + else + echo "❌ $file missing" + exit 1 + fi + done + + - name: Summary + run: | + echo "" + echo "=================================" + echo "βœ… Repository cleanliness check passed!" + echo "=================================" + echo "" + echo "This repository maintains high standards:" + echo " βœ“ No backup files" + echo " βœ“ No temporary files" + echo " βœ“ No OS-generated clutter" + echo " βœ“ Proper directory structure" + echo " βœ“ All essential files present" + echo "" + echo "Keep up the great work! πŸŽ‰" diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..42f7cfd --- /dev/null +++ b/.gitignore @@ -0,0 +1,172 @@ +# ProSe - Comprehensive .gitignore for clean repository management + +# ===== Backup Files ===== +# Prevent backup files from cluttering the repository +*.bak +*.backup +*.old +*.orig +*~ +.*.swp +.*.swo +*.backup.* +backup/ +backups/ +old/ +_old/ +.old/ + +# ===== OS Generated Files ===== +# macOS +.DS_Store +.AppleDouble +.LSOverride +._* +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Windows +Thumbs.db +ehthumbs.db +Desktop.ini +$RECYCLE.BIN/ +*.lnk + +# Linux +.directory +.Trash-* + +# ===== Editor/IDE Files ===== +# VSCode +.vscode/ +*.code-workspace + +# JetBrains IDEs +.idea/ +*.iml +*.iws +*.ipr + +# Sublime Text +*.sublime-project +*.sublime-workspace + +# Vim +[._]*.s[a-v][a-z] +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Emacs +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc + +# ===== Build Artifacts ===== +# 
Compiled files +*.com +*.class +*.dll +*.exe +*.o +*.so +*.pyc +*.pyo +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# ===== Dependencies ===== +# Node +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +package-lock.json +yarn.lock + +# Python +venv/ +env/ +ENV/ +.venv + +# ===== Logs ===== +*.log +logs/ +*.log.* + +# ===== Temporary Files ===== +tmp/ +temp/ +.tmp/ +.temp/ +*.tmp +*.temp + +# ===== Test Coverage ===== +.coverage +.coverage.* +htmlcov/ +.pytest_cache/ +.tox/ +coverage/ +*.cover + +# ===== Security ===== +# Never commit sensitive data +*.key +*.pem +*.p12 +*.cer +*.crt +*.der +.env +.env.* +!.env.example +secrets/ +credentials/ + +# ===== Documentation Build ===== +docs/_build/ +site/ +_site/ + +# ===== Misc ===== +.cache/ +*.pid +*.seed +*.pid.lock +.sass-cache/ + +# ProSe Directories +_INBOX/ +out/ +my_real_case/ diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..0d261ca --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,107 @@ +# Code of Conduct - ProSe Repository Standards + +## Our Pledge + +We are committed to maintaining a professional, clean, and well-organized repository that serves as an example of best practices in software development. + +## Repository Standards + +### Cleanliness Standards + +1. **No Clutter**: The repository must remain free of: + - Backup files (*.bak, *.old, *.backup) + - Temporary files (*.tmp, temp/, tmp/) + - Unused code or commented-out blocks + - Old versions of files + - Experimental code that isn't production-ready + +2. **Organization**: All files must be in their proper locations: + - Core engine code in `engine/` + - File tools in `file_organizer/` + - Tests in `tests/` + - Documentation in `docs/` + - Scripts in `scripts/` + - Examples in `examples/` + +3. 
**Quality Over Quantity**: + - Every line of code should have a purpose + - Remove dead code immediately + - Don't keep "just in case" files + - Use git history for old versions + +### Contribution Standards + +1. **Clean Commits**: + - Clear, descriptive commit messages + - Atomic commits (one logical change per commit) + - No commits containing backup or temp files + +2. **Code Quality**: + - Follow existing code style + - Include tests for new features + - Document public APIs + - Remove debug statements before committing + +3. **Pull Request Quality**: + - Self-review your code before submitting + - Ensure all tests pass + - Update documentation + - Clean commit history + +## Enforcement + +### Review Process + +All contributions will be reviewed for: +- βœ… Code quality and functionality +- βœ… Adherence to cleanliness standards +- βœ… Proper documentation +- βœ… Test coverage +- βœ… Absence of clutter files + +### Violations + +Contributions that violate these standards will be: +1. **First offense**: Requested to clean up and resubmit +2. **Repeated offenses**: May result in rejected PRs +3. **Severe violations**: Immediate rejection (e.g., committing sensitive data) + +## Maintenance Responsibilities + +### For Contributors +- Clean up your workspace before committing +- Use `.gitignore` appropriately +- Remove files you don't intend to commit +- Ask if unsure about including a file + +### For Maintainers +- Conduct thorough reviews +- Maintain .gitignore +- Keep documentation updated +- Perform periodic cleanup audits +- Lead by example + +## Periodic Cleanup + +The repository undergoes regular maintenance: +- **Weekly**: Review for stray files +- **Monthly**: Documentation updates +- **Quarterly**: Dependency updates +- **Annually**: Major refactoring if needed + +## Reporting Issues + +If you notice clutter or organization issues: +1. Create an issue with the `cleanup` label +2. Describe what needs attention +3. 
Suggest improvements if applicable + +## Attribution + +These standards exist to ensure ProSe remains a professional, maintainable repository that serves as a reliable foundation for integration with other projects. + +--- + +**Version**: 1.0 +**Last Updated**: November 2025 +**Maintainer**: ProSe Team diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..5ba5d20 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,216 @@ + +# Contributing to ProSe + +Thank you for contributing to ProSe! This repository is the **clean hub** that pulls in proven pieces from other projects (like ProSe Agent 2 and ProSe File Organizer) and leaves the chaos behind. + +The goal: a repo that’s safe to clone, easy to understand, and doesn’t ship someone’s backups or live case files. + +--- + +## 🎯 Repository Philosophy + +ProSe is the **mainline repo**. Other projects are **donors**. + +- ProSe_Agent2 and ProSe File Organizer are where experiments and one-off scripts can live. +- This repo only receives **clean, intentional imports** from those donors. +- Live case data (real divorce/custody files) must **never** be committed here. + +Think of this repo as the *cathedral*, not the workshop. + +--- + +## πŸ“‹ Before You Contribute + +### ❌ What NOT to include + +Do **not** commit: + +- Backup files (`*.bak`, `*.old`, `*.backup`, timestamps in file names, etc.) +- Temporary files (`*.tmp`, `tmp/`, `temp/`, scratch scripts) +- Unused or dead code + > If it’s not used, delete it. Git history is the archive. +- Old versions of files (e.g., `file_v2_final_final.py`) +- Build artifacts (`dist/`, `build/`, compiled binaries) +- Dependency folders (`node_modules/`, `.venv/`, `venv/`, etc.) +- IDE/editor configs (`.vscode/`, `.idea/`, `.history/`) +- Personal notes, brain-dumps, or TODO text files + > Use Issues or PR descriptions instead. +- Experimental or half-baked features + > Put those on a feature branch or in a donor repo, not `main`. 
+ +--- + +### 🚫 ABSOLUTE HARD LINE + +- **No live case data.** + Do **not** commit any real legal documents, evidence, or personal data. + - No PDFs from actual cases + - No screenshots with names, addresses, or children + - No exported timelines with real dates/parties + +If you need sample data, use redacted or synthetic examples under `examples/`. + +--- + +### βœ… What TO include + +- Clean, production-ready code +- Migrations or refactors that remove complexity or duplication +- Relevant tests for new behavior +- Documentation for new features or public APIs +- Updates to `README.md` or `docs/` for major features +- Clear commit messages explaining **what** and **why** + +--- + +## πŸ”„ Contribution Workflow + +1. **Fork and clone** the repository. + +2. **Create a feature branch** from `main`: + ```bash + git checkout -b feature/your-feature-name + ``` + +3. **Make your changes, keeping donors in mind**: + - If you’re copying from ProSe_Agent2 or PSFO, **clean it first**. + - Strip out debug code, hardcoded paths, and case-specific assumptions. + +4. **Test your changes thoroughly.** + +5. **Clean up before committing**: + ```bash + # Remove common junk files + find . -name "*.bak" -delete + find . -name "*.tmp" -delete + find . -name "*~" -delete + ``` + +6. **Commit with clear messages**: + ```bash + git add . + git commit -m "feat: add timeline summarizer endpoint" + ``` + +7. 
**Push and open a Pull Request against `main`.** + +--- + +## πŸ“ Pull Request Guidelines + +### PR Title Format (Conventional Commits) +- `feat:` – New feature +- `fix:` – Bug fix +- `docs:` – Documentation change +- `refactor:` – Code refactoring +- `test:` – Adding or updating tests +- `chore:` – Maintenance / tooling + +**Examples:** +- `feat: add affidavit generation API` +- `fix: handle missing timestamps in timeline parser` +- `refactor: extract drive sync client` + +--- + +### PR Description Should Include +- **What** you changed +- **Why** you changed it (problem/motivation) +- **How** to test it +- Any related issues or design docs + +--- + +### Before Submitting a PR +- [ ] Code is clean and follows project style +- [ ] No backup, temp, or junk files +- [ ] All tests pass (`pytest`, `python -m unittest`, etc.) +- [ ] Documentation updated +- [ ] Commit history clean (squash if needed) +- [ ] **No secrets or personal data** + +--- + +## 🧹 Code Cleanup Checklist + +1. **No commented-out code** + If you don’t need it, delete it. Git remembers. + +2. **No debug prints/log spam** + - Remove debugging `print()` + - Use proper logging levels + +3. **No unused imports** + Clean up with your linter or IDE tools. + +4. **No duplicate logic** + Extract helpers for repeated code. + +5. **No TODO comments** + Convert them to GitHub Issues. + +6. **Consistent formatting** + Use project formatters: `black`, `isort`, `ruff`, `prettier`, etc. + +--- + +## πŸ—οΈ Project Structure + +ProSe is trending toward this layout: + +``` +ProSe/ +β”œβ”€β”€ engine/ # Core orchestration (from ProSe_Agent2) +β”‚ β”œβ”€β”€ core/ # engine.py, orchestrator, process wiring +β”‚ └── agents/ # FileAgent, SyncAgent, TimelineAgent, etc. 
+β”œβ”€β”€ file_organizer/ # Tools imported from PSFO (cleaned) +β”œβ”€β”€ case/ # Local-only examples (never real case data) +β”‚ β”œβ”€β”€ DivorceFiles/ # Example input +β”‚ └── Generated/ # Example outputs +β”œβ”€β”€ docs/ # Documentation, specs, architecture +β”œβ”€β”€ tests/ # Automated tests +β”œβ”€β”€ scripts/ # Utility scripts (no hardcoded paths) +└── README.md # Project overview +``` + +> ⚠️ **REAL case data should live outside the git repo.** +> `case/` is for structure samples only. + +--- + +## πŸ” Code Review Process + +Reviewers check for: +- Code quality + adherence to guidelines +- No clutter (temp files, backups, artifacts) +- Proper tests +- Clear documentation +- Clean commit history +- **Respect for privacy / data boundaries** + +PRs mixing huge refactors + new features may be asked to split. + +--- + +## 🚫 What Will Be Rejected + +PRs will be rejected if they: +- Contain backup, temp, or experimental junk +- Include large binaries without justification +- **Include real personal/legal data** +- Break existing functionality +- Ignore guidelines +- Include API keys, secrets, or credentials + +--- + +## πŸ’‘ Best Practices + +1. **Keep it boring.** Simple code is maintainable. +2. **Write tests as you go.** +3. **Document behavior, not just functions.** +4. **Commit in small pieces.** +5. **Review your changes before pushing.** +6. **Ask questions early** β€” use Issues. 
+ +--- diff --git a/README.md b/README.md index eed3129..829922a 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,212 @@ # ProSe -the main module for Prose' litagints + +**The Main Module for ProSe Litigants** + +[![Code Quality](https://img.shields.io/badge/code%20quality-clean-brightgreen)]() +[![Maintenance](https://img.shields.io/badge/maintained-yes-green)]() +[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)]() + +ProSe is a clean, well-organized main repository designed to receive and integrate contributions from multiple source repositories while maintaining the highest standards of code quality and organization. + +## 🎯 Project Goals + +- **Maintain Cleanliness**: Zero tolerance for backup files, temporary files, and clutter +- **Organized Structure**: Everything in its proper place +- **Quality Code**: No unused code, no commented-out blocks, no debug statements +- **Easy Integration**: Seamless integration from donor repositories +- **Long-term Maintainability**: Built to last without accumulating technical debt + +## πŸ“ Repository Structure + +``` +ProSe/ +β”œβ”€β”€ .github/ # GitHub templates and workflows +β”‚ β”œβ”€β”€ ISSUE_TEMPLATE/ # Issue templates (bug, feature, cleanup) +β”‚ └── PULL_REQUEST_TEMPLATE/ # PR template with cleanliness checklist +β”œβ”€β”€ docs/ # Documentation +β”‚ β”œβ”€β”€ INTEGRATION.md # Guide for integrating from donor repos +β”‚ └── MAINTENANCE.md # Repository maintenance guide +β”œβ”€β”€ examples/ # Usage examples (when available) +β”œβ”€β”€ scripts/ # Maintenance and utility scripts +β”‚ β”œβ”€β”€ cleanup.sh # Automated cleanup script +β”‚ └── audit.sh # Repository health audit +β”œβ”€β”€ engine/ # Core orchestration (ProSe_Agent2) +β”œβ”€β”€ file_organizer/ # File tools (PSFO) +β”œβ”€β”€ case/ # Local-only examples +β”œβ”€β”€ tests/ # Test files +β”œβ”€β”€ .gitignore # Comprehensive ignore patterns +β”œβ”€β”€ CODE_OF_CONDUCT.md # Repository standards +β”œβ”€β”€ CONTRIBUTING.md # Contribution 
guidelines +└── README.md # This file +``` + +## πŸš€ Getting Started + +### For Contributors + +1. **Read the Guidelines** + - [CONTRIBUTING.md](CONTRIBUTING.md) - How to contribute cleanly + - [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) - Repository standards + +2. **Fork and Clone** + ```bash + git clone https://github.com/cyserman/ProSe.git + cd ProSe + ``` + +3. **Make Your Changes** + - Follow the contribution guidelines + - Keep it clean (no backup files, no temp files) + - Test thoroughly + +4. **Submit a Pull Request** + - Use the PR template + - Ensure all checklist items are complete + +### For Maintainers + +1. **Regular Maintenance** + ```bash + # Run health audit + bash scripts/audit.sh + + # Run cleanup (interactive) + bash scripts/cleanup.sh + ``` + +2. **Integrating from Donor Repositories** + - See [docs/INTEGRATION.md](docs/INTEGRATION.md) for detailed guide + - Use git subtree, cherry-pick, or manual copy + - Always clean during integration + +## 🧹 Keeping It Clean + +### What's NOT Allowed + +- ❌ Backup files (*.bak, *.old, *.backup, *~) +- ❌ Temporary files (*.tmp, temp/, tmp/) +- ❌ Commented-out code +- ❌ Unused imports or functions +- ❌ Debug print statements +- ❌ IDE configuration files +- ❌ Build artifacts +- ❌ Personal notes or TODO files + +### Automated Checks + +Run these scripts before committing: + +```bash +# Check repository health +bash scripts/audit.sh + +# Clean up clutter (interactive) +bash scripts/cleanup.sh +``` + +### Manual Checks + +```bash +# Find backup files +find . -name "*.bak" -o -name "*.old" -o -name "*~" + +# Find temporary files +find . 
-name "*.tmp" -o -name "*.temp" + +# Check for uncommitted files +git status +``` + +## πŸ“š Documentation + +- **[CONTRIBUTING.md](CONTRIBUTING.md)** - Contribution guidelines with focus on cleanliness +- **[CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md)** - Repository standards and enforcement +- **[docs/MAINTENANCE.md](docs/MAINTENANCE.md)** - Maintenance schedule and procedures +- **[docs/INTEGRATION.md](docs/INTEGRATION.md)** - Integrating code from donor repositories + +## πŸ”§ Maintenance Scripts + +### audit.sh +Performs comprehensive health checks on the repository: +- Checks for clutter (backup, temp, OS files) +- Validates file organization +- Checks for large files +- Verifies essential files exist +- Reports overall repository health + +```bash +bash scripts/audit.sh +``` + +### cleanup.sh +Interactive cleanup script that helps identify and remove: +- Backup files +- Temporary files +- OS-generated files +- Empty directories +- IDE configuration files + +```bash +bash scripts/cleanup.sh +``` + +## 🀝 Contributing + +We welcome contributions! Please ensure your contributions: + +1. **Are Clean** - No backup, temp, or unnecessary files +2. **Are Tested** - Include tests for new features +3. **Are Documented** - Update docs as needed +4. **Follow Guidelines** - Read CONTRIBUTING.md first + +### Quick Contribution Checklist + +Before submitting a PR: + +- [ ] No backup files (*.bak, *.old, etc.) +- [ ] No temporary files (*.tmp, temp/) +- [ ] No commented-out code +- [ ] No debug statements +- [ ] All files in proper directories +- [ ] Tests pass +- [ ] Documentation updated +- [ ] Ran `bash scripts/audit.sh` successfully + +## πŸ“ž Support + +- **Issues**: Use GitHub Issues with appropriate labels +- **Questions**: Create an issue with the `question` label +- **Cleanup Requests**: Use the cleanup issue template + +## πŸ”„ Integrating from Donor Repositories + +This repository is designed to receive contributions from other repositories. 
See [docs/INTEGRATION.md](docs/INTEGRATION.md) for: + +- Integration methods (git subtree, cherry-pick, manual copy) +- Pre-integration cleanup checklist +- Step-by-step integration workflow +- Directory mapping guidelines +- Documentation requirements + +## πŸ“Š Repository Health + +Run `bash scripts/audit.sh` to check repository health at any time. + +Current standards: +- βœ… Zero backup files +- βœ… Zero temporary files +- βœ… Organized directory structure +- βœ… Comprehensive .gitignore +- βœ… Complete documentation + +## πŸ“œ License + +[Specify your license here] + +## πŸ™ Acknowledgments + +This repository integrates code from multiple sources while maintaining high standards of quality and organization. + +--- + +**Remember**: A clean repository is a maintainable repository! πŸŽ‰ diff --git a/case/README.md b/case/README.md new file mode 100644 index 0000000..a013078 --- /dev/null +++ b/case/README.md @@ -0,0 +1,10 @@ +# Case Examples +This directory contains **example** case structures only. + +## ⚠️ IMPORTANT +**NEVER put real case data here.** +Real case data (PDFs, evidence, timelines) should live in a `my_real_case/` directory outside of this git repository (or in the gitignored `my_real_case/` folder at the root). + +## Structure +- `DivorceFiles/`: Example input structure +- `Generated/`: Example output structure diff --git a/case/evidence_index.json b/case/evidence_index.json new file mode 100644 index 0000000..1a12ff2 --- /dev/null +++ b/case/evidence_index.json @@ -0,0 +1,140 @@ +{ + "case_id": "my_real_case", + "version": 1, + "generated_at": "2025-11-21T00:00:00Z", + "notes": "Single source of truth for evidence items, sticky notes, and timeline events. 
Replace example items with real data from the Evidence Validator.", + "evidence": [ + { + "id": "CUST-001", + "title": "Missed / ignored calls", + "category": "custody", + "priority": 1, + "description": "Pattern of missed or ignored calls impacting regular contact with the children.", + "sources": { + "csv": true, + "stickies": false, + "timeline": false + }, + "timeline_events": [], + "files": [], + "tags": [ + "phone_access", + "parent-child-contact" + ] + }, + { + "id": "CUST-002", + "title": "Blocked Sunday phone call", + "category": "custody", + "priority": 1, + "description": "Other parent blocked scheduled Sunday phone call with children.", + "sources": { + "csv": true, + "stickies": true, + "timeline": true + }, + "timeline_events": [ + { + "date": "2024-03-10", + "label": "Sunday call blocked", + "note": "Call time agreed; call not answered or blocked. Logged as custody interference.", + "source": "timeline" + } + ], + "files": [ + { + "path": "DivorceFiles/CL-xxx_2024-03-10_call-log.pdf", + "hash_sha256": "", + "exhibit_label": null + } + ], + "tags": [ + "phone_access", + "interference", + "pattern" + ] + }, + { + "id": "SAFE-002", + "title": "Overnight vehicle at marital home", + "category": "safety", + "priority": 2, + "description": "Evidence of another adult’s car staying overnight at the marital home while children present.", + "sources": { + "csv": true, + "stickies": true, + "timeline": true + }, + "timeline_events": [ + { + "date": "2024-05-05", + "label": "Vehicle overnight at marital home", + "note": "Observed vehicle present overnight during custodial time.", + "source": "timeline" + } + ], + "files": [ + { + "path": "DivorceFiles/PH-xxx_2024-05-05_driveway-photo.jpg", + "hash_sha256": "", + "exhibit_label": null + } + ], + "tags": [ + "overnight_guest", + "safety", + "home_environment" + ] + } + ], + "stickies": [ + { + "id": "STICKY-001", + "evidence_id": "CUST-002", + "date": "2024-03-10", + "note": "Wife blocked Sunday call (same date as 
CUST-002 timeline entry).", + "theme": "custody", + "priority": 1 + }, + { + "id": "STICKY-002", + "evidence_id": "SAFE-002", + "date": "2024-05-05", + "note": "Ricky’s car overnight at marital home.", + "theme": "safety", + "priority": 2 + } + ], + "timeline": [ + { + "id": "EVT-2024-03-10-CALL", + "date": "2024-03-10", + "evidence_ids": ["CUST-002"], + "label": "Sunday call blocked", + "category": "custody", + "priority": 1, + "details": "Scheduled Sunday phone contact did not occur; call was blocked or ignored.", + "source": "csv+sticky+timeline" + }, + { + "id": "EVT-2024-05-05-CAR", + "date": "2024-05-05", + "evidence_ids": ["SAFE-002"], + "label": "Vehicle overnight at marital home", + "category": "safety", + "priority": 2, + "details": "Vehicle associated with other adult stayed overnight at marital home while children present.", + "source": "csv+sticky+timeline" + } + ], + "unreferenced_ids": [ + { + "id": "CUST-801", + "reason": "Defined in CSV but not referenced in stickies or timeline yet." + }, + { + "id": "KIDS-802", + "reason": "Placeholder ID not yet mapped to any event." + } + ] +} diff --git a/docs/INTEGRATION.md b/docs/INTEGRATION.md new file mode 100644 index 0000000..da211b6 --- /dev/null +++ b/docs/INTEGRATION.md @@ -0,0 +1,313 @@ +# Integrating Code from Donor Repositories + +## πŸ“š Overview + +This guide explains how to integrate code from other repositories (donor repos) into ProSe while maintaining cleanliness and organization. 

## 🎯 Integration Philosophy

When receiving code from other repositories:
- βœ… **Extract value, not clutter** - Only bring over what's needed
- βœ… **Reorganize during integration** - Don't preserve poor organization
- βœ… **Clean as you integrate** - Fix issues during the move
- βœ… **Document the source** - Track where code came from
- ❌ **Don't bulk copy** - Avoid copying entire repos blindly

## πŸ”„ Integration Methods

### Method 1: Git Subtree (Recommended for ongoing sync)

Best when you want to maintain connection to donor repos and receive updates.

```bash
# Add donor repo as a remote
git remote add donor1 https://github.com/username/donor-repo-1.git
git fetch donor1

# Add as subtree to a specific directory (use the remote name added above)
git subtree add --prefix=engine/proseagent2 donor1 main --squash

# Later, pull updates
git subtree pull --prefix=engine/proseagent2 donor1 main --squash
```

### Method 2: Selective Cherry-Pick (Recommended for one-time imports)

Best when you want specific commits or features without the history.

```bash
# Add donor repo as remote
git remote add donor1 https://github.com/username/donor-repo-1.git
git fetch donor1

# Cherry-pick specific commits
git cherry-pick <commit-hash>

# Or cherry-pick a range
git cherry-pick <first-commit>..<last-commit>
```

### Method 3: Manual Copy with Cleanup (Recommended for maximum control)

Best when donor repos are messy and need significant reorganization.

```bash
# Clone donor repo separately
git clone https://github.com/username/donor-repo-1.git /tmp/donor1

# Copy only what you need
cp -r /tmp/donor1/src/* ./engine/core/
cp -r /tmp/donor1/tests/* ./tests/engine/

# Clean up immediately
bash scripts/cleanup.sh

# Review and commit
git add .
git commit -m "feat: integrate functionality from donor-repo-1"
```

## 🧹 Pre-Integration Cleanup Checklist

Before integrating code from donor repos, prepare it:

### 1.
**Assess What to Import** +- [ ] Identify valuable code/features +- [ ] List files/directories to skip +- [ ] Document purpose of each component +- [ ] Check for dependencies + +### 2. **Clean the Import** +- [ ] Remove backup files (*.bak, *.old) +- [ ] Remove temporary files (*.tmp, temp/) +- [ ] Remove commented-out code +- [ ] Remove debug statements +- [ ] Remove unused imports +- [ ] Remove personal configs + +### 3. **Reorganize for ProSe** +- [ ] Place core engine code in `engine/` +- [ ] Place file tools in `file_organizer/` +- [ ] Place tests in `tests/` +- [ ] Place docs in `docs/` +- [ ] Update paths and imports +- [ ] Ensure naming consistency + +### 4. **Update Documentation** +- [ ] Document what was integrated +- [ ] Update README.md +- [ ] Add examples if needed +- [ ] Note any breaking changes + +## πŸ“‹ Integration Workflow + +### Step-by-Step Process + +1. **Create Integration Branch** + ```bash + git checkout -b integrate/donor-repo-name + ``` + +2. **Add Donor as Remote** (if using git methods) + ```bash + git remote add donor-name + git fetch donor-name + ``` + +3. **Import Code** (choose method above) + - Use subtree, cherry-pick, or manual copy + +4. **Clean Immediately** + ```bash + # Run cleanup script + bash scripts/cleanup.sh + + # Manual review + find . -name "*.bak" -delete + find . -name "*.tmp" -delete + find . -name "*~" -delete + ``` + +5. **Reorganize Files** + ```bash + # Move to proper locations + mv imported-src/* src/donor-name/ + mv imported-tests/* tests/donor-name/ + mv imported-docs/* docs/donor-name/ + ``` + +6. **Fix Imports and Paths** + - Update all import statements + - Fix file paths + - Update configuration files + +7. **Test Thoroughly** + ```bash + # Run all tests + # Build if applicable + # Manual testing + ``` + +8. **Document Integration** + - Create integration notes in `docs/integrations/` + - Update main README + - Document any changes needed + +9. 
**Run Quality Checks** + ```bash + bash scripts/audit.sh + ``` + +10. **Commit Clean Changes** + ```bash + git add . + git commit -m "feat: integrate from + + - Imported: + - Cleaned: + - Reorganized: + + Source: + Original commit: " + ``` + +11. **Create Pull Request** + - Use PR template + - Document integration thoroughly + - Request thorough review + +## πŸ—ΊοΈ Directory Mapping + +Map donor repo structure to ProSe structure: + +### Example Mapping + +``` +Donor Repo β†’ ProSe +───────────────────────────────────────────── +ProSe_Agent2 (Core Logic) β†’ engine/ +ProSe-File-Organizer β†’ file_organizer/ +donor1/test/ β†’ tests/donor1/ +donor1/documentation/ β†’ docs/donor1/ +``` + +### Consolidation Rules + +- **Similar functionality** β†’ Merge into existing directories +- **New modules** β†’ Create new organized structure +- **Utilities** β†’ Evaluate if needed, consolidate if possible +- **Tests** β†’ Keep separate by source initially + +## πŸ“ Documentation Requirements + +For each integration, create a file in `docs/integrations/`: + +```markdown +# Integration: [Donor Repo Name] + +**Date**: YYYY-MM-DD +**Integrated by**: @username +**Source**: [repo-url] +**Commit/Tag**: [commit-hash or tag] + +## What Was Integrated + +- Feature/module 1 +- Feature/module 2 +- ... + +## What Was NOT Integrated + +- Old backup files +- Experimental code +- Personal configurations +- ... + +## Changes Made + +- Reorganized [files] to [new location] +- Removed [deprecated code] +- Updated [imports/paths] +- ... + +## Migration Notes + +Any special notes for using the integrated code. + +## Dependencies Added + +List any new dependencies and why they're needed. + +## Testing + +How the integration was tested. +``` + +## 🚫 What NOT to Import + +Never import from donor repos: + +- ❌ Backup files (*.bak, *.old, etc.) 
+- ❌ Temporary files (*.tmp, temp/) +- ❌ IDE configurations (.vscode/, .idea/) +- ❌ Build artifacts (dist/, build/) +- ❌ Dependencies (node_modules/, venv/) +- ❌ Personal notes or TODO files +- ❌ Commented-out code +- ❌ Experimental/unfinished code +- ❌ Duplicate functionality +- ❌ Unused utilities + +## πŸ”§ Post-Integration Tasks + +After integration: + +1. **Update .gitignore** if needed +2. **Update dependencies** if new ones added +3. **Update CI/CD** if needed +4. **Update documentation** +5. **Announce integration** to team +6. **Archive donor repo** if no longer needed + +## πŸŽ“ Best Practices + +### DO: +βœ… Review all code before importing +βœ… Clean as you integrate +βœ… Test thoroughly after integration +βœ… Document what and why +βœ… Keep integration commits focused +βœ… Maintain git history (when relevant) + +### DON'T: +❌ Bulk import without review +❌ Preserve bad organization +❌ Skip testing +❌ Leave cleanup for later +❌ Import everything blindly +❌ Lose track of source + +## πŸ†˜ Troubleshooting + +### Issue: Too many conflicts during merge +**Solution**: Use manual copy method instead + +### Issue: Donor repo is too messy +**Solution**: Manual copy with aggressive cleanup + +### Issue: Don't want donor repo history +**Solution**: Use `--squash` with subtree or manual copy + +### Issue: Need to track multiple donor repos +**Solution**: Use git subtree with separate prefixes + +## πŸ“ž Questions? + +If you're unsure about how to integrate: +1. Create an issue with the `integration` label +2. Describe what you want to integrate +3. Ask for guidance from maintainers + +--- + +**Remember**: It's better to integrate slowly and cleanly than quickly and messily! 
diff --git a/docs/MAINTENANCE.md b/docs/MAINTENANCE.md new file mode 100644 index 0000000..b52b898 --- /dev/null +++ b/docs/MAINTENANCE.md @@ -0,0 +1,232 @@ +# ProSe Repository Maintenance Guide + +## 🎯 Purpose + +This guide helps maintainers keep the ProSe repository clean, organized, and free from clutter. + +## πŸ“… Maintenance Schedule + +### Daily +- Review new PRs for cleanliness standards +- Check for any accidentally committed backup/temp files + +### Weekly +- Run cleanup audit script +- Review open issues +- Update documentation if needed + +### Monthly +- Dependency updates (if applicable) +- Documentation review and updates +- Archive old branches + +### Quarterly +- Comprehensive repository audit +- Review and update .gitignore +- Update contributing guidelines if needed + +## 🧹 Cleanup Procedures + +### Finding Clutter Files + +Use these commands to find potential clutter: + +```bash +# Find backup files +find . -type f \( -name "*.bak" -o -name "*.old" -o -name "*.backup" -o -name "*~" \) + +# Find temporary files +find . -type f \( -name "*.tmp" -o -name "*.temp" \) + +# Find backup directories +find . -type d \( -name "backup" -o -name "backups" -o -name "old" -o -name "_old" \) + +# Find large files that might be artifacts +find . -type f -size +1M ! -path "./.git/*" + +# Find common IDE config files that shouldn't be committed +find . -type d \( -name ".vscode" -o -name ".idea" \) +``` + +### Removing Clutter + +```bash +# Remove backup files (BE CAREFUL - VERIFY FIRST!) +find . -name "*.bak" -delete +find . -name "*.old" -delete +find . -name "*~" -delete + +# Remove temporary files +find . -name "*.tmp" -delete + +# Remove empty directories +find . -type d -empty -delete +``` + +### Safe Cleanup Workflow + +1. **Identify**: Run find commands to locate clutter +2. **Review**: Manually check each file/directory +3. **Verify**: Ensure files are truly unnecessary +4. **Remove**: Delete confirmed clutter +5. **Test**: Ensure nothing breaks +6. 
**Commit**: Commit cleanup with clear message + +## πŸ“Š Repository Health Checks + +### Regular Checks + +```bash +# Check repository size +du -sh . + +# Check .git size (shouldn't grow too large) +du -sh .git + +# List largest files +find . -type f -exec du -h {} + | sort -rh | head -20 + +# Check for uncommitted files +git status + +# Check for untracked files that should be ignored +git status --ignored +``` + +### Quality Checks + +- Are all files in appropriate directories? +- Is documentation up to date? +- Are there any TODO comments that should be issues? +- Are test files properly organized? +- Is .gitignore comprehensive? + +## 🚫 What to Remove + +### Always Remove +- Backup files (*.bak, *.old, *.backup) +- Temporary files (*.tmp, temp/) +- IDE configuration (.vscode/, .idea/) +- Build artifacts (dist/, build/) +- Dependency directories (node_modules/, venv/) +- Log files (*.log) +- OS-generated files (.DS_Store, Thumbs.db) + +### Consider Removing +- Commented-out code blocks +- Unused functions/classes +- Outdated documentation +- Debug print statements +- Duplicate code + +### Never Remove +- Active source code +- Current tests +- Current documentation +- Configuration files in use +- Git history + +## πŸ”„ Handling Contributions + +### Review Checklist for PRs + +1. **File Check** + - [ ] No backup files + - [ ] No temporary files + - [ ] No IDE configs + - [ ] All files in proper locations + +2. **Code Check** + - [ ] No commented-out code + - [ ] No debug statements + - [ ] No unused imports + - [ ] Clean commit history + +3. **Documentation Check** + - [ ] README updated if needed + - [ ] New features documented + - [ ] Examples provided if applicable + +4. 
**Test Check** + - [ ] Tests included for new features + - [ ] All tests pass + - [ ] Test files properly organized + +### Rejecting PRs + +Reject PRs that: +- Contain backup/temporary files +- Have unclear purpose +- Lack documentation +- Don't follow contribution guidelines +- Break existing functionality + +Provide clear feedback on what needs to be fixed. + +## πŸ“ Directory Structure + +Maintain this structure: + +``` +ProSe/ +β”œβ”€β”€ .github/ # GitHub templates and workflows +β”‚ β”œβ”€β”€ ISSUE_TEMPLATE/ +β”‚ └── PULL_REQUEST_TEMPLATE/ +β”œβ”€β”€ docs/ # Documentation +β”œβ”€β”€ examples/ # Usage examples +β”œβ”€β”€ scripts/ # Maintenance and utility scripts +β”œβ”€β”€ src/ # Source code +β”œβ”€β”€ tests/ # Test files +β”œβ”€β”€ .gitignore # Ignore patterns +β”œβ”€β”€ CODE_OF_CONDUCT.md # Repository standards +β”œβ”€β”€ CONTRIBUTING.md # Contribution guidelines +└── README.md # Project overview +``` + +## πŸ› οΈ Maintenance Scripts + +### Cleanup Script (scripts/cleanup.sh) + +See `scripts/cleanup.sh` for automated cleanup procedures. + +### Audit Script (scripts/audit.sh) + +See `scripts/audit.sh` for repository health checks. + +## πŸ“ž Emergency Procedures + +### If Sensitive Data is Committed + +1. **Don't panic** +2. **Revoke the exposed credentials immediately** +3. **Remove from git history** (requires force push) +4. **Notify affected parties** +5. **Update security practices** + +### If Repository Becomes Cluttered + +1. **Create cleanup branch** +2. **Run audit scripts** +3. **Systematically remove clutter** +4. **Test thoroughly** +5. **Create cleanup PR** +6. 
**Document what was removed** + +## πŸ“š Resources + +- [GitHub Best Practices](https://docs.github.com/en/repositories/creating-and-managing-repositories/best-practices-for-repositories) +- [Git Ignore Patterns](https://git-scm.com/docs/gitignore) +- [Clean Code Principles](https://www.amazon.com/Clean-Code-Handbook-Software-Craftsmanship/dp/0132350882) + +## πŸŽ“ Training + +New maintainers should: +1. Read this guide thoroughly +2. Review CONTRIBUTING.md and CODE_OF_CONDUCT.md +3. Practice with the audit scripts +4. Shadow experienced maintainers +5. Start with small cleanup tasks + +--- + +**Remember**: Prevention is better than cure. Maintain cleanliness from the start! diff --git a/docs/NMP_ARCHITECTURE.md b/docs/NMP_ARCHITECTURE.md new file mode 100644 index 0000000..c7259fc --- /dev/null +++ b/docs/NMP_ARCHITECTURE.md @@ -0,0 +1,67 @@ +# New Modular Program (NMP) Architecture Blueprint + +## 1. Executive Summary +The New Modular Program (NMP) transforms the monolithic ProSe legal tool into a decoupled, service-oriented architecture. This design separates the "Legal Truth" (Data Core) from the "File Operations" (Utility Workers), allowing for independent scaling and safer data handling. + +## 2. Integration Blueprint + +### Conceptual Architecture +The NMP adopts a microservices-inspired layered architecture: + +```mermaid +graph TD + User[Client / Case Manager] --> API[API Gateway / Orchestrator] + + subgraph "Core Domain (High Integrity)" + API --> EC[Evidence Core Service] + EC --> V[Validator Module] + EC --> I[Index Manager] + I --> DB[(Structured Data\nJSON/CSV)] + end + + subgraph "Utility Domain (Stateless Workers)" + API --> FP[File Processor Service] + FP --> PDF[PDF Safety & OCR] + FP --> FO[File Organizer] + end + + subgraph "External/Donor Integration" + FO -.-> D_PDF[Donor: PDF Scripts] + FO -.-> D_Ren[Donor: Rename Logic] + end +``` + +### Module Roles +1. **Evidence Core Service (ECS):** + * **Role:** The "Single Source of Truth". 
Manages the `evidence_index.json`, enforces data integrity via `EvidenceValidator`, and handles CRUD operations for evidence items. + * **Key Characteristic:** High reliability, strict schema validation. +2. **File Processor Service (FPS):** + * **Role:** The "Laborer". Handles raw file operations (PDF merging, sanitization, hashing, OCR). + * **Key Characteristic:** Stateless, computationally intensive, isolatable (can crash without corrupting data). +3. **API Gateway / Orchestrator:** + * **Role:** Interfaces with the User or AI Agents. Routes requests to ECS or FPS. + +## 3. Component Inventory (Good Pieces of Code - GPC) +*Retained from Repo A (ProSe Core)* + +| Component ID | Original Source | NMP Role | Justification | +| :--- | :--- | :--- | :--- | +| **Evidence Validator** | `engine/agents/evidence_validator.py` | `ECS.Validator` | Critical logic for cross-referencing CSV/JSON/Timeline data. Ensures data consistency. | +| **Evidence Exporter** | `engine/agents/evidence_validator.py` | `ECS.Exporter` | Generates the canonical `evidence_index.json`. Essential for downstream consumption. | +| **Evidence Schema** | `case/evidence_index.json` (structure) | `ECS.Schema` | Defines the data contract for the entire system. Proven, robust data model. | +| **Batch Core** | `engine/core/batch_enhanced.py` (Memory) | `Orchestrator` | Logic for batch processing commands (if verified as robust). | + +## 4. Donor List (Donor Pieces of Code - DPC) +*Harvested from Repo B (File Organizer Tools)* + +| Component ID | Original Source | NMP Role | Cleanup Action Required | +| :--- | :--- | :--- | :--- | +| **PDF Safety Check** | `file_organizer` (PDF scripts) | `FPS.Safety` | Isolate dependency on specific PDF libs. Abstract into `IPdfSanitizer` interface. | +| **File Renamer** | `file_organizer` (Rename logic) | `FPS.Organizer` | Remove hardcoded paths. Parameterize naming conventions. 
| +| **Intake Processor** | `file_organizer` (Intake scripts) | `FPS.Ingest` | Decouple from specific folder structures. Make input/output paths configurable. | +| **Voice Code Coach** | `voice_code_coach` (Port 5002) | `Standalone Tool` | Keep as a separate dev-tool service. Do not integrate into Core. | + +## 5. Implementation Plan +1. **Refactor ECS:** Extract `evidence_validator.py` into a Python package `prose.core.evidence` with clear public APIs. +2. **Wrapper for FPS:** Create a wrapper around `file_organizer` scripts to expose them as callable functions/API endpoints, removing CLI dependencies. +3. **Dockerize:** Containerize ECS and FPS separately to enforce decoupling. diff --git a/docs/PSFO_Agent_Report.md b/docs/PSFO_Agent_Report.md new file mode 100644 index 0000000..6f30381 --- /dev/null +++ b/docs/PSFO_Agent_Report.md @@ -0,0 +1,76 @@ +# ProSe "Clean Hub" Report for PSFO Agent + +**Date:** November 2025 +**To:** PSFO Agent (Main Orchestrator) +**From:** ProSe Repository Initializer +**Subject:** Status of Main "Clean Hub" Repository + +--- + +## πŸš€ Executive Summary + +The main ProSe repository has been successfully initialized as a **"Clean Hub"**. This repository is designed to be the stable, production-ready destination for code developed in the donor repositories (`ProSe_Agent2` and `ProSe-File-Organizer`). + +**Current Status:** 🟒 **READY FOR INTEGRATION** + +The infrastructure is in place to receive code, but **no donor code has been merged yet**. The repository is currently a clean vessel waiting for the "good parts" to be poured in. + +--- + +## πŸ—οΈ Repository Philosophy + +We have established a strict separation of concerns: + +1. **The Donors (The Workshop):** + * `ProSe_Agent2`: Experimental engine code, agent logic. + * `ProSe-File-Organizer` (PSFO): File handling scripts, organization tools. + * *Status:* These contain the "messy reality" β€” backups, temp files, and rapid iterations. + +2. 
**ProSe Main (The Cathedral):** + * **Strictly Clean:** Zero tolerance for `.bak`, `.tmp`, or dead code. + * **Privacy First:** **ABSOLUTELY NO LIVE CASE DATA.** + * **Structured:** Code is reorganized into a logical hierarchy (`engine/`, `file_organizer/`, `case/`), not just dumped in the root. + +--- + +## πŸ“‚ Target Structure + +We have defined the target structure for the integration. Code from donors should be mapped as follows: + +| Source (Donor) | Destination (ProSe Main) | Description | +| :--- | :--- | :--- | +| `ProSe_Agent2/` | `engine/` | Core orchestration, agents, and logic. | +| `ProSe-File-Organizer/` | `file_organizer/` | File processing, PDF tools, organization scripts. | +| *(Local User Data)* | `case/` | **GITIGNORED.** Local-only folders for input/output. | + +--- + +## πŸ›‘οΈ Safety & Privacy Protocols + +To ensure this repository remains safe for public/team usage: + +1. **Gitignore Firewall:** + * `_INBOX/`, `out/`, and `my_real_case/` are explicitly ignored. + * This prevents accidental commitment of sensitive affidavits, financial records, or evidence. + +2. **Cleanliness Enforcers:** + * `scripts/cleanup.sh`: Interactive tool to scrub junk before committing. + * `scripts/audit.sh`: CI/CD check that fails if backup files are detected. + * `CONTRIBUTING.md`: Updated with explicit "No Live Data" warnings. + +--- + +## πŸ“‹ Next Steps for PSFO Agent + +The repository is ready. The immediate next task is the **Migration Phase**: + +1. **Select:** Identify the specific, working modules from PSFO and Agent2. +2. **Clean:** Run them through a cleanup pass (remove hardcoded paths, strip personal data). +3. **Import:** Copy them into `engine/` or `file_organizer/` in this repo. +4. **Verify:** Run the smoke tests to ensure the "transplant" was successful. + +**Recommendation:** Use **Method 3 (Manual Copy)** from `docs/INTEGRATION.md` for the first migration. 
This allows you to leave the accumulated "cruft" behind and only bring the best code forward. + +--- + +*End of Report* diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..d395996 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,38 @@ +# Documentation + +This directory contains all ProSe documentation. + +## Current Documentation + +- **[INTEGRATION.md](INTEGRATION.md)** - Guide for integrating code from donor repositories +- **[MAINTENANCE.md](MAINTENANCE.md)** - Repository maintenance procedures and schedules + +## Organization + +Additional documentation should be organized by topic: + +``` +docs/ +β”œβ”€β”€ integrations/ # Integration notes for each donor repo +β”œβ”€β”€ api/ # API documentation +β”œβ”€β”€ guides/ # User guides +└── development/ # Development documentation +``` + +## Creating Integration Notes + +When integrating from a donor repository, create a file in `integrations/`: + +``` +docs/integrations/donor-repo-name-YYYY-MM-DD.md +``` + +See [INTEGRATION.md](INTEGRATION.md) for the template. + +## Guidelines + +- Keep documentation up to date +- Use clear, concise language +- Include examples where helpful +- No outdated documentation +- Remove deprecated docs when features are removed diff --git a/docs/STRATEGIC_EXPANSION.md b/docs/STRATEGIC_EXPANSION.md new file mode 100644 index 0000000..1da076a --- /dev/null +++ b/docs/STRATEGIC_EXPANSION.md @@ -0,0 +1,58 @@ +# Strategic Expansion & Optimization Roadmap + +## Part 1: Strategic Expansion Proposal (The "Good Hard Look") + +The core technology of ProSe (structured evidence mapping, cross-referencing, and document assembly) has applications far beyond family law. + +### Opportunity 1: Academic Research & Dissertation Management +* **Concept:** Adapting the "Evidence Index" into a "Citation Matrix". Researchers struggle to map hundreds of primary sources (PDFs) to specific arguments (Timeline/Stickies) and draft text. 
+* **Why:** High value, high complexity, similar workflow to litigation (Fact -> Evidence -> Argument). +* **Architectural Tweak:** Rename `Evidence_ID` to `Source_ID`. Add support for BibTeX/Zotero import in the `File Processor Service`. +* **Partners:** University Libraries, Research Software providers (e.g., creators of Zotero/Mendeley). + +### Opportunity 2: Small Business Regulatory Compliance (OSHA/ISO) +* **Concept:** Automating the "Audit Trail". Businesses must prove compliance (Evidence) against specific regulations (Timeline/Requirements) across dates. +* **Why:** Mandatory market, recurring revenue. The `Validator` logic is perfect for ensuring "no missing proof" for a required audit point. +* **Architectural Tweak:** Add a `Regulation_Standard` layer to the schema (replacing "Case Law"). Pre-load standard compliance templates. +* **Partners:** Industry-specific associations (e.g., Construction Safety Councils), ISO Consultants. + +### Opportunity 3: Construction Claim Dispute Resolution +* **Concept:** Managing "Change Orders" and "Delay Claims". Construction disputes rely heavily on timelines and photo evidence (site photos) mapped to specific dates and contract clauses. +* **Why:** High dollar value disputes. The `timeline.csv` structure is exactly what forensic schedulers use. +* **Architectural Tweak:** Enhance `timeline.csv` to support "Planned vs. Actual" date columns. Optimize image handling in `File Processor`. +* **Partners:** Construction Project Management Software (Procore, Autodesk), Dispute Resolution Boards. + +--- + +## Part 2: Optimization Roadmap (Operational Excellence) + +To achieve "best-in-class" performance and mitigate risk, the following technological investments are recommended. + +### 1. Automated Document Classification (AI/ML) +* **Current State:** Manual sorting or filename-based rules. 
+* **Upgrade:** Implement a local NLP model (BERT-based or similar) within the `File Processor Service` to auto-tag documents (e.g., "Bank Statement", "School Report", "Police Log") upon ingestion. +* **Impact:** Drastically reduces user setup time. Increases accuracy of the "Evidence Index". +* **Risk Mitigation:** Runs locally (privacy-preserving). Reduces human error in mislabeling sensitive files. + +### 2. Immutable Audit Logs (Blockchain/Merkle Tree) +* **Current State:** File hashes are stored, but the `evidence_index.json` is mutable. +* **Upgrade:** Implement a Merkle Tree structure for the `Evidence Core`. Every change to the timeline or evidence set generates a unique root hash. Periodically anchor this hash to a public ledger or a digital notary service. +* **Impact:** "Tamper-proof" evidence chains. Essential for court admissibility and trust in high-conflict cases. +* **Risk Mitigation:** Proves that evidence was not altered after a certain date. + +### 3. PII Redaction Pipeline (Computer Vision) +* **Current State:** Manual redaction or reliance on basic PDF tools. +* **Upgrade:** Integrate a Computer Vision module into the `File Processor Service` to automatically detect and suggest redactions for SSNs, phone numbers, and minors' faces in photos. +* **Impact:** Protects user privacy. Speeds up discovery production. +* **Risk Mitigation:** Significantly lowers the risk of accidental data leaks (Unauthorized Practice of Law / Privacy violations). + +--- + +## Risk Analysis: Expanding Pro Se Services + +* **Unauthorized Practice of Law (UPL):** + * *Risk:* The system might be seen as giving "legal advice" rather than "legal information". + * *Mitigation:* Strict hard-coded disclaimers. The system must frame all outputs as "Drafts for Review". The `Validator` only checks *consistency*, not *legal strategy*. +* **Data Privacy:** + * *Risk:* Storing sensitive user data (financials, custody photos). 
+ * *Mitigation:* The "Clean Hub" architecture (current design) is excellent. Ensure `File Processor` remains stateless and cleans up temp files immediately. Zero-knowledge encryption for any cloud sync. diff --git a/docs/Strategic_Analysis.md b/docs/Strategic_Analysis.md new file mode 100644 index 0000000..a97be0d --- /dev/null +++ b/docs/Strategic_Analysis.md @@ -0,0 +1,68 @@ +# Strategic Analysis & Future Roadmap + +**Date:** November 2025 +**Context:** Reflection on the ProSe architecture, clean hub transition, and Evidence Validator results. + +--- + +## πŸ”₯ Strategic & Practical Assessment + +### 1️⃣ The Foundation is Solid +The system isn't just a "summary" tool anymore; it's a professional-grade case system with a clean data model. +- **Key Assets:** Stable IDs, category namespaces, priority levels, multi-table indexing, and cross-validation. +- **Value:** This is leverage. It's the kind of system courts pay experts to build. + +### 2️⃣ The "Boss" (Case Manager) is Operational +With the new `CONTRIBUTING.md`, strict `engine/` structure, and repo cleanup: +- We have conditions for **clean ingestion**, **predictable output**, and **safe separation** of personal data. +- The "Boss" isn't defined by authority, but by **stability**. + +### 3️⃣ The "Truth Table" (Validator Output) +The validator output serves as the **ground truth** for the AI agents. +- It maps IDs across CSVs, stickies, and timelines. +- It flags non-referenced IDs. +- **Impact:** This allows the Case Manager to move from "helpful assistant" to "operational engine" for generating affidavits, motions, and contradiction detection. + +### 4️⃣ Modular & Scalable +The evidence structure is plug-and-play: +- New CSV rows don't break the system. +- IDs can scale from 9 to 900. +- IDs can be converted to exhibits without renaming files. + +### 5️⃣ Risk Management: Preventing Fragmentation +The biggest risk has been fragmentation (multiple versions, scattered backups). 
+- **Solution:** The "Clean Hub" repository prevents drift. +- **Mechanism:** The Evidence Validator prevents chaos. +- **Result:** Everything is finally lining up. + +### 6️⃣ Next Evolution: Single Source of Truth +The goal is a single JSON file: **`case/evidence_index.json`**. +- It becomes the input for the Case Manager. +- It acts as the "truth table" for all generated docs. +- It provides the safety net against human error. + +### 7️⃣ Elevating the Case Manager +The Case Manager GPT evolves from a chat assistant to: +- **Case Architect** +- **Evidence Auditor** +- **Factual Consistency Engine** +- **Motion Generator** + +### 8️⃣ Conclusion +We are building a **legal knowledge engine** backed by validated structured data, not just a file organizer. + +--- + +## 🧭 Recommended Next Move + +**Immediate Action:** +Convert the Evidence Validator output into **`case/evidence_index.json`** in the ProSe repo. + +**Future Capabilities to Build:** +1. The Parser +2. The Summarizer +3. The Affidavit Generator +4. The Motion Builder +5. The Timeline Compiler + +*This structure allows the Case Manager to stop being "helpful" and start being in charge.* diff --git a/docs/Summary_Generator_Plan.md b/docs/Summary_Generator_Plan.md new file mode 100644 index 0000000..10f6cf6 --- /dev/null +++ b/docs/Summary_Generator_Plan.md @@ -0,0 +1,5 @@ +# Summary Generator Plan + +Defines a clean folder structure and three-tier rollout (manual β†’ semi-auto β†’ automated brief builder). +Input/Output model is already compatible with your current Git-synced pipeline. +Integration point: attach to your out/ folder and use generated timeline.csv as a structured data feed. 
diff --git a/docs/System_Architecture_Report.md b/docs/System_Architecture_Report.md new file mode 100644 index 0000000..c4971f0 --- /dev/null +++ b/docs/System_Architecture_Report.md @@ -0,0 +1,4 @@ +# System Architecture Report + +Confirms the autonomous agent framework foundation (Python multiprocess, GPT summarization, REST control layer). +You’re already halfway to a full self-healing litigation assistant. The watchdog can supervise prose_core agents and summary generation tasks in one loop. diff --git a/docs/integrations/README.md b/docs/integrations/README.md new file mode 100644 index 0000000..f733926 --- /dev/null +++ b/docs/integrations/README.md @@ -0,0 +1,35 @@ +# Integration Notes + +This directory contains notes about code integrated from donor repositories. + +## Purpose + +Each integration should be documented with: +- What was integrated +- When it was integrated +- Where it came from +- What changes were made +- What was NOT integrated (and why) + +## Naming Convention + +Files should be named: +``` +[donor-repo-name]-[YYYY-MM-DD].md +``` + +For example: +- `donor-repo-1-2025-11-21.md` +- `legacy-system-2025-12-01.md` + +## Template + +See [../INTEGRATION.md](../INTEGRATION.md) for the integration documentation template. + +## Benefits + +Documenting integrations helps: +- Track where code came from +- Understand what was cleaned during integration +- Reference for future updates +- Audit trail for maintainability diff --git a/engine/agents/evidence_validator.py b/engine/agents/evidence_validator.py new file mode 100644 index 0000000..f57e77e --- /dev/null +++ b/engine/agents/evidence_validator.py @@ -0,0 +1,439 @@ +""" +Evidence validator and exporter for ProSe. 
+ +Cross-checks: +- Custody_Mod_Evidence.csv (master evidence list) +- sticky_index.json (sticky notes referencing evidence_ids) +- timeline.csv (timeline events with Evidence_IDs field) + +and reports: +- unknown IDs in stickies/timeline +- unused evidence IDs in the CSV + +It can also export a canonical evidence_index.json that conforms +to case/evidence_index.schema.json. +""" + +from __future__ import annotations + +import csv +import json +import datetime +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Any, Dict, List, Set + +# Filenames expected inside a case directory +CUSTODY_CSV_NAME = "Custody_Mod_Evidence.csv" +STICKY_JSON_NAME = "sticky_index.json" +TIMELINE_CSV_NAME = "timeline.csv" + + +@dataclass +class EvidenceValidationResult: + """ + Simple container for validation summary data. + """ + evidence_count: int + sticky_count: int + timeline_count: int + unknown_in_stickies: List[str] + unknown_in_timeline: List[str] + unused_evidence: List[str] + status: str # "OK" or "WARN" + + +def _load_evidence_ids(base: Path) -> Set[str]: + """ + Load all Evidence_ID values from the Custody_Mod_Evidence.csv. + """ + path = base / CUSTODY_CSV_NAME + ids: Set[str] = set() + + if not path.exists(): + print(f"[evidence_validator] Warning: {path} not found") + return ids + + with path.open(newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + eid = (row.get("Evidence_ID") or "").strip() + if eid: + ids.add(eid) + return ids + + +def _load_sticky_ids(base: Path) -> Set[str]: + """ + Load Evidence_IDs referenced in sticky_index.json. + Expects a list of objects with 'evidence_ids': [...] 
+ """ + path = base / STICKY_JSON_NAME + ids: Set[str] = set() + + if not path.exists(): + return ids + + data = json.loads(path.read_text(encoding="utf-8")) + for sticky in data: + for eid in sticky.get("evidence_ids", []): + eid_clean = (eid or "").strip() + if eid_clean: + ids.add(eid_clean) + return ids + + +def _load_timeline_ids(base: Path) -> Set[str]: + """ + Load Evidence_IDs referenced in timeline.csv. + Expects 'Evidence_IDs' column with ';'-separated IDs. + """ + path = base / TIMELINE_CSV_NAME + ids: Set[str] = set() + + if not path.exists(): + return ids + + with path.open(newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + field = row.get("Evidence_IDs") or "" + for eid in [x.strip() for x in field.split(";") if x.strip()]: + ids.add(eid) + return ids + + +def validate_case(base: Path) -> Dict[str, Any]: + """ + Validate evidence links for a given case directory. + + :param base: Path to directory containing: + - Custody_Mod_Evidence.csv + - sticky_index.json + - timeline.csv + :return: dict suitable for JSON or further processing. + """ + base = Path(base) + + evidence_ids = _load_evidence_ids(base) + sticky_ids = _load_sticky_ids(base) + timeline_ids = _load_timeline_ids(base) + + unknown_in_stickies = sticky_ids - evidence_ids + unknown_in_timeline = timeline_ids - evidence_ids + unused_evidence = evidence_ids - sticky_ids - timeline_ids + + status = "OK" + if unknown_in_stickies or unknown_in_timeline: + status = "WARN" + + result = EvidenceValidationResult( + evidence_count=len(evidence_ids), + sticky_count=len(sticky_ids), + timeline_count=len(timeline_ids), + unknown_in_stickies=sorted(unknown_in_stickies), + unknown_in_timeline=sorted(unknown_in_timeline), + unused_evidence=sorted(unused_evidence), + status=status, + ) + return asdict(result) + + +def print_report(result: Dict[str, Any]) -> None: + """ + Pretty-print validation results to the console. 
+ """ + print(f"Evidence IDs in CSV: {result['evidence_count']}") + print(f"Referenced in stickies: {result['sticky_count']}") + print(f"Referenced in timeline: {result['timeline_count']}") + + unknown_in_stickies = result["unknown_in_stickies"] + unknown_in_timeline = result["unknown_in_timeline"] + unused_evidence = result["unused_evidence"] + + if unknown_in_stickies: + print("\n⚠ Unknown Evidence_IDs in stickies (not in CSV):") + for eid in unknown_in_stickies: + print(f" - {eid}") + + if unknown_in_timeline: + print("\n⚠ Unknown Evidence_IDs in timeline (not in CSV):") + for eid in unknown_in_timeline: + print(f" - {eid}") + + if unused_evidence: + print("\nβ„Ή Evidence_IDs in CSV not referenced yet (fine, but FYI):") + for eid in unused_evidence: + print(f" - {eid}") + + if not (unknown_in_stickies or unknown_in_timeline): + print("\nβœ… Links look consistent. Nice work.") + else: + print("\n⚠ Validation completed with warnings. Review above items.") + + +# ========= EXPORTER HELPERS ========= + + +def _load_evidence_rows(base: Path) -> List[Dict[str, Any]]: + """ + Load full evidence rows from Custody_Mod_Evidence.csv. + + Expected columns (extra columns are ignored): + Evidence_ID, Title, Category, Priority, Description, Tags + """ + path = base / CUSTODY_CSV_NAME + rows: List[Dict[str, Any]] = [] + + if not path.exists(): + print(f"[evidence_exporter] Warning: {path} not found") + return rows + + with path.open(newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + rows.append(row) + return rows + + +def _load_stickies_raw(base: Path) -> List[Dict[str, Any]]: + """ + Load sticky_index.json as-is. + + Expected shape: list of objects with at least: + - evidence_ids: [ ... ] OR evidence_id: "..." 
+ - date, note, theme, priority (optional) + """ + path = base / STICKY_JSON_NAME + if not path.exists(): + return [] + try: + data = json.loads(path.read_text(encoding="utf-8")) + if isinstance(data, list): + return data + print("[evidence_exporter] Warning: sticky_index.json is not a list") + return [] + except json.JSONDecodeError as e: + print(f"[evidence_exporter] Error decoding {path}: {e}") + return [] + + +def _load_timeline_rows(base: Path) -> List[Dict[str, Any]]: + """ + Load full timeline rows from timeline.csv. + + Expected columns: + Date, Label, Category, Priority, Details, Evidence_IDs, Source, (optional ID) + """ + path = base / TIMELINE_CSV_NAME + rows: List[Dict[str, Any]] = [] + + if not path.exists(): + return rows + + with path.open(newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + rows.append(row) + return rows + + +def build_evidence_index(base: Path, case_id: str | None = None) -> Dict[str, Any]: + """ + Build a full evidence_index.json-style dict from: + - Custody_Mod_Evidence.csv + - sticky_index.json + - timeline.csv + + This does NOT write to disk; use write_evidence_index() for that. + """ + base = Path(base) + + # 1. Load raw sources + evidence_rows = _load_evidence_rows(base) + sticky_rows = _load_stickies_raw(base) + timeline_rows = _load_timeline_rows(base) + + # 2. 
Seed evidence dict from CSV + evidence_map: Dict[str, Dict[str, Any]] = {} + referenced_in_stickies: Set[str] = set() + referenced_in_timeline: Set[str] = set() + + for row in evidence_rows: + eid = (row.get("Evidence_ID") or "").strip() + if not eid: + continue + + title = (row.get("Title") or row.get("Short_Title") or "").strip() + category = (row.get("Category") or "").strip().lower() or "uncategorized" + + # Priority: best-effort int conversion + priority_raw = (row.get("Priority") or "").strip() + try: + priority = int(priority_raw) if priority_raw else None + except ValueError: + priority = None + + description = (row.get("Description") or "").strip() + + tags_raw = (row.get("Tags") or "").strip() + tags = [t.strip() for t in tags_raw.split(",") if t.strip()] if tags_raw else [] + + evidence_map[eid] = { + "id": eid, + "title": title or eid, + "category": category, + "priority": priority, + "description": description, + "sources": { + "csv": True, + "stickies": False, + "timeline": False, + }, + "timeline_events": [], + "files": [], + "tags": tags, + } + + # 3. 
Build stickies section and update evidence sources + stickies_out: List[Dict[str, Any]] = [] + for sticky in sticky_rows: + # Normalize evidence_ids + evidence_ids_field = sticky.get("evidence_ids") + if not evidence_ids_field: + single = sticky.get("evidence_id") + if single: + evidence_ids = [single] + else: + evidence_ids = [] + else: + evidence_ids = evidence_ids_field + + normalized_eids: List[str] = [] + for eid in evidence_ids: + eid_clean = (eid or "").strip() + if not eid_clean: + continue + normalized_eids.append(eid_clean) + referenced_in_stickies.add(eid_clean) + if eid_clean in evidence_map: + evidence_map[eid_clean]["sources"]["stickies"] = True + + entry = { + "id": sticky.get("id") or "", + "evidence_id": normalized_eids[0] if len(normalized_eids) == 1 else None, + "evidence_ids": normalized_eids, + "date": sticky.get("date") or "", + "note": sticky.get("note") or "", + "theme": sticky.get("theme") or "", + "priority": sticky.get("priority"), + } + stickies_out.append(entry) + + # 4. 
Build timeline section and update evidence timeline_events + timeline_out: List[Dict[str, Any]] = [] + for row in timeline_rows: + date = (row.get("Date") or "").strip() + label = (row.get("Label") or "").strip() + category = (row.get("Category") or "").strip().lower() or "" + details = (row.get("Details") or "").strip() + priority_raw = (row.get("Priority") or "").strip() + try: + priority = int(priority_raw) if priority_raw else None + except ValueError: + priority = None + source = (row.get("Source") or "").strip() + + evidence_ids_field = row.get("Evidence_IDs") or "" + evidence_ids = [x.strip() for x in evidence_ids_field.split(";") if x.strip()] + + evt_id = row.get("ID") or f"EVT-{date}-{label.replace(' ', '-')}".strip() + + for eid in evidence_ids: + referenced_in_timeline.add(eid) + if eid in evidence_map: + evidence_map[eid]["sources"]["timeline"] = True + evidence_map[eid]["timeline_events"].append( + { + "date": date, + "label": label, + "note": details, + "source": source or "timeline", + } + ) + + timeline_out.append( + { + "id": evt_id, + "date": date, + "label": label, + "category": category, + "priority": priority, + "details": details, + "evidence_ids": evidence_ids, + "source": source or "timeline", + } + ) + + # 5. Unreferenced IDs (from CSV but not used anywhere) + unreferenced_ids: List[Dict[str, str]] = [] + for eid in sorted(evidence_map.keys()): + if eid not in referenced_in_stickies and eid not in referenced_in_timeline: + unreferenced_ids.append( + { + "id": eid, + "reason": "Defined in CSV but not referenced in stickies or timeline.", + } + ) + + # 6. 
Assemble final index dict + if case_id is None: + # Default: use directory name as case_id + case_id = base.name + + index: Dict[str, Any] = { + "case_id": case_id, + "generated_at": datetime.datetime.utcnow().isoformat() + "Z", + "notes": "", + "evidence": list(evidence_map.values()), + "stickies": stickies_out, + "timeline": timeline_out, + "unreferenced_ids": unreferenced_ids, + } + return index + + +def write_evidence_index(base: Path, output_path: Path | None = None, case_id: str | None = None) -> Path: + """ + Build and write evidence_index.json for the given case directory. + + - base: case directory containing the CSV/JSON/Timeline inputs + - output_path: where to write the evidence_index.json file + defaults to base / "evidence_index.json" + """ + base = Path(base) + if output_path is None: + output_path = base / "evidence_index.json" + + index = build_evidence_index(base, case_id=case_id) + output_path.write_text(json.dumps(index, indent=2), encoding="utf-8") + print(f"[evidence_exporter] Wrote evidence index to {output_path}") + return output_path + + +if __name__ == "__main__": + # CLI usage examples: + # python -m engine.agents.evidence_validator my_real_case + # python -m engine.agents.evidence_validator my_real_case --export + import sys + + if len(sys.argv) > 1: + base_dir = Path(sys.argv[1]) + else: + base_dir = Path("my_real_case") + + if "--export" in sys.argv: + write_evidence_index(base_dir) + else: + result_dict = validate_case(base_dir) + print_report(result_dict) diff --git a/engine/core/README.md b/engine/core/README.md new file mode 100644 index 0000000..cd81744 --- /dev/null +++ b/engine/core/README.md @@ -0,0 +1,35 @@ +# Engine Core + +This directory contains the core orchestration logic for ProSe (primarily from ProSe_Agent2). 
+ +## Organization + +Code should be organized by function: + +``` +engine/ +β”œβ”€β”€ core/ # Main engine loop and orchestrator +β”œβ”€β”€ agents/ # Specific agent implementations +└── utils/ # Shared utilities +``` + +## Guidelines + +- Keep code organized by feature or source +- No backup files (*.bak, *.old) +- No temporary files (*.tmp) +- Remove commented-out code +- Document all public APIs +- Follow existing code style + +## Adding Code + +When adding code from donor repositories: + +1. Place core logic here or in `engine/agents/` +2. Clean the code before committing +3. Update imports and paths +4. Add tests in `tests/` +5. Update documentation + +See [../../docs/INTEGRATION.md](../../docs/INTEGRATION.md) for detailed integration guidelines. diff --git a/engine/core/batch_enhanced.py b/engine/core/batch_enhanced.py new file mode 100644 index 0000000..0525bd9 --- /dev/null +++ b/engine/core/batch_enhanced.py @@ -0,0 +1,6 @@ + +def main(): + print("Running batch enhanced...") + +if __name__ == "__main__": + main() diff --git a/engine/core/judge_timeline_pdf.py b/engine/core/judge_timeline_pdf.py new file mode 100644 index 0000000..4cbb19a --- /dev/null +++ b/engine/core/judge_timeline_pdf.py @@ -0,0 +1,6 @@ + +def main(): + print("Judging timeline PDF...") + +if __name__ == "__main__": + main() diff --git a/engine/core/prose_core/__init__.py b/engine/core/prose_core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/engine/core/prose_core/hashing.py b/engine/core/prose_core/hashing.py new file mode 100644 index 0000000..fd0a1bd --- /dev/null +++ b/engine/core/prose_core/hashing.py @@ -0,0 +1,3 @@ + +def hash_file(): + pass diff --git a/engine/core/prose_core/pdf_safety.py b/engine/core/prose_core/pdf_safety.py new file mode 100644 index 0000000..9648a34 --- /dev/null +++ b/engine/core/prose_core/pdf_safety.py @@ -0,0 +1,3 @@ + +def check_safety(): + pass diff --git a/engine/core/prose_core/timeline_writer.py 
b/engine/core/prose_core/timeline_writer.py new file mode 100644 index 0000000..544173b --- /dev/null +++ b/engine/core/prose_core/timeline_writer.py @@ -0,0 +1,3 @@ + +def write_timeline(): + pass diff --git a/engine/core/start_prose.py b/engine/core/start_prose.py new file mode 100644 index 0000000..d9bb215 --- /dev/null +++ b/engine/core/start_prose.py @@ -0,0 +1,6 @@ + +def main(): + print("Starting ProSe watcher...") + +if __name__ == "__main__": + main() diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..a5e77ba --- /dev/null +++ b/examples/README.md @@ -0,0 +1,33 @@ +# Examples + +This directory contains usage examples for ProSe. + +## Organization + +Examples should be organized by feature or use case: + +``` +examples/ +β”œβ”€β”€ basic/ # Basic usage examples +β”œβ”€β”€ advanced/ # Advanced usage examples +β”œβ”€β”€ donor1/ # Examples for donor1 functionality +└── donor2/ # Examples for donor2 functionality +``` + +## Guidelines + +- Keep examples simple and focused +- Include comments explaining what's happening +- Ensure examples actually work +- No backup or temporary files +- Clean up any generated files in examples + +## Adding Examples + +When adding examples: + +1. Create a descriptive directory name +2. Add a README explaining the example +3. Keep code clean and well-commented +4. Test that the example works +5. Document any dependencies or setup required diff --git a/file_organizer/README.md b/file_organizer/README.md new file mode 100644 index 0000000..f5853a6 --- /dev/null +++ b/file_organizer/README.md @@ -0,0 +1,11 @@ +# File Organizer Tools +This directory contains tools imported from the `ProSe-File-Organizer` (PSFO) repository. + +## Purpose +Scripts here handle: +- PDF processing and safety checks +- File renaming and organization +- Intake processing + +## Integration Note +When importing from PSFO, ensure you strip out any hardcoded paths or personal configurations before committing here. 
diff --git a/scripts/audit.sh b/scripts/audit.sh new file mode 100755 index 0000000..b8c867d --- /dev/null +++ b/scripts/audit.sh @@ -0,0 +1,167 @@ +#!/bin/bash + +# ProSe Repository Health Audit Script +# Performs comprehensive health checks on the repository + +set -e + +echo "πŸ” ProSe Repository Health Audit" +echo "=================================" +echo "" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +print_header() { + echo -e "${BLUE}>>> $1${NC}" +} + +print_pass() { + echo -e "${GREEN}βœ“${NC} $1" +} + +print_warn() { + echo -e "${YELLOW}⚠${NC} $1" +} + +print_fail() { + echo -e "${RED}βœ—${NC} $1" +} + +ISSUES=0 + +# 1. Repository Size Check +print_header "Repository Size" +REPO_SIZE=$(du -sh . | cut -f1) +GIT_SIZE=$(du -sh .git 2>/dev/null | cut -f1 || echo "N/A") +echo " Total size: $REPO_SIZE" +echo " .git size: $GIT_SIZE" +echo "" + +# 2. File Organization Check +print_header "File Organization" + +# Check if directories exist +if [ -d "engine" ]; then + print_pass "engine/ directory exists" +else + print_warn "engine/ directory missing" + ((ISSUES++)) +fi + +if [ -d "file_organizer" ]; then + print_pass "file_organizer/ directory exists" +else + print_warn "file_organizer/ directory missing" + ((ISSUES++)) +fi + +if [ -d "tests" ]; then + print_pass "tests/ directory exists" +else + print_warn "tests/ directory missing" + ((ISSUES++)) +fi + +if [ -d "docs" ]; then + print_pass "docs/ directory exists" +else + print_warn "docs/ directory missing" + ((ISSUES++)) +fi + +echo "" + +# 3. Check for clutter +print_header "Clutter Detection" + +BACKUP_COUNT=$(find . -type f \( -name "*.bak" -o -name "*.old" -o -name "*.backup" -o -name "*~" \) ! -path "./.git/*" 2>/dev/null | wc -l) +if [ "$BACKUP_COUNT" -eq 0 ]; then + print_pass "No backup files found" +else + print_fail "Found $BACKUP_COUNT backup files" + ((ISSUES++)) +fi + +TEMP_COUNT=$(find . -type f \( -name "*.tmp" -o -name "*.temp" \) ! 
-path "./.git/*" 2>/dev/null | wc -l) +if [ "$TEMP_COUNT" -eq 0 ]; then + print_pass "No temporary files found" +else + print_fail "Found $TEMP_COUNT temporary files" + ((ISSUES++)) +fi + +OS_COUNT=$(find . -type f \( -name ".DS_Store" -o -name "Thumbs.db" \) ! -path "./.git/*" 2>/dev/null | wc -l) +if [ "$OS_COUNT" -eq 0 ]; then + print_pass "No OS-generated files found" +else + print_fail "Found $OS_COUNT OS-generated files" + ((ISSUES++)) +fi + +echo "" + +# 4. Essential Files Check +print_header "Essential Files" + +for file in "README.md" ".gitignore" "CONTRIBUTING.md" "CODE_OF_CONDUCT.md"; do + if [ -f "$file" ]; then + print_pass "$file exists" + else + print_warn "$file missing" + ((ISSUES++)) + fi +done + +echo "" + +# 5. Git Status +print_header "Git Status" +if git diff --quiet && git diff --cached --quiet; then + print_pass "Working directory clean" +else + print_warn "Uncommitted changes present" +fi + +UNTRACKED=$(git ls-files --others --exclude-standard | wc -l) +if [ "$UNTRACKED" -eq 0 ]; then + print_pass "No untracked files" +else + print_warn "$UNTRACKED untracked files" +fi + +echo "" + +# 6. Large Files +print_header "Large Files Check" +LARGE_COUNT=$(find . -type f -size +1M ! -path "./.git/*" 2>/dev/null | wc -l) +if [ "$LARGE_COUNT" -eq 0 ]; then + print_pass "No large files (>1MB) found" +else + print_warn "Found $LARGE_COUNT large files" + echo " Largest files:" + find . -type f -size +1M ! -path "./.git/*" -exec du -h {} \; 2>/dev/null | sort -rh | head -5 | sed 's/^/ /' +fi + +echo "" + +# Summary +echo "=================================" +if [ $ISSUES -eq 0 ]; then + echo -e "${GREEN}βœ… Repository Health: EXCELLENT${NC}" + echo "No issues found!" 
+else + echo -e "${YELLOW}⚠️ Repository Health: NEEDS ATTENTION${NC}" + echo "Found $ISSUES potential issues" + echo "" + echo "Run 'bash scripts/cleanup.sh' to address some issues automatically" +fi + +echo "" +echo "See docs/MAINTENANCE.md for more information" + +exit $ISSUES diff --git a/scripts/cleanup.sh b/scripts/cleanup.sh new file mode 100755 index 0000000..9df9e90 --- /dev/null +++ b/scripts/cleanup.sh @@ -0,0 +1,169 @@ +#!/bin/bash + +# ProSe Repository Cleanup Script +# This script helps identify and remove clutter from the repository + +set -e + +echo "🧹 ProSe Repository Cleanup Script" +echo "===================================" +echo "" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +# Function to print colored output +print_info() { + echo -e "${GREEN}[INFO]${NC} $1" +} + +print_warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +print_error() { + echo -e "${RED}[ERROR]${NC} $1" +} + +# Check if we're in a git repository +if [ ! -d ".git" ]; then + print_error "Not in a git repository root!" + exit 1 +fi + +echo "Starting cleanup audit..." +echo "" + +# 1. Find backup files +print_info "Checking for backup files..." +BACKUP_FILES=$(find . -type f \( -name "*.bak" -o -name "*.old" -o -name "*.backup" -o -name "*~" \) ! -path "./.git/*" 2>/dev/null || true) + +if [ -n "$BACKUP_FILES" ]; then + print_warning "Found backup files:" + echo "$BACKUP_FILES" + echo "" + read -p "Remove these backup files? (y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + find . -type f \( -name "*.bak" -o -name "*.old" -o -name "*.backup" -o -name "*~" \) ! -path "./.git/*" -delete + print_info "Backup files removed!" + fi +else + print_info "No backup files found βœ“" +fi +echo "" + +# 2. Find temporary files +print_info "Checking for temporary files..." +TEMP_FILES=$(find . -type f \( -name "*.tmp" -o -name "*.temp" \) ! 
-path "./.git/*" 2>/dev/null || true) + +if [ -n "$TEMP_FILES" ]; then + print_warning "Found temporary files:" + echo "$TEMP_FILES" + echo "" + read -p "Remove these temporary files? (y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + find . -type f \( -name "*.tmp" -o -name "*.temp" \) ! -path "./.git/*" -delete + print_info "Temporary files removed!" + fi +else + print_info "No temporary files found βœ“" +fi +echo "" + +# 3. Find backup directories +print_info "Checking for backup directories..." +BACKUP_DIRS=$(find . -type d \( -name "backup" -o -name "backups" -o -name "old" -o -name "_old" \) ! -path "./.git/*" 2>/dev/null || true) + +if [ -n "$BACKUP_DIRS" ]; then + print_warning "Found backup directories:" + echo "$BACKUP_DIRS" + echo "" + print_warning "Review these directories manually before removing!" +else + print_info "No backup directories found βœ“" +fi +echo "" + +# 4. Find IDE configuration files +print_info "Checking for IDE configuration files..." +IDE_CONFIGS=$(find . -type d \( -name ".vscode" -o -name ".idea" \) ! -path "./.git/*" 2>/dev/null || true) + +if [ -n "$IDE_CONFIGS" ]; then + print_warning "Found IDE configuration directories:" + echo "$IDE_CONFIGS" + echo "" + print_warning "These should be in .gitignore and not committed!" +else + print_info "No IDE configuration files found βœ“" +fi +echo "" + +# 5. Find empty directories +print_info "Checking for empty directories..." +EMPTY_DIRS=$(find . -type d -empty ! -path "./.git/*" 2>/dev/null || true) + +if [ -n "$EMPTY_DIRS" ]; then + print_warning "Found empty directories:" + echo "$EMPTY_DIRS" + echo "" + read -p "Remove empty directories? (y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + find . -type d -empty ! -path "./.git/*" -delete 2>/dev/null || true + print_info "Empty directories removed!" + fi +else + print_info "No empty directories found βœ“" +fi +echo "" + +# 6. Check for large files +print_info "Checking for large files (>1MB)..." +LARGE_FILES=$(find . 
-type f -size +1M ! -path "./.git/*" 2>/dev/null || true) + +if [ -n "$LARGE_FILES" ]; then + print_warning "Found large files:" + find . -type f -size +1M ! -path "./.git/*" -exec du -h {} \; 2>/dev/null | sort -rh + echo "" + print_warning "Review these files - they might be build artifacts or should be in .gitignore" +else + print_info "No large files found βœ“" +fi +echo "" + +# 7. Check for common clutter patterns +print_info "Checking for OS-generated files..." +OS_FILES=$(find . -type f \( -name ".DS_Store" -o -name "Thumbs.db" -o -name "desktop.ini" \) ! -path "./.git/*" 2>/dev/null || true) + +if [ -n "$OS_FILES" ]; then + print_warning "Found OS-generated files:" + echo "$OS_FILES" + echo "" + read -p "Remove these OS files? (y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + find . -type f \( -name ".DS_Store" -o -name "Thumbs.db" -o -name "desktop.ini" \) ! -path "./.git/*" -delete + print_info "OS-generated files removed!" + fi +else + print_info "No OS-generated files found βœ“" +fi +echo "" + +# Summary +echo "===================================" +echo "βœ… Cleanup audit complete!" +echo "" +print_info "Next steps:" +echo " 1. Review any warnings above" +echo " 2. Manually check flagged items" +echo " 3. Update .gitignore if needed" +echo " 4. Run 'git status' to see changes" +echo " 5. Commit cleanup if appropriate" +echo "" +print_info "For more information, see docs/MAINTENANCE.md" diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..a1c3094 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,44 @@ +# Tests + +This directory contains all test files for ProSe. 
+ +## Organization + +Tests should be organized to mirror the source code structure: + +``` +tests/ +β”œβ”€β”€ donor1/ # Tests for code from donor1 +β”œβ”€β”€ donor2/ # Tests for code from donor2 +β”œβ”€β”€ core/ # Tests for core functionality +└── integration/ # Integration tests +``` + +## Guidelines + +- Test file names should match source files (e.g., `test_module.py` for `module.py`) +- Include both unit tests and integration tests +- Aim for high test coverage +- Keep tests clean and well-documented +- No debug print statements in committed tests + +## Running Tests + +[Add instructions for running tests once test framework is set up] + +```bash +# Example (adjust based on your test framework) +# python -m pytest tests/ +# npm test +# go test ./... +``` + +## Adding Tests + +When integrating code from donor repositories: + +1. Bring over relevant tests +2. Update import paths +3. Ensure tests pass +4. Add additional tests if coverage is lacking +5. Remove any test fixtures or temp files after tests run diff --git a/tests/smoke/__init__.py b/tests/smoke/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/smoke/test_evidence_links.py b/tests/smoke/test_evidence_links.py new file mode 100644 index 0000000..bcf1930 --- /dev/null +++ b/tests/smoke/test_evidence_links.py @@ -0,0 +1,78 @@ +import csv +import json +from pathlib import Path + +BASE = Path("my_real_case") # adjust base if needed + +custody_csv = BASE / "Custody_Mod_Evidence.csv" +sticky_json = BASE / "sticky_index.json" +timeline_csv = BASE / "timeline.csv" + +def load_evidence_ids(): + ids = set() + if not custody_csv.exists(): + print(f"Error: {custody_csv} not found") + return ids + with custody_csv.open(newline='', encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + eid = row.get("Evidence_ID") + if eid: + ids.add(eid.strip()) + return ids + +def load_sticky_ids(): + ids = set() + if not sticky_json.exists(): + return ids + data = 
json.loads(sticky_json.read_text(encoding="utf-8")) + for sticky in data: + for eid in sticky.get("evidence_ids", []): + ids.add(eid.strip()) + return ids + +def load_timeline_ids(): + ids = set() + if not timeline_csv.exists(): + return ids + with timeline_csv.open(newline='', encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + field = row.get("Evidence_IDs") or "" + for eid in [x.strip() for x in field.split(";") if x.strip()]: + ids.add(eid) + return ids + +def main(): + evidence_ids = load_evidence_ids() + sticky_ids = load_sticky_ids() + timeline_ids = load_timeline_ids() + + print(f"Evidence IDs in CSV: {len(evidence_ids)}") + print(f"Referenced in stickies: {len(sticky_ids)}") + print(f"Referenced in timeline: {len(timeline_ids)}") + + unknown_in_stickies = sticky_ids - evidence_ids + unknown_in_timeline = timeline_ids - evidence_ids + + if unknown_in_stickies: + print("\n⚠ Unknown Evidence_IDs in stickies (not in CSV):") + for eid in sorted(unknown_in_stickies): + print(" -", eid) + + if unknown_in_timeline: + print("\n⚠ Unknown Evidence_IDs in timeline (not in CSV):") + for eid in sorted(unknown_in_timeline): + print(" -", eid) + + unused_evidence = evidence_ids - sticky_ids - timeline_ids + if unused_evidence: + print("\nβ„Ή Evidence_IDs in CSV not referenced yet (fine, but FYI):") + for eid in sorted(unused_evidence): + print(" -", eid) + + if not (unknown_in_stickies or unknown_in_timeline): + print("\nβœ… Links look consistent. Nice work.") + +if __name__ == "__main__": + main()