diff --git a/.github/workflows/skill-validation.yml b/.github/workflows/skill-validation.yml index 5dab68f..516f281 100644 --- a/.github/workflows/skill-validation.yml +++ b/.github/workflows/skill-validation.yml @@ -9,185 +9,120 @@ permissions: contents: read jobs: - # Job 1: Generate matrix from config prepare: runs-on: ubuntu-latest if: github.event.issue.pull_request && contains(github.event.comment.body, '/test') && github.actor == 'Ariel-Rodriguez' outputs: - matrix: ${{ steps.generate-matrix.outputs.matrix }} - filter: ${{ steps.parse.outputs.filter }} - use-parallel: ${{ steps.parse.outputs.use-parallel }} - + matrix: ${{ steps.matrix.outputs.result }} + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v5 + - name: Parse test command - id: parse + id: command run: | - COMMENT="${{ github.event.comment.body }}" - FILTER="all" - USE_PARALLEL="false" - - # Check for specific provider filter - if echo "$COMMENT" | grep -q "/test copilot"; then - FILTER="copilot" - elif echo "$COMMENT" | grep -q "/test ollama"; then - FILTER="ollama" - elif echo "$COMMENT" | grep -q "/test gemini"; then - FILTER="gemini" - elif echo "$COMMENT" | grep -q "/test all"; then - FILTER="all" - fi - - # Check for parallel flag - if echo "$COMMENT" | grep -q "parallel"; then - USE_PARALLEL="true" + SKILLS=$(python3 ci/parse_test_command.py "${{ github.event.comment.body }}") + echo "skills=$SKILLS" >> $GITHUB_OUTPUT + if [ -n "$SKILLS" ]; then + echo "Override skills: $SKILLS" + else + echo "Auto-detecting changed skills..." 
fi - - echo "filter=$FILTER" >> $GITHUB_OUTPUT - echo "use-parallel=$USE_PARALLEL" >> $GITHUB_OUTPUT - echo "Testing with filter: $FILTER (parallel: $USE_PARALLEL)" - - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Install PyYAML - run: pip install pyyaml - name: Generate matrix - id: generate-matrix - run: | - MATRIX=$(python3 ci/matrix_generator.py --filter-provider "${{ steps.parse.outputs.filter }}") - echo "matrix=$MATRIX" >> $GITHUB_OUTPUT - echo "Generated matrix:" - echo "$MATRIX" | jq . - - # Job 2: Run evaluations (parallel or sequential based on parse) - evaluate: - needs: prepare - runs-on: ubuntu-latest - if: needs.prepare.outputs.use-parallel == 'false' - - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - ref: refs/pull/${{ github.event.issue.number }}/head - - - name: Setup Node.js - uses: actions/setup-node@v4 - with: - node-version: '20' - - - name: Install Copilot CLI - run: npm install -g @github/copilot - - - name: Install uv - uses: astral-sh/setup-uv@v5 - with: - enable-cache: true - - - name: Set up Python - run: uv python install 3.11 - - - name: Run orchestration + id: matrix env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - COPILOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }} + PR_NUMBER: ${{ github.event.issue.number }} run: | - chmod +x ci/*.sh - ./ci/orchestrate_evaluations.sh ${{ github.event.issue.number }} --filter-provider "${{ needs.prepare.outputs.filter }}" + python3 ci/generate_matrix.py "$PR_NUMBER" ${{ steps.command.outputs.skills }} > /tmp/matrix.json - - name: Post results - uses: marocchino/sticky-pull-request-comment@v2 - if: always() && hashFiles('comment.md') != '' - with: - number: ${{ github.event.issue.number }} - path: comment.md + echo "result<<EOF" >> $GITHUB_OUTPUT + cat /tmp/matrix.json >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + + python3 -c 
"import json; m=json.load(open('/tmp/matrix.json')); print(f'Generated {len(m[\"include\"])} jobs')" - # Job 3: Run evaluations in parallel (matrix strategy) - evaluate-parallel: + evaluate: needs: prepare runs-on: ubuntu-latest - if: needs.prepare.outputs.use-parallel == 'true' strategy: fail-fast: false + max-parallel: 2 matrix: ${{ fromJson(needs.prepare.outputs.matrix) }} - + steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - ref: refs/pull/${{ github.event.issue.number }}/head + - uses: actions/checkout@v4 - - name: Setup Node.js - uses: actions/setup-node@v4 + - uses: actions/setup-node@v4 + if: matrix.provider == 'copilot' with: - node-version: '20' + node-version: "20" - name: Install Copilot CLI if: matrix.provider == 'copilot' run: npm install -g @github/copilot - - name: Install uv - uses: astral-sh/setup-uv@v5 + - uses: astral-sh/setup-uv@v5 with: enable-cache: true - - name: Set up Python - run: uv python install 3.11 - - - name: Detect changes - id: changes - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Set artifact name + id: artifact run: | - chmod +x ci/detect_changes.sh - MODIFIED_SKILLS=$(./ci/detect_changes.sh ${{ github.event.issue.number }}) - echo "modified_skills=$MODIFIED_SKILLS" >> $GITHUB_OUTPUT + ARTIFACT_NAME="results-${{ matrix.provider }}-${{ matrix.model }}-${{ matrix.skill || 'all' }}" + ARTIFACT_NAME="${ARTIFACT_NAME//:/--}" + echo "name=$ARTIFACT_NAME" >> $GITHUB_OUTPUT - - name: Run evaluation for ${{ matrix.display_name }} + - name: Evaluate ${{ matrix.display_name }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} COPILOT_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} OLLAMA_API_KEY: ${{ secrets.OLLAMA_API_KEY }} - MODIFIED_SKILLS: ${{ steps.changes.outputs.modified_skills }} run: | - chmod +x ci/run_evaluation.sh - ./ci/run_evaluation.sh "${{ matrix.provider }}" "${{ matrix.model }}" 50 "${{ matrix.extra_args }}" + CMD="uv run --project tests --frozen tests/evaluator.py" + CMD="$CMD --provider ${{ 
matrix.provider }}" + CMD="$CMD --model ${{ matrix.model }}" + CMD="$CMD --judge --verbose --report --threshold 50" + + if [ -n "${{ matrix.extra_args }}" ]; then + CMD="$CMD ${{ matrix.extra_args }}" + fi + + if [ -n "${{ matrix.skill }}" ]; then + CMD="$CMD --skill ${{ matrix.skill }}" + else + CMD="$CMD --all" + fi + + echo "Running: $CMD" + eval "$CMD" - name: Upload results - uses: actions/upload-artifact@v4 if: always() + uses: actions/upload-artifact@v4 with: - name: results-${{ matrix.provider }}-${{ matrix.model }} - path: tests/results/${{ matrix.provider }}/${{ matrix.model }}/ + name: ${{ steps.artifact.outputs.name }} + path: tests/results/ + retention-days: 1 - # Job 4: Consolidate parallel results consolidate: - needs: [prepare, evaluate-parallel] + needs: evaluate runs-on: ubuntu-latest - if: needs.prepare.outputs.use-parallel == 'true' && always() - + if: always() + steps: - - name: Checkout code - uses: actions/checkout@v4 + - uses: actions/checkout@v4 - - name: Download all artifacts - uses: actions/download-artifact@v4 + - uses: actions/download-artifact@v4 with: path: tests/results/ - name: Consolidate results - run: | - chmod +x ci/consolidate_results.sh - ./ci/consolidate_results.sh || true + run: python3 ci/consolidate_results.py - - name: Post consolidated results + - name: Post to PR uses: marocchino/sticky-pull-request-comment@v2 if: hashFiles('comment.md') != '' with: diff --git a/ci/collect-artifacts.sh b/ci/collect-artifacts.sh deleted file mode 100755 index ca7d9c0..0000000 --- a/ci/collect-artifacts.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -set -e - -echo "Collecting artifacts..." -find artifacts -name "*.zip" -exec mv {} . 
\; - -echo "βœ“ Artifacts collected:" -ls -lh *.zip diff --git a/ci/config.yaml b/ci/config.yaml index 743024e..394582c 100644 --- a/ci/config.yaml +++ b/ci/config.yaml @@ -4,11 +4,11 @@ ollama: enabled: true local: false - context: 32000 + context: 28000 models: - - gpt-oss:20b-cloud - rnj-1:8b-cloud - devstral-small-2:24b-cloud +# - gpt-oss:20b-cloud copilot: enabled: false diff --git a/ci/consolidate_results.py b/ci/consolidate_results.py new file mode 100644 index 0000000..6591efd --- /dev/null +++ b/ci/consolidate_results.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +""" +Consolidate results from parallel evaluations. + +Aggregates outputs from multiple provider/model runs and generates summary. +""" + +import json +import sys +from pathlib import Path + + +def main(): + results_base = Path("tests/results") + + print("==> Consolidating results from all evaluations") + + # Find all summary.json files (artifacts download to flat structure) + summary_files = sorted(results_base.glob("*/summary.json")) + + if not summary_files: + print(f"No results found in {results_base}") + print(f"Directory contents: {list(results_base.glob('*'))}") + # Write empty comment so job doesn't fail + Path("comment.md").write_text("# Evaluation Results\n\nNo results found.\n") + sys.exit(0) + + # Check for failures + failed = 0 + succeeded = 0 + all_results = [] + + print("\nResults:") + print("========") + + for summary_file in summary_files: + try: + summary = json.loads(summary_file.read_text()) + artifact_name = summary_file.parent.name + + all_results.append({ + "artifact": artifact_name, + "summary": summary, + }) + + print(f"βœ… {artifact_name}") + succeeded += 1 + + except json.JSONDecodeError: + print(f"❌ {summary_file.parent.name} - Could not parse JSON") + failed += 1 + + print("\nSummary:") + print("========") + print(f"Processed: {len(all_results)}") + print(f"Failed: {failed}") + + # Generate comment + comment = "# πŸ“Š Evaluation Results\n\n" + + if all_results: + 
comment += f"Processed {len(all_results)} evaluation(s).\n\n" + + # Build table with results + comment += "| Test Name | Model | Baseline | With Skill | Cases Pass | Winner |\n" + comment += "|-----------|-------|----------|------------|------------|--------|\n" + + # Rating hierarchy for comparison + rating_hierarchy = {'vague': 0, 'regular': 1, 'good': 2, 'outstanding': 3} + + for result in all_results: + summary = result['summary'] + artifact = result['artifact'] + + # Debug: Log top-level keys + print(f"\nπŸ“‹ Processing: {artifact}") + print(f" Top-level keys: {list(summary.keys())}") + + # Extract key data from nested results[0] + eval_result = summary.get('results', [{}])[0] if summary.get('results') else {} + print(f" Nested result keys: {list(eval_result.keys())}") + + skill = eval_result.get('skill', 'N/A') + model = eval_result.get('model', 'N/A') + baseline_rating = eval_result.get('baseline_rating', 'N/A') + skill_rating = eval_result.get('skill_rating', 'N/A') + baseline_pass = eval_result.get('baseline_pass_count', 'N/A') + skill_pass = eval_result.get('skill_pass_count', 'N/A') + overall_better = eval_result.get('judgment', {}).get('overall_better', 'N/A') + + print(f" Extracted: model={model}, baseline={baseline_rating}, skill={skill_rating}, winner={overall_better}") + + # Determine winner + if overall_better == 'A': + winner = "Baseline" + elif overall_better == 'B': + winner = "With Skill" + elif overall_better == 'TIE': + winner = "Tie" + else: + winner = "N/A" + + # Determine emoji for cases pass (skill >= baseline in rating hierarchy) + baseline_score = rating_hierarchy.get(baseline_rating, -1) + skill_score = rating_hierarchy.get(skill_rating, -1) + pass_emoji = "βœ…" if skill_score >= baseline_score else "❌" + + # Build row + test_link = f"[{artifact}]()" + comment += f"| {test_link} | {model} | {baseline_rating} | {skill_rating} | {pass_emoji} {skill_pass} | {winner} |\n" + + comment += "\n" + else: + comment += "No evaluation results 
found.\n" + + Path("comment.md").write_text(comment) + print("\nβœ“ Comment saved to comment.md") + + sys.exit(0) + + +if __name__ == "__main__": + import os + main() diff --git a/ci/consolidate_results.sh b/ci/consolidate_results.sh deleted file mode 100755 index 6f1e270..0000000 --- a/ci/consolidate_results.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/bash -set -e - -# consolidate_results.sh -# Consolidates results from multiple parallel evaluation runs. -# Composition Over Coordination: Aggregates outputs from independent units. -# Single Responsibility: Only consolidates, doesn't evaluate. - -RESULTS_BASE="tests/results" - -echo "==> Consolidating results from all evaluations" - -# Find all result directories -RESULT_DIRS=$(find "$RESULTS_BASE" -type d -name "*" -mindepth 2) - -if [ -z "$RESULT_DIRS" ]; then - echo "No results found in $RESULTS_BASE" - exit 1 -fi - -# Check for failures -FAILED=0 -SUCCEEDED=0 -ALL_RESULTS="" - -echo "" -echo "Results by provider/model:" -echo "==========================" - -for dir in $RESULT_DIRS; do - if [ ! -f "$dir/exit_code" ]; then - continue - fi - - EXIT_CODE=$(cat "$dir/exit_code") - PROVIDER_MODEL=$(echo "$dir" | sed "s|$RESULTS_BASE/||") - - if [ "$EXIT_CODE" -eq 0 ]; then - echo "βœ… $PROVIDER_MODEL - PASSED" - SUCCEEDED=$((SUCCEEDED + 1)) - else - echo "❌ $PROVIDER_MODEL - FAILED (exit code: $EXIT_CODE)" - FAILED=$((FAILED + 1)) - fi - - # Collect summary if exists - if [ -f "$dir/summary.json" ]; then - ALL_RESULTS="$ALL_RESULTS\n### Results: $PROVIDER_MODEL\n" - ALL_RESULTS="$ALL_RESULTS$(cat "$dir/summary.json")\n" - fi -done - -echo "" -echo "Summary:" -echo "========" -echo "Succeeded: $SUCCEEDED" -echo "Failed: $FAILED" - -# Generate consolidated comment for PR -if [ -n "$PR_NUMBER" ] && [ "$FAILED" -eq 0 ]; then - echo "" - echo "Generating consolidated PR comment..." 
- - COMMENT="## πŸŽ‰ All Evaluations Passed\n\n" - COMMENT="${COMMENT}Tested across $SUCCEEDED provider/model combinations.\n\n" - COMMENT="${COMMENT}${ALL_RESULTS}" - - echo -e "$COMMENT" > comment.md - echo "βœ“ Consolidated comment saved to comment.md" -fi - -# Exit with failure if any evaluation failed -if [ "$FAILED" -gt 0 ]; then - echo "" - echo "❌ $FAILED evaluation(s) failed" - exit 1 -fi - -echo "" -echo "βœ… All evaluations passed" -exit 0 diff --git a/ci/detect_changes.py b/ci/detect_changes.py new file mode 100644 index 0000000..2fc0028 --- /dev/null +++ b/ci/detect_changes.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +""" +Detect modified skills in a PR. + +Outputs space-separated skill names to stdout (for GitHub Actions integration). +""" + +import subprocess +import sys +from pathlib import Path + + +def main(): + if len(sys.argv) < 2: + print("Error: PR number not provided.", file=sys.stderr) + print("Usage: detect_changes.py ", file=sys.stderr) + sys.exit(1) + + pr_number = sys.argv[1] + + # Check for GITHUB_TOKEN + import os + if not os.environ.get("GITHUB_TOKEN"): + print("Error: GITHUB_TOKEN not set", file=sys.stderr) + sys.exit(1) + + try: + # Get modified files from PR + result = subprocess.run( + ["gh", "pr", "diff", pr_number, "--name-only"], + capture_output=True, + text=True, + check=False, + ) + + if result.returncode != 0: + print("", end="") # Empty output + return + + # Extract skill names from skills/* paths + skills = set() + for line in result.stdout.strip().split("\n"): + if line.startswith("skills/"): + skill_name = line.split("/")[1] + if skill_name: + skills.add(skill_name) + + # Output space-separated names + print(" ".join(sorted(skills))) + + except Exception as e: + print(f"Error: {e}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/ci/detect_changes.sh b/ci/detect_changes.sh deleted file mode 100755 index 9f4040c..0000000 --- a/ci/detect_changes.sh +++ /dev/null @@ -1,34 +0,0 @@ 
-#!/bin/bash -set -e - -# detect_changes.sh -# Detects modified skills in a PR. -# Single Responsibility: Only detects changes, doesn't run evaluations. -# Explicit Boundaries: Separates change detection from evaluation logic. - -PR_NUMBER=$1 - -if [ -z "$PR_NUMBER" ]; then - echo "Error: PR number not provided." - echo "Usage: $0 " - exit 1 -fi - -if [ -z "$GITHUB_TOKEN" ]; then - echo "Error: GITHUB_TOKEN not set" - exit 1 -fi - -echo "Detecting modified skills in PR #$PR_NUMBER..." - -# Detect modified skills -MODIFIED_FILES=$(gh pr diff "$PR_NUMBER" --name-only) -MODIFIED_SKILLS=$(echo "$MODIFIED_FILES" | grep '^skills/' | cut -d'/' -f2 | sort -u | xargs) - -if [ -n "$MODIFIED_SKILLS" ]; then - echo "Modified skills: $MODIFIED_SKILLS" - echo "$MODIFIED_SKILLS" -else - echo "No skills modified - will test all" - echo "" -fi diff --git a/ci/generate_matrix.py b/ci/generate_matrix.py new file mode 100644 index 0000000..daa38f3 --- /dev/null +++ b/ci/generate_matrix.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +""" +Generate matrix from config with optional skill override. + +If skills are provided, creates one matrix item per skill per model. +Otherwise uses orchestrator to detect changed skills. 
+""" + +import subprocess +import json +import sys +from pathlib import Path + + +def generate_base_matrix(filter_provider: str = "all") -> dict: + """Generate base matrix from config.""" + result = subprocess.run( + ["uv", "run", "--with", "pyyaml", "ci/matrix_generator.py", "--filter-provider", filter_provider], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + print(f"Error generating matrix: {result.stderr}", file=sys.stderr) + sys.exit(1) + + try: + return json.loads(result.stdout) + except json.JSONDecodeError: + print(f"Error parsing matrix JSON: {result.stdout}", file=sys.stderr) + sys.exit(1) + + +def expand_with_override_skills(base_matrix: dict, skills: list) -> dict: + """Expand matrix with override skills.""" + matrix = {"include": []} + + for item in base_matrix["include"]: + for skill in skills: + new_item = item.copy() + new_item["skill"] = skill + new_item["display_name"] = f"{item['display_name']} / {skill}" + matrix["include"].append(new_item) + + return matrix + + +def main(): + if len(sys.argv) < 2: + print("Usage: generate_matrix.py [skill1 skill2 ...]") + sys.exit(1) + + pr_number = sys.argv[1] + override_skills = sys.argv[2:] if len(sys.argv) > 2 else [] + + if override_skills: + # Use override skills + print(f"Using override skills: {override_skills}", file=sys.stderr) + base_matrix = generate_base_matrix("all") + matrix = expand_with_override_skills(base_matrix, override_skills) + else: + # Auto-detect from PR + print(f"Auto-detecting changed skills in PR #{pr_number}", file=sys.stderr) + result = subprocess.run( + ["python3", "ci/orchestrate_evaluations.py", str(pr_number), "--matrix-only", "--filter-provider", "all"], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + print(f"Error detecting changes: {result.stderr}", file=sys.stderr) + sys.exit(1) + + matrix = json.loads(result.stdout) + + # Output matrix + print(json.dumps(matrix)) + + +if __name__ == "__main__": + main() diff --git 
a/ci/matrix_generator.py b/ci/matrix_generator.py index d85eccc..1ea8537 100644 --- a/ci/matrix_generator.py +++ b/ci/matrix_generator.py @@ -152,8 +152,8 @@ def main(): if len(matrix["include"]) == 0: print("Warning: No enabled providers match filter criteria", file=sys.stderr) - # Output as JSON - print(json.dumps(matrix, indent=2)) + # Output ONLY JSON (no extra output to stdout) + print(json.dumps(matrix)) if __name__ == "__main__": diff --git a/ci/orchestrate_evaluations.py b/ci/orchestrate_evaluations.py new file mode 100644 index 0000000..7436301 --- /dev/null +++ b/ci/orchestrate_evaluations.py @@ -0,0 +1,247 @@ +#!/usr/bin/env python3 +""" +Orchestrate skill evaluations across multiple providers/models. + +Refactored for scalability following programming skills: +- Policy-Mechanism Separation: Config drives execution +- Composition Over Coordination: Small focused scripts combined +- Single Responsibility: Each script does one thing +- Explicit State Invariants: Configuration validated upfront +- Single Direction Data Flow: Clear pipeline of operations +""" + +import subprocess +import sys +import os +import json +from pathlib import Path +import argparse + + +def validate_environment(): + """Validate required environment variables.""" + if not os.environ.get("GITHUB_TOKEN"): + print("❌ Error: GITHUB_TOKEN not set") + sys.exit(1) + + if not os.environ.get("OLLAMA_API_KEY"): + print("⚠ Warning: OLLAMA_API_KEY not set (required for Ollama)") + + return True + + +def detect_changes(pr_number: int) -> str: + """Detect modified skills in PR.""" + if not os.environ.get("GITHUB_TOKEN"): + return "" + + result = subprocess.run( + ["python3", "ci/detect_changes.py", str(pr_number)], + capture_output=True, + text=True, + ) + + modified_skills = result.stdout.strip() + return modified_skills + + +def generate_matrix(filter_provider: str = "all", skills: str = "") -> list: + """Generate evaluation matrix from configuration with per-skill jobs.""" + + result = 
subprocess.run( + ["uv", "run", "--with", "pyyaml", "ci/matrix_generator.py", "--filter-provider", filter_provider], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + print(f"Error generating matrix: {result.stderr}", file=sys.stderr) + sys.exit(1) + + try: + matrix_data = json.loads(result.stdout) + except json.JSONDecodeError: + print(f"Error parsing matrix JSON: {result.stdout}", file=sys.stderr) + sys.exit(1) + + items = matrix_data.get("include", []) + + if not items: + print("Error: No enabled providers in configuration", file=sys.stderr) + sys.exit(1) + + # If skills are specified, expand matrix to one item per skill per model + if skills and skills.strip(): + skill_list = skills.strip().split() + expanded_items = [] + + for item in items: + for skill in skill_list: + expanded_item = item.copy() + expanded_item["skill"] = skill + expanded_item["display_name"] = f"{item['display_name']} / {skill}" + expanded_items.append(expanded_item) + + items = expanded_items + + return items + + +def run_sequential(items: list, threshold: int = 50): + """Run evaluations sequentially.""" + print("\n==> Running evaluations sequentially") + + for item in items: + provider = item["provider"] + model = item["model"] + extra_args = item.get("extra_args", "") + + result = subprocess.run( + ["python3", "ci/run_evaluation.py", provider, model, str(threshold), extra_args] + ) + + if result.returncode != 0: + print(f"⚠ Evaluation failed for {provider}/{model}") + + print("βœ“ All evaluations completed") + + # Consolidate + subprocess.run(["python3", "ci/consolidate_results.py"]) + + +def run_parallel_local(items: list, threshold: int = 50): + """Run evaluations in parallel locally (one job per skill per model).""" + print(f"\n==> Running {len(items)} evaluation(s) in parallel") + + import concurrent.futures + + def run_single_eval(item): + provider = item["provider"] + model = item["model"] + extra_args = item.get("extra_args", "") + skill = item.get("skill") 
+ + display = f"{provider}/{model}" + (f"/{skill}" if skill else "") + print(f"[{display}] Starting...") + + cmd = [ + "uv", + "run", + "--project", + "tests", + "--frozen", + "tests/evaluator.py", + "--provider", + provider, + "--model", + model, + "--threshold", + str(threshold), + "--judge", + "--verbose", + "--report", + ] + + if extra_args.strip(): + cmd.extend(extra_args.split()) + + if skill: + cmd.extend(["--skill", skill]) + else: + cmd.append("--all") + + result = subprocess.run(cmd, capture_output=False) + + return provider, model, skill, result.returncode + + with concurrent.futures.ThreadPoolExecutor(max_workers=min(len(items), 4)) as executor: + futures = {executor.submit(run_single_eval, item): item for item in items} + + failed_count = 0 + for future in concurrent.futures.as_completed(futures): + try: + provider, model, skill, exit_code = future.result() + display = f"{provider}/{model}" + (f"/{skill}" if skill else "") + if exit_code == 0: + print(f"βœ… [{display}] Completed") + else: + print(f"❌ [{display}] Failed (exit code {exit_code})") + failed_count += 1 + except Exception as e: + print(f"❌ Error: {e}") + failed_count += 1 + + print(f"\nβœ“ All evaluation(s) completed ({len(items)-failed_count}/{len(items)} passed)") + + if failed_count > 0: + print(f"⚠ {failed_count} evaluation(s) failed") + + # Consolidate + subprocess.run(["python3", "ci/consolidate_results.py"]) + + +def parse_command(comment: str) -> tuple: + """Parse /test command from comment.""" + filter_provider = "all" + use_parallel = False + + if "/test copilot" in comment: + filter_provider = "copilot" + elif "/test ollama" in comment: + filter_provider = "ollama" + elif "/test gemini" in comment: + filter_provider = "gemini" + + if "parallel" in comment: + use_parallel = True + + return filter_provider, use_parallel + + +def main(): + parser = argparse.ArgumentParser(description="Orchestrate skill evaluations") + parser.add_argument("pr_number", type=int, help="GitHub PR 
number") + parser.add_argument("--filter-provider", default="all", help="Filter by provider") + parser.add_argument("--parallel", action="store_true", help="Run in parallel (GitHub Actions mode)") + parser.add_argument("--threshold", type=int, default=50, help="Pass threshold") + parser.add_argument("--matrix-only", action="store_true", help="Only output matrix JSON and exit") + + args = parser.parse_args() + + # Validate environment + if not args.matrix_only: + validate_environment() + + # Export for child scripts + os.environ["PR_NUMBER"] = str(args.pr_number) + os.environ["COPILOT_GITHUB_TOKEN"] = os.environ.get("GITHUB_TOKEN", "") + os.environ["GH_TOKEN"] = os.environ.get("GITHUB_TOKEN", "") + + # Detect changes + modified_skills = detect_changes(args.pr_number) + os.environ["MODIFIED_SKILLS"] = modified_skills + + # Generate matrix (expand with per-skill jobs if skills detected) + items = generate_matrix(args.filter_provider, modified_skills) + + # If matrix-only mode, output JSON and exit + if args.matrix_only: + print(json.dumps({"include": items})) + sys.exit(0) + + # Clean previous results + results_base = Path("tests/results") + import shutil + if results_base.exists(): + shutil.rmtree(results_base) + results_base.mkdir(parents=True, exist_ok=True) + + # Run evaluations + if args.parallel: + run_parallel_local(items, args.threshold) + else: + run_sequential(items, args.threshold) + + +if __name__ == "__main__": + main() diff --git a/ci/orchestrate_evaluations.sh b/ci/orchestrate_evaluations.sh deleted file mode 100755 index 78957de..0000000 --- a/ci/orchestrate_evaluations.sh +++ /dev/null @@ -1,164 +0,0 @@ -#!/bin/bash -set -e - -# Orchestrate Evaluations -# -# Refactored for scalability following programming skills: -# - Policy-Mechanism Separation: Config drives execution -# - Composition Over Coordination: Small focused scripts combined -# - Single Responsibility: Each script does one thing -# - Explicit State Invariants: Configuration validated 
upfront -# - Single Direction Data Flow: Clear pipeline of operations -# -# This orchestrator: -# 1. Validates configuration -# 2. Detects changed skills -# 3. Generates evaluation matrix from config -# 4. Runs evaluations (parallel or sequential) -# 5. Consolidates results -# -# Usage: -# ./ci/orchestrate_evaluations.sh [--parallel] [--filter-provider ] - -PR_NUMBER=$1 -shift - -PARALLEL=false -FILTER_PROVIDER="all" -THRESHOLD=50 - -# Parse optional arguments -while [[ $# -gt 0 ]]; do - case $1 in - --parallel) - PARALLEL=true - shift - ;; - --filter-provider) - FILTER_PROVIDER="$2" - shift 2 - ;; - --threshold) - THRESHOLD="$2" - shift 2 - ;; - *) - echo "Unknown option: $1" - exit 1 - ;; - esac -done - -if [ -z "$PR_NUMBER" ]; then - echo "Error: PR number not provided." - echo "Usage: $0 [--parallel] [--filter-provider ] [--threshold ]" - exit 1 -fi - -# Validate environment -echo "==> Validating environment" - -if [ -z "$GITHUB_TOKEN" ]; then - echo "❌ Error: GITHUB_TOKEN not set" - exit 1 -fi - -if [ -z "$OLLAMA_API_KEY" ]; then - echo "⚠ Warning: OLLAMA_API_KEY not set (required for Ollama)" -fi - -export GITHUB_TOKEN -export COPILOT_GITHUB_TOKEN="$GITHUB_TOKEN" -export GH_TOKEN="$GITHUB_TOKEN" -export OLLAMA_API_KEY -export PR_NUMBER - -echo "βœ“ Environment configured" - -# Detect changes -echo "" -echo "==> Detecting changes" -MODIFIED_SKILLS=$(./ci/detect_changes.sh "$PR_NUMBER") -export MODIFIED_SKILLS - -if [ -n "$MODIFIED_SKILLS" ]; then - echo "βœ“ Will test skills: $MODIFIED_SKILLS" -else - echo "βœ“ Will test all skills" -fi - -# Generate matrix from configuration -echo "" -echo "==> Generating evaluation matrix" -MATRIX=$(uv run --with pyyaml ci/matrix_generator.py --filter-provider "$FILTER_PROVIDER") - -if [ -z "$MATRIX" ] || [ "$MATRIX" = '{"include": []}' ]; then - echo "❌ Error: No enabled providers in configuration" - exit 1 -fi - -# Extract matrix items -MATRIX_ITEMS=$(echo "$MATRIX" | python3 -c " -import sys, json -data = 
json.load(sys.stdin) -for item in data['include']: - print(f\"{item['provider']}|{item['model']}|{item.get('extra_args', '')}\") -") - -ITEM_COUNT=$(echo "$MATRIX_ITEMS" | wc -l | xargs) -echo "βœ“ Generated matrix with $ITEM_COUNT configurations" -echo "$MATRIX_ITEMS" | sed 's/^/ - /' - -# Clean previous results -rm -rf tests/results/* -mkdir -p tests/results - -# Run evaluations -echo "" -if [ "$PARALLEL" = true ]; then - echo "==> Running evaluations in parallel" - - PIDS=() - while IFS='|' read -r provider model extra_args; do - echo " Starting: $provider/$model" - ./ci/run_evaluation.sh "$provider" "$model" "$THRESHOLD" "$extra_args" > "tests/results/${provider}_${model//[:\/]/_}.log" 2>&1 & - PIDS+=($!) - done <<< "$MATRIX_ITEMS" - - # Wait for all background jobs - FAILED=0 - for pid in "${PIDS[@]}"; do - if ! wait "$pid"; then - FAILED=$((FAILED + 1)) - fi - done - - echo "βœ“ All parallel evaluations completed" - - if [ "$FAILED" -gt 0 ]; then - echo "⚠ $FAILED evaluation(s) reported failures" - fi -else - echo "==> Running evaluations sequentially" - - while IFS='|' read -r provider model extra_args; do - ./ci/run_evaluation.sh "$provider" "$model" "$THRESHOLD" "$extra_args" - done <<< "$MATRIX_ITEMS" - - echo "βœ“ All evaluations completed" -fi - -# Consolidate results -echo "" -./ci/consolidate_results.sh - -EXIT_CODE=$? - -echo "" -if [ $EXIT_CODE -eq 0 ]; then - echo "βœ… Orchestration completed successfully" -else - echo "❌ Orchestration failed" -fi - -exit $EXIT_CODE diff --git a/ci/parse_test_command.py b/ci/parse_test_command.py new file mode 100644 index 0000000..173d669 --- /dev/null +++ b/ci/parse_test_command.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +""" +Parse /test command from GitHub comment. 
+ +Supports: + /test - Auto-detect changed skills + /test skill ps-error-handling-design - Override with specific skill + /test skill skill1 skill2 skill3 - Override with multiple skills +""" + +import sys +import re + + +def parse_command(comment: str) -> list: + """Parse /test command and extract skills if specified. + + Returns list of skill names or empty list for auto-detect. + """ + + # Check if /test skill is present + match = re.search(r'/test\s+skill[ \t]+([^\n]+)', comment) + + if match: + # Extract everything after "/test skill" to the end of that line + skills_str = match.group(1).strip() + skills = skills_str.split() + return skills + + # No skill override, return empty (will trigger auto-detect) + return [] + + +def main(): + if len(sys.argv) < 2: + print("Usage: parse_test_command.py ") + sys.exit(1) + + comment = sys.argv[1] + skills = parse_command(comment) + + if skills: + print(" ".join(skills)) + else: + print("") + + +if __name__ == "__main__": + main() diff --git a/ci/release.sh b/ci/release.sh deleted file mode 100755 index dba24e3..0000000 --- a/ci/release.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -set -e - -PLATFORM=$1 - -if [ -z "$PLATFORM" ]; then - echo "Usage: $0 " - echo "Platforms: cursor-antigravity, copilot" - exit 1 -fi - -case $PLATFORM in - cursor-antigravity) - echo "Packaging skills for Cursor/Antigravity..." - cd skills - zip -r ../cursor-antigravity.zip . - ;; - copilot) - echo "Packaging skills for Copilot..." - # For Copilot, we need to aggregate all skills into instructions.md - mkdir -p .github/copilot - echo "# Programming Skills" > .github/copilot/instructions.md - echo "" >> .github/copilot/instructions.md - - for skill_dir in skills/*/; do - if [ -f "$skill_dir/SKILL.md" ]; then - echo "Adding $(basename $skill_dir)..." 
-                cat "$skill_dir/SKILL.md" >> .github/copilot/instructions.md
-                echo "" >> .github/copilot/instructions.md
-                echo "---" >> .github/copilot/instructions.md
-                echo "" >> .github/copilot/instructions.md
-            fi
-        done
-
-        cd .github
-        zip -r ../copilot.zip copilot
-        ;;
-    *)
-        echo "Unknown platform: $PLATFORM"
-        exit 1
-        ;;
-esac
-
-echo "✓ Package created: ${PLATFORM}.zip"
diff --git a/ci/run_evaluation.py b/ci/run_evaluation.py
new file mode 100644
index 0000000..73bc1f2
--- /dev/null
+++ b/ci/run_evaluation.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+"""
+Execute evaluation for a single provider/model combination.
+
+Policy-Mechanism Separation: Configuration passed as parameters.
+Single Responsibility: Only runs evaluation, doesn't detect or orchestrate.
+"""
+
+import subprocess
+import sys
+import os
+from pathlib import Path
+
+
+def main():
+    if len(sys.argv) < 3:
+        print("Usage: run_evaluation.py <provider> <model> [threshold] [extra_args]")
+        sys.exit(1)
+
+    provider = sys.argv[1]
+    model = sys.argv[2]
+    threshold = sys.argv[3] if len(sys.argv) > 3 else "50"
+    extra_args = sys.argv[4] if len(sys.argv) > 4 else ""
+
+    # Results directory per provider/model for parallel execution
+    results_dir = Path("tests/results") / provider / model.replace(":", "_").replace("/", "_")
+    results_dir.mkdir(parents=True, exist_ok=True)
+
+    print(f"==> Running evaluation: {provider}/{model}")
+    print(f"    Results: {results_dir}")
+    print(f"    Threshold: {threshold}")
+
+    # Build skill arguments
+    skill_args = []
+    modified_skills = os.environ.get("MODIFIED_SKILLS", "").strip()
+    if modified_skills:
+        for skill in modified_skills.split():
+            skill_args.extend(["--skill", skill])
+    else:
+        skill_args = ["--all"]
+
+    # Build command
+    cmd = [
+        "uv",
+        "run",
+        "--project",
+        "tests",
+        "--frozen",
+        "tests/evaluator.py",
+        "--provider",
+        provider,
+        "--model",
+        model,
+        "--threshold",
+        threshold,
+        "--results-dir",
+        str(results_dir),
+        "--report",
+        "--judge",
+        "--verbose",
+    ]
+
+    # Add extra args if provided
+    if extra_args.strip():
+        cmd.extend(extra_args.split())
+
+    # Add skill arguments
+    cmd.extend(skill_args)
+
+    print(f"[{provider}/{model}] Executing evaluation...")
+    print(f"[{provider}/{model}] Output will be in: {results_dir}")
+
+    # Execute with live output
+    exit_code = subprocess.run(cmd).returncode
+
+    if exit_code == 0:
+        print(f"[{provider}/{model}] ✅ Evaluation completed successfully")
+    else:
+        print(f"[{provider}/{model}] ❌ Evaluation failed with exit code {exit_code}")
+
+    # Save exit code for consolidation
+    (results_dir / "exit_code").write_text(str(exit_code))
+
+    sys.exit(exit_code)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ci/run_evaluation.sh b/ci/run_evaluation.sh
deleted file mode 100755
index 037a05a..0000000
--- a/ci/run_evaluation.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-set -e
-
-# run_evaluation.sh
-# Executes evaluation for a single provider/model combination.
-# Policy-Mechanism Separation: Configuration passed as parameters.
-# Single Responsibility: Only runs evaluation, doesn't detect or orchestrate.
-
-PROVIDER=$1
-MODEL=$2
-THRESHOLD=${3:-50}
-EXTRA_ARGS=${4:-""}
-
-if [ -z "$PROVIDER" ] || [ -z "$MODEL" ]; then
-    echo "Usage: $0 <provider> <model> [threshold] [extra_args]"
-    exit 1
-fi
-
-# Explicit environment configuration
-if [ -n "$GITHUB_TOKEN" ]; then
-    export GITHUB_TOKEN
-    export COPILOT_GITHUB_TOKEN="$GITHUB_TOKEN"
-    export GH_TOKEN="$GITHUB_TOKEN"
-fi
-
-if [ -n "$OLLAMA_API_KEY" ]; then
-    export OLLAMA_API_KEY
-fi
-
-# Results directory per provider/model for parallel execution
-RESULTS_DIR="tests/results/${PROVIDER}/${MODEL//[:\/]/_}"
-mkdir -p "$RESULTS_DIR"
-
-echo "==> Running evaluation: $PROVIDER/$MODEL"
-echo "    Results: $RESULTS_DIR"
-echo "    Threshold: $THRESHOLD"
-
-# Detect skills to test (passed via environment or default to all)
-SKILL_ARGS=()
-if [ -n "$MODIFIED_SKILLS" ]; then
-    for skill in $MODIFIED_SKILLS; do
-        SKILL_ARGS+=("--skill" "$skill")
-    done
-else
-    SKILL_ARGS+=("--all")
-fi
-
-# Build command - Explicit State Invariants: All parameters explicit
-CMD=(
-    "uv" "run" "--project" "tests"
-    "tests/evaluator.py"
-    "--provider" "$PROVIDER"
-    "--model" "$MODEL"
-    "--threshold" "$THRESHOLD"
-    "--results-dir" "$RESULTS_DIR"
-    "--report"
-    "--judge"
-    "--verbose"
-)
-
-# Add extra args if provided
-if [ -n "$EXTRA_ARGS" ]; then
-    read -ra EXTRA_ARRAY <<< "$EXTRA_ARGS"
-    CMD+=("${EXTRA_ARRAY[@]}")
-fi
-
-# Add skill arguments
-CMD+=("${SKILL_ARGS[@]}")
-
-# Execute
-echo "Executing: ${CMD[*]}"
-"${CMD[@]}"
-
-EXIT_CODE=$?
- -# Save exit code for consolidation -echo "$EXIT_CODE" > "$RESULTS_DIR/exit_code" - -exit $EXIT_CODE diff --git a/skills/ps-composition-over-coordination/SKILL.md b/skills/ps-composition-over-coordination/SKILL.md index c7baa00..d8f7bfa 100644 --- a/skills/ps-composition-over-coordination/SKILL.md +++ b/skills/ps-composition-over-coordination/SKILL.md @@ -11,6 +11,7 @@ severity: SUGGEST Build complex systems by combining simple, focused units that work together through interfaces, not through centralized coordinators that orchestrate every detail. Benefits: + - **Modularity**: Each unit has one clear responsibility - **Testability**: Small units test in isolation - **Flexibility**: Swap or extend units without affecting others @@ -19,12 +20,14 @@ Benefits: ## When to Use **Use this pattern when:** + - Functions exceed 50 lines by orchestrating multiple steps - Classes have "Manager", "Coordinator", "Orchestrator" in names - Adding features requires changing central control flow - Objects know too much about others' internal details **Indicators you need this:** + - God objects with 10+ methods - Manager classes that just delegate - Deep call chains: `a.getB().getC().doX()` @@ -53,6 +56,7 @@ Benefits: ### Structure guidelines βœ… **Do:** + - Create small, focused functions/classes - Compose behavior from simple units - Let structure express the algorithm @@ -60,6 +64,7 @@ Benefits: - Build pipelines and chains ❌ **Avoid:** + - Centralized managers that know everything - Methods that orchestrate 5+ operations - Objects that reach into other objects' internals @@ -84,7 +89,7 @@ FUNCTION processOrder(order): RETURN createInvoice(order, finalPrice) ``` -*Each function has one clear job. They compose naturally without coordination.* +_Each function has one clear job. 
They compose naturally without coordination._ ### ❌ Bad: Central coordinator @@ -95,33 +100,37 @@ CLASS OrderManager: subtotal = 0 FOR EACH item IN items: subtotal = subtotal + item.getPrice() - + IF order.hasDiscount(): discount = subtotal * 0.1 subtotal = subtotal - discount - + invoice = NEW Invoice() invoice.setOrder(order) invoice.setAmount(subtotal) this.invoiceRepository.save(invoice) - + RETURN invoice ``` -*One large method coordinates everything. Hard to test, modify, or reuse parts.* +_One large method coordinates everything. Hard to test, modify, or reuse parts._ ## Common Anti-Patterns ### God Objects + Classes that do too much. Split into focused units that compose. ### Manager/Coordinator Classes + Classes that only delegate. Push behavior into the units themselves. ### Deep Chains + `order.getCustomer().getAddress().getZipCode()` - violates Law of Demeter. Pass the data you need. ### Orchestration Methods + 50+ line methods coordinating calls. Extract units and compose them. 
## Enforcement @@ -141,30 +150,32 @@ When reviewing code, check: ## Testing Strategy **Small Units:** + ``` TEST "unit transforms input correctly": // Arrange input = createTestInput() expected = createExpectedOutput() - + // Act result = transform(input) - + // Assert ASSERT result EQUALS expected ``` **Composed Systems:** + ``` TEST "pipeline produces correct result": // Arrange pipeline = compose(unitA, unitB, unitC) input = createTestData() expected = createExpectedResult() - + // Act result = pipeline(input) - + // Assert ASSERT result EQUALS expected ``` diff --git a/skills/ps-error-handling-design/SKILL.md b/skills/ps-error-handling-design/SKILL.md index 44bc5fe..9478417 100644 --- a/skills/ps-error-handling-design/SKILL.md +++ b/skills/ps-error-handling-design/SKILL.md @@ -7,12 +7,14 @@ severity: WARN ## Principle Treat error handling as a first-class design concern, not an afterthought: + - **Explicit errors**: Make failure modes visible in function signatures - **Type-safe failures**: Use Result types, not exceptions for control flow - **Intentional recovery**: Distinguish recoverable from non-recoverable errors - **Strategic failure**: Choose between fail-fast and graceful degradation Benefits: + - **Reliability**: Errors can't be accidentally ignored - **Clarity**: Function signatures document failure modes - **Safety**: Compiler enforces error handling @@ -21,12 +23,14 @@ Benefits: ## When to Use **Use this pattern when:** + - Functions can fail in predictable ways - Error handling affects business logic flow - Silently ignoring errors would corrupt state - Callers need to decide how to respond to failures **Indicators you need this:** + - Try-catch blocks with empty handlers - Errors logged but not acted upon - Silent failure causing debugging nightmares @@ -38,11 +42,13 @@ Benefits: ### 1. 
Make Errors Explicit in Signatures **Use Result/Either types instead of exceptions:** + - Return types encode both success and failure - Compiler forces callers to handle both paths - Errors become part of the type contract **Distinguish error categories:** + - **Domain errors**: Expected business failures (validation, not found) - **Infrastructure errors**: External system failures (network, DB) - **Programming errors**: Bugs (null reference, type mismatch) @@ -50,6 +56,7 @@ Benefits: ### 2. Recoverable vs Non-Recoverable **Recoverable errors** (use Result types): + - Validation failures - Not found / already exists - Permission denied @@ -57,6 +64,7 @@ Benefits: - Expected external service failures **Non-recoverable errors** (crash immediately): + - Null pointer / undefined - Programming logic errors - Corrupted state @@ -66,12 +74,14 @@ Benefits: ### 3. Fail Fast vs Graceful Degradation **Fail fast when:** + - Early detection prevents worse failure later - Corrupted state would spread - Recovery is impossible or unsafe - Developer error (should never happen in production) **Degrade gracefully when:** + - Partial functionality is acceptable - User experience benefits from fallback - System can recover automatically @@ -80,6 +90,7 @@ Benefits: ### Common Pitfalls ❌ **Avoid:** + - Catching exceptions just to log and rethrow - Empty catch blocks (swallowing errors) - Using exceptions for control flow @@ -88,6 +99,7 @@ Benefits: - Generic error messages ("Something went wrong") βœ… **Do:** + - Return Result types for expected failures - Crash on programming errors - Propagate errors to appropriate level @@ -115,7 +127,7 @@ FUNCTION processRequest(userId): RETURN notFoundResponse(result.context) ``` -*Errors are visible in the type. Caller must handle both paths. Cannot ignore failure.* +_Errors are visible in the type. Caller must handle both paths. 
Cannot ignore failure._ ### ❌ Bad: Hidden exceptions @@ -135,28 +147,32 @@ FUNCTION processRequest(userId): // Error swallowed, no recovery ``` -*Exception is hidden. Easy to forget to catch. Error silently ignored.* +_Exception is hidden. Easy to forget to catch. Error silently ignored._ ## Error Handling Strategy ### At Different Layers **Domain Layer:** + - Return Result types for business rule violations - Never throw exceptions - Pure functions that validate and transform **Application Layer:** + - Coordinate operations - Convert Results to appropriate responses - Handle transaction boundaries **Infrastructure Layer:** + - Wrap external failures in domain errors - Retry transient failures - Circuit breakers for external services **Presentation Layer:** + - Convert errors to user-friendly messages - Log technical details - Provide recovery actions @@ -164,14 +180,15 @@ FUNCTION processRequest(userId): ### Testing Error Paths **Test both happy and error paths:** + ``` TEST "handles validation failure": // Arrange invalidData = createInvalidData() - + // Act result = validate(invalidData) - + // Assert ASSERT result.isError IS true ASSERT result.error.type EQUALS "VALIDATION_ERROR" @@ -179,16 +196,17 @@ TEST "handles validation failure": TEST "handles not found": // Arrange nonexistentId = "nonexistent" - + // Act result = findUser(nonexistentId) - + // Assert ASSERT result.isError IS true ASSERT result.error.type EQUALS "NOT_FOUND" ``` **Test error propagation:** + - Verify errors bubble correctly - Check error context is preserved - Ensure no information leakage @@ -228,7 +246,6 @@ A: No. Either handle the error or propagate it. Log at the boundary where it's h **Q: What about validation errors?** A: Return a Result with structured validation errors. Let the caller decide how to present them. Don't throw exceptions for expected validation failures. 
- ## Related Patterns - **Functional Core, Imperative Shell**: Pure core returns Results, shell handles effects diff --git a/skills/ps-explicit-boundaries-adapters/SKILL.md b/skills/ps-explicit-boundaries-adapters/SKILL.md index a50d168..cc2eb50 100644 --- a/skills/ps-explicit-boundaries-adapters/SKILL.md +++ b/skills/ps-explicit-boundaries-adapters/SKILL.md @@ -9,11 +9,13 @@ severity: WARN ## Principle Isolate external systems and frameworks behind explicit boundaries using adapters and ports: + - **Core Domain**: Business logic with no external dependencies - **Ports**: Interfaces defining what the core needs - **Adapters**: Implementations that connect to external systems Benefits: + - **Testability**: Core logic tests without real infrastructure - **Flexibility**: Swap implementations without changing core - **Maintainability**: Changes to external systems don't ripple through domain logic @@ -23,12 +25,14 @@ This is also known as Hexagonal Architecture or Ports & Adapters pattern. ## When to Use **Use this pattern when:** + - Business logic directly imports framework code - Core domain depends on database libraries - External API clients used throughout the codebase - Tests require complex infrastructure setup **Indicators you need this:** + - Framework imports in business logic files - Database client passed to domain functions - HTTP client dependencies in core modules @@ -65,6 +69,7 @@ This is also known as Hexagonal Architecture or Ports & Adapters pattern. ### Core Rules βœ… **Must:** + - Define ports as interfaces in core domain - Implement adapters outside core domain - Core depends only on port interfaces @@ -72,6 +77,7 @@ This is also known as Hexagonal Architecture or Ports & Adapters pattern. 
- Pass domain objects through ports, not external types ❌ **Must not:** + - Import framework code in domain logic - Pass database connections to core functions - Return ORM models or HTTP response objects from core @@ -101,13 +107,13 @@ CLASS PostgresUserRepository IMPLEMENTS UserRepository: METHOD save(user): sql = "INSERT INTO users VALUES (...)" EXECUTE sql WITH user.data - + METHOD findById(userId): sql = "SELECT * FROM users WHERE id = ?" RETURN queryDatabase(sql, userId) ``` -*Domain depends on interface. Adapter implements interface. Core has no database imports.* +_Domain depends on interface. Adapter implements interface. Core has no database imports._ ### ❌ Bad: Core depends on concrete infrastructure @@ -123,11 +129,12 @@ FUNCTION registerUser(userData, dbClient): RETURN user ``` -*Domain tightly coupled to PostgreSQL. Cannot test without real database. Hard to switch databases.* +_Domain tightly coupled to PostgreSQL. Cannot test without real database. Hard to switch databases._ ## Testing Strategy **Core Domain (with ports):** + ``` TEST "business rule enforced": // Arrange @@ -136,25 +143,26 @@ TEST "business rule enforced": findById: mockFunction() } order = createTestOrder() - + // Act result = processOrder(order, mockRepo) - + // Assert ASSERT result.isValid IS true ``` **Adapters (integration tests):** + ``` TEST "persists domain objects": // Arrange db = setupTestDatabase() domainObject = createTestObject() - + // Act adapter.save(domainObject) retrieved = adapter.findById(domainObject.id) - + // Assert ASSERT retrieved EQUALS domainObject ``` @@ -162,38 +170,46 @@ TEST "persists domain objects": ## Common Patterns ### Repository Pattern + Port defines `save()`, `find()`, `delete()` operations. Adapters implement for specific databases. Core never knows if it's PostgreSQL or MongoDB. ### Gateway Pattern + Port defines external service operations in domain terms. Adapters handle HTTP, authentication, data mapping. Core thinks in domain concepts. 
### Notification Pattern + Port defines `notify(user, message)`. Adapters implement via email, SMS, push notifications. Core doesn't care about delivery mechanism. ### File Storage Pattern + Port defines `store(file)`, `retrieve(id)`. Adapters implement for local filesystem, S3, Azure Blob. Core logic unchanged when storage moves to cloud. ## Language-Specific Patterns ### JavaScript/TypeScript + - Use TypeScript interfaces for ports - Dependency injection via constructor parameters - Factory functions at composition root - Avoid importing concrete implementations in domain files ### Python + - Use ABC (Abstract Base Class) for ports - Type hints with Protocol for structural typing - Dependency injection via constructor or function parameters - Keep domain in separate package from adapters ### Java/C# + - Use interfaces for ports - Dependency injection frameworks (Spring, .NET Core) - Package structure: `domain/` `ports/` `adapters/` - Avoid `new` keyword in domain code ### Go + - Use interfaces for ports (idiomatic Go) - Small, focused interfaces - Accept interfaces, return structs @@ -235,7 +251,6 @@ A: Create a thin adapter layer that extends framework classes and delegates to y **Q: How do I handle transactions across adapters?** A: Define a transaction port in domain. Adapters implement transaction management. Core orchestrates but doesn't manage transactions directly. - ## References - Alistair Cockburn: "Hexagonal Architecture" (original pattern) diff --git a/skills/ps-explicit-ownership-lifecycle/SKILL.md b/skills/ps-explicit-ownership-lifecycle/SKILL.md index 1aed573..1c4b0d7 100644 --- a/skills/ps-explicit-ownership-lifecycle/SKILL.md +++ b/skills/ps-explicit-ownership-lifecycle/SKILL.md @@ -11,6 +11,7 @@ severity: WARN Every resource must have exactly one clear owner responsible for its lifecycle. The owner creates, manages, and destroys the resource deterministically. 
Benefits: + - **No leaks**: Resources always cleaned up - **No double-free**: Single ownership prevents duplicate cleanup - **Predictability**: Lifetime is explicit and obvious @@ -19,12 +20,14 @@ Benefits: ## When to Use **Use this pattern when:** + - Working with limited resources (connections, file handles, memory) - Managing subscriptions or event listeners - Dealing with external systems (databases, APIs, hardware) - Implementing cleanup logic **Indicators you need this:** + - Memory leaks in production - Unclosed connections or file handles - Event listeners not unregistered @@ -72,6 +75,7 @@ Benefits: ### Common Pitfalls ❌ **Avoid:** + - Shared ownership without clear rules - Resources outliving their scope - Cleanup in finalizers/garbage collection only @@ -79,6 +83,7 @@ Benefits: - Forgetting cleanup in error paths βœ… **Do:** + - Make owner explicit in code structure - Clean up in reverse order of acquisition - Use language-provided cleanup mechanisms @@ -106,15 +111,15 @@ FUNCTION createConnection(): CLASS ConnectionWrapper: CONSTRUCTOR(connection): this.connection = connection - + METHOD use(): RETURN this.connection.query(...) - + METHOD close(): this.connection.disconnect() // Owner responsible for cleanup ``` -*Resource acquired, used in scope, cleaned up deterministically. Ownership clear at each step.* +_Resource acquired, used in scope, cleaned up deterministically. Ownership clear at each step._ ### ❌ Bad: Unclear ownership, missing cleanup @@ -136,7 +141,7 @@ FUNCTION getConnection(): RETURN connection // Shared ownership - who closes? ``` -*Resources created but never cleaned up. Ownership ambiguous. Leaks in error paths.* +_Resources created but never cleaned up. Ownership ambiguous. Leaks in error paths._ ## Enforcement Checklist @@ -184,6 +189,7 @@ For non-owning relationships, use weak references that don't prevent cleanup. 
## Language-Specific Patterns ### JavaScript/TypeScript + - Use try-finally for cleanup - Implement `.dispose()` or `.close()` methods - Use WeakMap/WeakRef for non-owning references @@ -191,27 +197,32 @@ For non-owning relationships, use weak references that don't prevent cleanup. - Symbol.dispose for explicit resource management (TC39 proposal) ### Python + - Context managers with `__enter__` and `__exit__` - `with` statement for automatic cleanup - `contextlib.closing()` for objects with `.close()` - Weak references via `weakref` module ### Go + - `defer` for cleanup (executes in reverse order) - Explicit `.Close()` methods - Context for cancellation and timeouts ### Rust + - RAII via Drop trait (automatic cleanup) - Ownership system enforced by compiler - Borrow checker prevents use-after-free ### C# + - `IDisposable` interface with `.Dispose()` - `using` statement for automatic disposal - Finalizers as backup (not primary mechanism) ### Java + - Try-with-resources for AutoCloseable - Explicit `.close()` methods - Avoid finalizers (deprecated) @@ -236,7 +247,6 @@ A: Use factory methods or builder patterns. Ensure partially constructed objects - **Fail-Fast**: Detect leaked resources early in development - **Make Illegal States Unrepresentable**: Type system prevents invalid lifetimes - ## References - RAII (Resource Acquisition Is Initialization) - C++ core pattern diff --git a/skills/ps-explicit-state-invariants/SKILL.md b/skills/ps-explicit-state-invariants/SKILL.md index 25dd011..feff5ae 100644 --- a/skills/ps-explicit-state-invariants/SKILL.md +++ b/skills/ps-explicit-state-invariants/SKILL.md @@ -13,6 +13,7 @@ State must be intentionally designed and always valid. 
Every piece of state must **If you can't state the invariant, the state is already broken.** Benefits: + - **Reliability**: Impossible states become unrepresentable - **Debuggability**: Invalid states eliminated at design time - **Maintainability**: Invariants encoded in type system, not documentation @@ -20,12 +21,14 @@ Benefits: ## When to Use **Use this pattern when:** + - Designing state shapes for components or stores - Reviewing code with multiple boolean flags - Implementing state machines or workflows - Designing APIs that manage state **Indicators you need this:** + - Boolean flag combinations creating invalid states - Race conditions from temporarily invalid state - Defensive checks scattered throughout codebase @@ -70,12 +73,14 @@ Benefits: ### Common Pitfalls ❌ **Avoid:** + - Multiple independent boolean flags - Undocumented invariants (living in human memory) - Temporarily invalid state during updates - Optional fields that are only sometimes valid βœ… **Do:** + - Use discriminated unions for mutually exclusive states - Encode invariants in type system - Document what must always be true @@ -108,7 +113,7 @@ FUNCTION render(state): // Data exists only in Success state ``` -*State is explicit. Invalid combinations impossible. Compiler enforces invariants.* +_State is explicit. Invalid combinations impossible. Compiler enforces invariants._ ### ❌ Bad: Boolean flags allow invalid states @@ -123,7 +128,7 @@ FUNCTION render(state): IF state.isLoading AND state.hasError: // This shouldn't be possible - but it is! RETURN "???" - + IF state.data IS NOT null AND state.hasError: // Have data AND error? Which to show? RETURN "???" @@ -132,7 +137,7 @@ FUNCTION render(state): // 16 possible combinations (2^4), most invalid ``` -*Flags allow impossible states. No invariants enforced. Defensive checks everywhere.* +_Flags allow impossible states. No invariants enforced. 
Defensive checks everywhere._ ## AI Review Checklist @@ -167,7 +172,6 @@ A: Wrap legacy state with adapters that enforce invariants at boundaries. - **Documentation**: Require explicit invariant statements - **Testing**: Verify impossible states truly cannot be created - ## Related Patterns - **Functional Core, Imperative Shell**: Pure functions naturally preserve invariants diff --git a/skills/ps-functional-core-imperative-shell/SKILL.md b/skills/ps-functional-core-imperative-shell/SKILL.md index 74504ce..873925a 100644 --- a/skills/ps-functional-core-imperative-shell/SKILL.md +++ b/skills/ps-functional-core-imperative-shell/SKILL.md @@ -9,10 +9,12 @@ severity: BLOCK ## Principle Organize code into two layers: + - **Pure Core**: Deterministic computations with no side effects - **Imperative Shell**: Coordinates effects (IO, state, external calls) Benefits: + - **Testability**: Pure functions need no mocks - **Reliability**: Same input β†’ same output - **Maintainability**: Business logic isolated from infrastructure @@ -20,11 +22,13 @@ Benefits: ## When to Use **Use this pattern when:** + - Business logic depends on external data - Calculations should be deterministic - Testing pure logic without infrastructure setup **Indicators you need this:** + - Database calls mixed into validation - HTTP requests inside calculation functions - File IO scattered throughout business logic @@ -51,12 +55,14 @@ Benefits: ### Common Pitfalls ❌ **Avoid:** + - Side effects in pure functions (logging, DB calls) - Pure functions reading global state - Business logic in shell layer - Passing IO objects to core (connections, file handles) βœ… **Do:** + - Pass values, not connections - Keep shell thin (just orchestration) - Make effects explicit in function signatures @@ -83,14 +89,14 @@ FUNCTION applyDiscountRules(amount, rules): FUNCTION processOrder(orderId): items = database.getOrderItems(orderId) discountRules = database.getActiveDiscounts() - + total = calculateOrderTotal(items, 
discountRules) - + database.updateOrder(orderId, total) RETURN total ``` -*Core is pure - testable without database. Shell coordinates IO.* +_Core is pure - testable without database. Shell coordinates IO._ ### ❌ Bad: Side effects mixed with logic @@ -98,48 +104,50 @@ FUNCTION processOrder(orderId): FUNCTION calculateOrderTotal(orderId): items = database.getOrderItems(orderId) // Side effect! discountRules = database.getActiveDiscounts() // Side effect! - + log("Calculating total for order " + orderId) // Side effect! - + subtotal = sum(items.map(item => item.price)) discount = 0 FOR EACH rule IN discountRules: IF rule.applies(subtotal): discount = discount + (subtotal * rule.percentage) - + total = subtotal - discount database.updateOrder(orderId, total) // Side effect! RETURN total ``` -*Cannot test calculation without database. Cannot reuse logic. Hard to reason about.* +_Cannot test calculation without database. Cannot reuse logic. Hard to reason about._ ## Testing Strategy **Pure Core:** + ``` TEST "calculates correctly": // Arrange items = createTestItems() expectedValue = 150 - + // Act result = calculateTotal(items) - + // Assert ASSERT result EQUALS expectedValue ``` **Imperative Shell:** + ``` TEST "coordinates effects": // Arrange mockDB.save = mockFunction() orderId = "test-order-123" - + // Act processOrder(orderId) - + // Assert ASSERT mockDB.save WAS CALLED ``` @@ -150,7 +158,6 @@ TEST "coordinates effects": - **Architecture tests**: Assert core modules don't import IO libraries - **Linter rules**: Warn on console.log / global state in core - ## Related Patterns - **Explicit State Invariants**: Pure functions enforce invariants diff --git a/skills/ps-illegal-states-unrepresentable/SKILL.md b/skills/ps-illegal-states-unrepresentable/SKILL.md index 83c7490..c8f7cfc 100644 --- a/skills/ps-illegal-states-unrepresentable/SKILL.md +++ b/skills/ps-illegal-states-unrepresentable/SKILL.md @@ -11,6 +11,7 @@ severity: SUGGEST Design data structures so that 
illegal states cannot be represented. Use the type system to enforce invariants at compile time rather than runtime validation. Benefits: + - **Correctness**: Invalid states literally cannot exist - **Maintainability**: Eliminate defensive runtime checks - **Clarity**: Type declarations document valid states @@ -19,12 +20,14 @@ Benefits: ## When to Use **Use this pattern when:** + - Multiple fields have interdependent constraints - Boolean flags create ambiguous combinations - Validation logic is scattered across the codebase - Runtime errors indicate impossible states were reached **Indicators you need this:** + - Multiple booleans that shouldn't all be true/false together - Fields that are only valid in certain states - Comments explaining "if X then Y must be Z" @@ -54,17 +57,21 @@ Benefits: ### Common Transformations **Boolean pairs β†’ Enum/Union:** + - `{ loading: bool, error: bool, data: T }` β†’ Union of Loading | Error | Success states **Nullable with conditions β†’ Discriminated union:** + - `{ status: string, error?: string, result?: T }` β†’ Explicit state variants **Multiple modes β†’ Sum types:** + - `{ editMode: bool, viewMode: bool }` β†’ Single Mode enum ### Common Pitfalls ❌ **Avoid:** + - Representing states with multiple independent booleans - Using strings/numbers for states without type constraints - Nullable fields that are "required in some cases" @@ -72,6 +79,7 @@ Benefits: - Comments explaining when fields are valid βœ… **Do:** + - Use discriminated unions for mutually exclusive states - Make illegal combinations unrepresentable in types - Parse external data into constrained types at boundaries @@ -106,7 +114,7 @@ FUNCTION render(state: RemoteData): // - Failure without error message ``` -*Type system prevents invalid states. Compiler catches missing cases.* +_Type system prevents invalid states. 
Compiler catches missing cases._ ### ❌ Bad: Booleans allow invalid combinations @@ -120,10 +128,10 @@ STRUCTURE RemoteData: FUNCTION render(state: RemoteData): IF state.isLoading AND state.isError: // Can this happen? Should we show loading or error? - + IF state.data IS NOT null AND state.isError: // Have both data and error? Which to display? - + // Must defensively check all combinations // Still might miss edge cases @@ -132,25 +140,30 @@ FUNCTION render(state: RemoteData): // Invalid states we must handle: 12 ``` -*Most combinations are meaningless. Runtime checks scattered everywhere.* +_Most combinations are meaningless. Runtime checks scattered everywhere._ ## Type System Strategies ### Discriminated Unions (TypeScript/Flow) + Use a discriminant field to distinguish variants. The type system narrows based on discriminant checks. ### Algebraic Data Types (Rust/Haskell/Scala) + Sum types (enums with data) naturally prevent invalid combinations. ### Phantom Types (Advanced) + Use type parameters that exist only at compile time to track state. ### Builder Pattern with States + Encode construction order in types so incomplete objects can't exist. ## Enforcement **Code review checklist:** + - [ ] Are multiple booleans modeling a single state? - [ ] Are there nullable fields that are "sometimes required"? - [ ] Do comments explain field interdependencies? @@ -175,7 +188,6 @@ A: If you have runtime checks for "impossible" states, this prevents real bugs. **Q: How does this work in languages without algebraic types?** A: Use classes/interfaces with factory methods. Constructor is private; factories return only valid instances. 
- ## Related Patterns - **Parse, Don't Validate**: Transformation to validated types at boundaries diff --git a/skills/ps-local-reasoning/SKILL.md b/skills/ps-local-reasoning/SKILL.md index 4b2c989..29e1089 100644 --- a/skills/ps-local-reasoning/SKILL.md +++ b/skills/ps-local-reasoning/SKILL.md @@ -11,6 +11,7 @@ severity: WARN Code should be understandable by reading it in isolation, without jumping to other files or searching for hidden dependencies. Benefits: + - **Readability**: Understanding requires minimal context switching - **Maintainability**: Changes are predictable and safe - **Debugging**: Problem source is immediately visible @@ -19,12 +20,14 @@ Benefits: ## When to Use **Use this skill when:** + - Writing functions that depend on external data - Reviewing code with unclear data sources - Refactoring code with scattered dependencies - Debugging issues caused by hidden state **Indicators you need this:** + - Must trace through multiple files to understand a function - Function behavior changes based on global state - Dependencies are implicit (imported modules, globals) @@ -70,6 +73,7 @@ Benefits: ### Common Pitfalls ❌ **Avoid:** + - Reading from global state inside functions - Importing config deep in logic - Hidden dependencies through closures @@ -77,6 +81,7 @@ Benefits: - Accessing outer scope unnecessarily βœ… **Do:** + - Pass everything the function needs as parameters - Make external dependencies obvious - Keep function scope self-contained @@ -90,10 +95,10 @@ Benefits: ``` FUNCTION calculateDiscount(order, pricingRules, customerTier): basePrice = order.items.sum(item => item.price) - + tierMultiplier = pricingRules.getTierMultiplier(customerTier) discount = basePrice * tierMultiplier - + RETURN basePrice - discount // Everything needed is in parameters @@ -101,7 +106,7 @@ FUNCTION calculateDiscount(order, pricingRules, customerTier): // Easy to test - just pass values ``` -*Function signature reveals all dependencies. 
Fully understandable in isolation.* +_Function signature reveals all dependencies. Fully understandable in isolation._ ### ❌ Bad: Hidden dependencies @@ -111,11 +116,11 @@ IMPORT { pricingService } from "services" FUNCTION calculateDiscount(order): basePrice = order.items.sum(item => item.price) - + customerTier = getCurrentUser().tier // Where does this come from? tierMultiplier = pricingService.getTier(customerTier) // Global import discount = basePrice * tierMultiplier * config.discountFactor // Global config - + RETURN basePrice - discount // Must read 3+ other files to understand @@ -124,7 +129,7 @@ FUNCTION calculateDiscount(order): // config - what value is this? ``` -*Cannot understand without hunting through imports. Hidden dependencies make testing hard.* +_Cannot understand without hunting through imports. Hidden dependencies make testing hard._ ## Review Checklist @@ -145,6 +150,7 @@ Before accepting code, verify: ### Dependency Injection Instead of importing dependencies directly, accept them as parameters: + - Pass configuration objects as arguments - Inject services rather than importing singletons - Accept callbacks for side effects @@ -152,6 +158,7 @@ Instead of importing dependencies directly, accept them as parameters: ### Pure Function Default Make functions pure by default: + - All inputs via parameters - All outputs via return value - No side effects unless absolutely necessary @@ -159,6 +166,7 @@ Make functions pure by default: ### Explicit Context When context is needed, pass it explicitly: + - Request/response objects as parameters - User context as explicit argument - Transaction context passed down @@ -166,6 +174,7 @@ When context is needed, pass it explicitly: ### Colocation Keep related code together: + - Helper functions near usage - Type definitions with implementations - Validation with data structures @@ -173,18 +182,21 @@ Keep related code together: ## Language-Specific Patterns ### JavaScript/TypeScript + - Use function parameters 
over closure scope - Prefer dependency injection over module imports - Avoid ambient global access (process.env directly) - Use TypeScript interfaces to make dependencies explicit ### Python + - Use function arguments instead of module-level variables - Avoid mutable module state - Pass config objects explicitly - Use type hints to document dependencies ### Others + - Java: Constructor injection over field access - C#: Dependency injection containers - Go: Explicit context passing @@ -204,7 +216,6 @@ A: Start simple with explicit parameters. The principle is making dependencies v **Q: How do I handle ambient context like user authentication?** A: Pass user/session context explicitly through the call chain, or use a request context object. Don't rely on thread-local or global state. - ## Related Patterns - **Functional Core, Imperative Shell**: Pure core has explicit dependencies diff --git a/skills/ps-minimize-mutation/SKILL.md b/skills/ps-minimize-mutation/SKILL.md index dd30b4c..7a3686b 100644 --- a/skills/ps-minimize-mutation/SKILL.md +++ b/skills/ps-minimize-mutation/SKILL.md @@ -9,11 +9,13 @@ severity: WARN ## Principle Mutation should be: + - **Localized**: Confined to small, clear scopes - **Explicit**: Visible at call sites when mutation occurs - **Controlled**: Used only when beneficial for performance or clarity Benefits: + - **Predictability**: Easier to reason about code when values don't change unexpectedly - **Thread Safety**: Immutable data is inherently safe for concurrent access - **Debugging**: Fewer moving parts, simpler data flow @@ -22,12 +24,14 @@ Benefits: ## When to Use **Use this skill when:** + - Reviewing code with shared mutable state - Functions modify their arguments unexpectedly - Objects change state across multiple locations - Tracking down bugs related to unexpected data changes **Indicators you need this:** + - Functions with surprising side effects - Collections modified while being iterated - Object properties changed far from 
creation @@ -39,6 +43,7 @@ Benefits: ### Prefer Immutable Updates **Create new values instead of modifying existing ones:** + - Return new collections rather than mutating inputs - Use spread operators, Object.assign, or copy methods - Build new objects with updated properties @@ -46,6 +51,7 @@ Benefits: ### Localize Mutation When Necessary **When mutation is appropriate (performance, builder patterns):** + - Keep mutations within a single function scope - Don't expose mutable references outside the scope - Return immutable results to callers @@ -53,6 +59,7 @@ Benefits: ### Make Mutation Explicit **Signal mutation at function boundaries:** + - Name functions to indicate mutation (update, modify, add) - Document when parameters are mutated - Avoid mutating function parameters received from callers @@ -60,6 +67,7 @@ Benefits: ### Acceptable Mutation Contexts **Mutation is reasonable when:** + - Building large data structures incrementally (builders) - Performance-critical loops processing arrays - Local variables within a function (not escaping scope) @@ -67,6 +75,7 @@ Benefits: - Managing UI component state **Mutation should be avoided when:** + - Functions operate on shared data structures - Arguments could be reused by the caller - The mutation affects code outside current scope @@ -87,7 +96,7 @@ FUNCTION addItem(cart, newItem): FUNCTION removeItem(cart, itemId): updatedItems = cart.items.filter(item => item.id !== itemId) updatedTotal = updatedItems.sum(item => item.price) - + RETURN { ...cart, items: updatedItems, @@ -99,7 +108,7 @@ FUNCTION removeItem(cart, itemId): // Caller decides what to do with result ``` -*Functions create new values. Original data preserved. Safe for concurrent access.* +_Functions create new values. Original data preserved. 
Safe for concurrent access._ ### ❌ Bad: Mutation scattered @@ -112,7 +121,7 @@ FUNCTION addItem(cart, newItem): FUNCTION processOrder(cart): addItem(cart, { id: 1, price: 10 }) // Mutates cart addItem(cart, { id: 2, price: 20 }) // Mutates cart again - + // Later... is cart the original or modified? // Which functions modified it? // Hard to track changes @@ -122,30 +131,34 @@ // Hard to test or reason about ``` -*Functions modify shared state. Caller's data changed without clear signal. Race conditions possible.* +_Functions modify shared state. Caller's data changed without clear signal. Race conditions possible._ ## Common Patterns ### Immutable Collection Updates **Arrays:** + - Add: `[...array, newItem]` - Remove: `array.filter(item => item !== toRemove)` - Update: `array.map(item => item.id === id ? updated : item)` - Slice: `array.slice(start, end)` **Objects:** + - Add/Update property: `{...obj, key: newValue}` - Remove property: `{...obj}; delete result.key` or use destructuring - Nested update: `{...obj, nested: {...obj.nested, key: value}}` **Maps/Sets:** + - Create new instances with changes - Or use within a local scope and return frozen copy ### Builder Pattern for Complex Construction When building complex objects step-by-step: + - Use a builder class with mutation methods - Return immutable result from `.build()` method - Builder is local, result is shared immutably @@ -220,7 +233,6 @@ A: Use spreading at each level, or use libraries like Immer (JS), lenses (functi **Q: What about builder patterns?** A: Builders are fine—they use mutation internally but expose an immutable interface. The builder itself is transient and local. 
- ## References - "Effective Java" (Joshua Bloch) - Item 17: Minimize Mutability diff --git a/skills/ps-naming-as-design/SKILL.md b/skills/ps-naming-as-design/SKILL.md index 764d852..87ce997 100644 --- a/skills/ps-naming-as-design/SKILL.md +++ b/skills/ps-naming-as-design/SKILL.md @@ -11,6 +11,7 @@ severity: WARN Names are design decisions made visible. Every name reveals what you understand about the problem domain, the role of each component, and the boundaries between concepts. **Core insight:** + - Good names make the design obvious - Bad names reveal confused thinking - When you can't name something clearly, the design needs work @@ -18,12 +19,14 @@ Names are design decisions made visible. Every name reveals what you understand ## When to Use **Use this pattern when:** + - Reviewing any new code or APIs - A function/variable is hard to name (signals design problem) - Names include "Manager", "Handler", "Data", "Info", "Process" - Names require explaining what they mean **Indicators you need this:** + - Comments explaining what variables mean - Generic names (data, item, value, result, temp) - Names mixing abstraction levels (getUserFromDBAndFormat) @@ -36,11 +39,13 @@ Names are design decisions made visible. Every name reveals what you understand ### 1. Names Should Reveal Intent, Not Implementation Names should tell you **what** and **why**, not **how**. ❌ **Avoid implementation details:** + - `arrayOfUsers` → data structure is obvious from usage - `getUserFromDatabase` → storage mechanism is internal - `parseJSONAndValidate` → combining unrelated concerns ✅ **Reveal intent and role:** + - `eligibleCandidates` → tells you the selection criteria - `authenticatedUser` → tells you the authorization state - `validatedOrder` → tells you the business state @@ -50,6 +55,7 @@ Names should tell you **what** and **why**, not **how**. ### 2. Names Should Encode Constraints Good names encode business rules and constraints. 
**Examples of constraint-encoding names:** + - `NonEmptyString` vs `string` - makes null handling explicit - `PositiveAmount` vs `number` - encodes valid range - `AuthenticatedUser` vs `User` - encodes state requirement @@ -60,11 +66,13 @@ Good names encode business rules and constraints. ### 3. Names Should Expose Structure Problems If you can't name something without "And", "Or", "Manager", it's doing too much. ❌ **Signals mixed responsibilities:** + - `validateAndSave` → two responsibilities - `UserManager` → vague, does everything - `handleRequest` → what aspect of requests? ✅ **Clear, focused names:** + - `validateOrder`, then `saveOrder` → separate concerns - `UserAuthenticator`, `UserRepository` → specific roles - `parseRequest`, `authorizeRequest` → distinct operations @@ -74,11 +82,13 @@ If you can't name something without "And", "Or", "Manager", it's doing too much. ### 4. Names Should Match Domain Language Use terms from the problem domain, not computer science abstractions. ❌ **Generic programmer terms:** + - `UserFactory` → pattern name, not domain concept - `DataAccessLayer` → technical architecture term - `RequestProcessor` → says nothing about business logic ✅ **Domain-specific language:** + - `RegistrationService` → what business process? - `OrderRepository` → what domain objects? - `PaymentValidator` → what business rules? @@ -86,6 +96,7 @@ Use terms from the problem domain, not computer science abstractions. ## Common Pitfalls ❌ **Avoid:** + - Suffixes that hide poor design: Handler, Manager, Processor, Helper, Utility - Generic containers: data, info, result, response, object - Technical jargon when domain terms exist @@ -93,6 +104,7 @@ Use terms from the problem domain, not computer science abstractions. 
- Abbreviations that obscure meaning (usrMgr, procReq) ✅ **Do:** + - Use nouns for objects, verbs for functions - Make illegal states unrepresentable in names - Let the type system handle types (no `userArray`, just `users`) @@ -109,10 +121,10 @@ TYPE ValidatedEmail = string // passed validation FUNCTION calculateRefundAmount(order, returnedItems): // Name reveals purpose, not implementation - + FUNCTION authenticateUser(credentials): // Returns authenticated user or error - + FUNCTION eligibleForDiscount(customer, order): // Boolean reveals business rule @@ -124,20 +136,20 @@ CONST MAXIMUM_RETRY_ATTEMPTS = 3 // Clear from reading alone ``` -*Every name tells a story. Intent is obvious. Constraints explicit.* +_Every name tells a story. Intent is obvious. Constraints explicit._ ### ❌ Bad: Names hide intent ``` FUNCTION process(data): // What is being processed? How? - + FUNCTION handleUser(u): // Handle how? Create? Update? Delete? - + FUNCTION doStuff(x, y): // Completely opaque - + CONST VALUE = 50 // What does this limit? CONST LIMIT = 3 // Limit of what? @@ -150,26 +162,30 @@ VAR info = result.data // Intent buried in code ``` -*Names reveal nothing. Must read implementation. Maintenance nightmare.* +_Names reveal nothing. Must read implementation. 
Maintenance nightmare._ ## Naming Patterns ### Booleans and Predicates + - Prefix with `is`, `has`, `can`, `should`: `isValid`, `hasPermission` - Use positive names: `isEnabled` not `isNotDisabled` - Make the condition explicit: `isOverBudget` not `checkBudget` ### Collections + - Use plural nouns: `users`, `orders`, `transactions` - Name reveals filtering: `activeUsers`, `paidInvoices`, `recentPosts` - Name reveals relationship: `usersByEmail`, `ordersByDate` ### Functions + - Verbs for actions: `calculate`, `validate`, `transform` - Reveal what changes: `createUser`, `deleteOrder`, `updateInventory` - Query vs Command: `getUser` (query) vs `fetchUser` (command with effects) ### Constants + - ALL_CAPS for true constants: `MAX_RETRIES`, `DEFAULT_TIMEOUT` - Reveal the constraint: `MINIMUM_PASSWORD_LENGTH` not `PASSWORD_LIMIT` @@ -191,7 +207,6 @@ When reviewing code, verify naming decisions: **If function name includes "And" → SUGGEST splitting into multiple functions** - ## Related Patterns - **Explicit State Invariants**: Names should reveal state requirements diff --git a/skills/ps-policy-mechanism-separation/SKILL.md b/skills/ps-policy-mechanism-separation/SKILL.md index d516641..7804c6e 100644 --- a/skills/ps-policy-mechanism-separation/SKILL.md +++ b/skills/ps-policy-mechanism-separation/SKILL.md @@ -9,10 +9,12 @@ severity: WARN ## Principle Separate what to do (policy) from how to do it (mechanism): + - **Policy**: Business rules, decisions, configuration, strategies - **Mechanism**: Reusable implementation, algorithms, infrastructure Benefits: + - **Flexibility**: Change decisions without touching implementation - **Reusability**: Same mechanism serves multiple policies - **Clarity**: Business rules explicit and discoverable @@ -21,12 +23,14 @@ Benefits: ## When to Use **Use this pattern when:** + - Business rules are hardcoded in loops or handlers - Same logic needs different behaviors in different contexts - Rules change frequently but implementation is 
stable - Configuration should be editable without code changes **Indicators you need this:** + - Magic numbers or strings scattered in code - Duplicated logic with slight variations - if/else chains encoding business rules @@ -62,12 +66,14 @@ Benefits: ### Common Pitfalls ❌ **Avoid:** + - Hardcoding business rules in loops - Mixing rule definitions with execution logic - Duplicating mechanisms to support different policies - Making policy changes require code modifications ✅ **Do:** + - Pass rules as parameters - Use configuration objects for policies - Keep mechanisms generic and reusable @@ -107,7 +113,7 @@ FUNCTION retryWithPolicy(operation, policy): RETURN result ``` -*Rules defined separately. Mechanism is reusable. Easy to test independently.* +_Rules defined separately. Mechanism is reusable. Easy to test independently._ ### ❌ Bad: Policy hardcoded in mechanism @@ -132,21 +138,22 @@ FUNCTION fetchWithRetry(url): // Changing rules requires code changes ``` -*Business rules scattered in code. Cannot reuse. Must modify code to change policy.* +_Business rules scattered in code. Cannot reuse. 
Must modify code to change policy._ ## Testing Strategy **Test mechanism:** + ``` TEST "executes correctly with policy A": // Arrange data = createTestData() policyA = createPolicyA() expectedA = createExpectedResultA() - + // Act result = mechanism(data, policyA) - + // Assert ASSERT result EQUALS expectedA @@ -155,20 +162,21 @@ TEST "executes correctly with policy B": data = createTestData() policyB = createPolicyB() expectedB = createExpectedResultB() - + // Act result = mechanism(data, policyB) - + // Assert ASSERT result EQUALS expectedB ``` **Test policy:** + ``` TEST "policy defines correct rules": // Arrange retryPolicy = createRetryPolicy() - + // Act & Assert ASSERT retryPolicy.maxAttempts EQUALS 3 ASSERT retryPolicy.backoffMs EQUALS 1000 @@ -177,6 +185,7 @@ TEST "policy defines correct rules": ## Enforcement **Code review checklist:** + - [ ] Are business rules defined separately from implementation? - [ ] Can rules change without modifying mechanism code? - [ ] Is the mechanism reusable with different policies? @@ -184,6 +193,7 @@ TEST "policy defines correct rules": - [ ] Could this logic serve other use cases with different rules? **Red flags:** + - Magic numbers in loops or conditionals - Similar functions differing only in constants - Comments explaining "business rule: X" @@ -192,6 +202,7 @@ TEST "policy defines correct rules": ## Practical Patterns ### Configuration Object Pattern + ``` // Policy const validationRules = { @@ -205,6 +216,7 @@ function validate(input, rules) { /* ... 
*/ } ``` ### Strategy Pattern + ``` // Policies const strategies = { @@ -219,6 +231,7 @@ function apply(data, strategy) { ``` ### Rule Definition Pattern + ``` // Policy const discountRules = [ @@ -232,7 +245,6 @@ function applyRules(data, rules) { } ``` - ## Related Patterns - **Functional Core, Imperative Shell**: Policy in pure core, mechanism in shell diff --git a/skills/ps-single-direction-data-flow/SKILL.md b/skills/ps-single-direction-data-flow/SKILL.md index f200767..8623ec5 100644 --- a/skills/ps-single-direction-data-flow/SKILL.md +++ b/skills/ps-single-direction-data-flow/SKILL.md @@ -13,6 +13,7 @@ Data flows one way. No backchannels. No hidden feedback loops. No circular depen This is about **reasoning locality**: you should be able to trace data flow linearly without jumping through multiple files. Benefits: + - **Predictability**: Updates follow a single path - **Debuggability**: No hidden state mutations - **Maintainability**: Clear ownership boundaries @@ -20,12 +21,14 @@ Benefits: ## When to Use **Use this pattern when:** + - Designing state management architecture - Reviewing components that share state - Debugging "why did this re-render?" 
issues - Setting up reactive systems or event streams **Indicators you need this:** + - Race conditions from multiple writers - Difficulty tracing where data changes - Components falling out of sync @@ -36,6 +39,7 @@ Benefits: ### One Source of Truth Each piece of data has exactly one owner: + - Owner is the sole writer - Other code reads from the owner - Updates propagate from the owner downward @@ -43,6 +47,7 @@ Each piece of data has exactly one owner: ### Clear Ownership For every piece of state, know: + - **Who owns this data?** (Which component/module) - **Who may change it?** (Only the owner) - **How do others get changes?** (Subscriptions, props, parameters) @@ -50,6 +55,7 @@ For every piece of state, know: ### Updates Flow Down, Events Flow Up Establish clear communication paths: + - **Parent owns state**: Single source of truth - **Children receive state**: Via props/parameters - **Children send events up**: Actions, callbacks, events @@ -58,12 +64,14 @@ Establish clear communication paths: ### Common Pitfalls ❌ **Avoid:** + - Multiple components writing to shared state - Children mutating parent state directly - Circular data dependencies - Backchannel updates through side effects ✅ **Do:** + - Single owner per state value - Explicit update paths - Events for upward communication @@ -77,10 +85,10 @@ Establish clear communication paths: // PARENT - owns state COMPONENT ShoppingCart: STATE items = [] - + FUNCTION addItem(item): this.items = [...this.items, item] - + FUNCTION render(): RETURN CartView( items: this.items, @@ -99,7 +107,7 @@ COMPONENT CartView(items, onAddItem): // Single source of truth: parent owns items ``` -*Clear ownership. Data flows one way. Easy to trace updates.* +_Clear ownership. Data flows one way. 
Easy to trace updates._ ### ❌ Bad: Bidirectional updates @@ -110,7 +118,7 @@ COMPONENT CartView: FUNCTION addItem(item): sharedCart.items.push(item) // Direct mutation notifyOtherComponents() // Manual sync - + FUNCTION render(): DISPLAY sharedCart.items @@ -118,7 +126,7 @@ COMPONENT CartSummary: FUNCTION removeItem(itemId): sharedCart.items = sharedCart.items.filter(...) updateCartView() // Backchannel update - + FUNCTION render(): DISPLAY sharedCart.items.length @@ -128,7 +136,7 @@ COMPONENT CartSummary: // Synchronization nightmare ``` -*Shared mutable state. Multiple writers. Updates flow in all directions. Debugging nightmare.* +_Shared mutable state. Multiple writers. Updates flow in all directions. Debugging nightmare._ ## AI Review Checklist @@ -162,7 +170,6 @@ A: Derive from source of truth. Don't store redundant state. **Q: How do I handle circular dependencies in legacy code?** A: Refactor to extract shared logic to a parent module. Break the cycle. - ## Related Patterns - **Functional Core, Imperative Shell**: Pure core benefits from single-direction flow