diff --git a/amplifier-bundle/tools/amplihack/hooks/dev_intent_router.py b/.claude/tools/amplihack/hooks/dev_intent_router.py similarity index 100% rename from amplifier-bundle/tools/amplihack/hooks/dev_intent_router.py rename to .claude/tools/amplihack/hooks/dev_intent_router.py diff --git a/amplifier-bundle/tools/amplihack/hooks/templates/routing_prompt.txt b/.claude/tools/amplihack/hooks/templates/routing_prompt.txt similarity index 100% rename from amplifier-bundle/tools/amplihack/hooks/templates/routing_prompt.txt rename to .claude/tools/amplihack/hooks/templates/routing_prompt.txt diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_dev_intent_router.py b/.claude/tools/amplihack/hooks/tests/test_dev_intent_router.py similarity index 100% rename from amplifier-bundle/tools/amplihack/hooks/tests/test_dev_intent_router.py rename to .claude/tools/amplihack/hooks/tests/test_dev_intent_router.py diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_pre_tool_use_cwd_protection.py b/.claude/tools/amplihack/hooks/tests/test_pre_tool_use_cwd_protection.py similarity index 100% rename from amplifier-bundle/tools/amplihack/hooks/tests/test_pre_tool_use_cwd_protection.py rename to .claude/tools/amplihack/hooks/tests/test_pre_tool_use_cwd_protection.py diff --git a/.github/workflows/drift-detection.yml b/.github/workflows/drift-detection.yml index c2a6965b1..8c4f8b506 100644 --- a/.github/workflows/drift-detection.yml +++ b/.github/workflows/drift-detection.yml @@ -11,7 +11,7 @@ concurrency: jobs: check-drift: - name: Check skill/agent drift + name: Check skill/agent/hooks drift runs-on: ubuntu-latest timeout-minutes: 5 diff --git a/Makefile b/Makefile index 07792714f..ba8259263 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Makefile for Scenarios Directory Pattern Tools # Provides easy access to production-ready scenario tools -.PHONY: help analyze-codebase scenarios-help list-scenarios docs-serve docs-build docs-deploy +.PHONY: help analyze-codebase scenarios-help list-scenarios docs-serve docs-build docs-deploy check-drift verify-hooks-symlink # Default target - show help help: @@ -158,3 +158,26 @@ docs-deploy: @echo "🚀 Deploying documentation to GitHub Pages..." @mkdocs gh-deploy --force @echo "✅ Documentation deployed successfully" + +# Drift Detection +# =============== + +# Run drift detection for skills, agents, and hooks +check-drift: + @python scripts/check_drift.py + +# Verify hooks symlink is intact +verify-hooks-symlink: + @if [ -L "amplifier-bundle/tools/amplihack/hooks" ]; then \ + target=$$(readlink amplifier-bundle/tools/amplihack/hooks); \ + if [ "$$target" = "../../../.claude/tools/amplihack/hooks" ]; then \ + echo "OK: hooks symlink is correct"; \ + else \ + echo "ERROR: hooks symlink points to $$target (expected ../../../.claude/tools/amplihack/hooks)"; \ + exit 1; \ + fi; \ + else \ + echo "ERROR: amplifier-bundle/tools/amplihack/hooks is not a symlink"; \ + echo "Fix: rm -rf amplifier-bundle/tools/amplihack/hooks && ln -s ../../../.claude/tools/amplihack/hooks amplifier-bundle/tools/amplihack/hooks"; \ + exit 1; \ + fi diff --git a/amplifier-bundle/tools/amplihack/hooks b/amplifier-bundle/tools/amplihack/hooks new file mode 120000 index 000000000..4ab95e9d9 --- /dev/null +++ b/amplifier-bundle/tools/amplihack/hooks @@ -0,0 +1 @@ +../../../.claude/tools/amplihack/hooks \ No newline at end of file diff --git a/amplifier-bundle/tools/amplihack/hooks/README.md b/amplifier-bundle/tools/amplihack/hooks/README.md deleted file mode 100644 index 738a5612d..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/README.md +++ /dev/null @@ -1,254 +0,0 @@ -# Claude Code Hook System - -This directory contains the hook system for Claude Code, which allows for customization and monitoring of the Claude Code runtime environment. - -## Overview - -The hook system uses a **unified HookProcessor** base class that provides common functionality for all hooks, reducing code duplication and improving maintainability. - -## Hook Files - -### Core Infrastructure - -- **`hook_processor.py`** - Base class providing common functionality for all hooks - - JSON input/output handling - - Logging to `~/.amplihack/.claude/runtime/logs/` - - Metrics collection - - Error handling and graceful fallback - - Session data management - -### Active Hooks (Configured in .claude/settings.json) - -- **`session_start.py`** - Runs when a Claude Code session starts - - Adds project context to the conversation - - Reads and applies user preferences from USER_PREFERENCES.md - - Logs session start metrics - -- **`stop.py`** - Runs when a session ends - - Checks for lock flag (`~/.amplihack/.claude/tools/amplihack/.lock_active`) - - Blocks stop if continuous work mode is enabled (lock active) - - Logs stop attempts and lock status - -- **`post_tool_use.py`** - Runs after each tool use - - Tracks tool usage metrics - - Validates tool execution results - - Categorizes tool types for analytics - -- **`pre_compact.py`** - Runs before context compaction - - Manages context and prepares for compaction - - Logs pre-compact events - -- **`pre_tool_use.py`** - Runs before each tool use (Bash only) - - **CWD deletion protection**: blocks `rm -rf` / `rmdir` on the current working directory or any parent - - **CWD rename/move protection**: blocks `mv` commands that would rename the CWD or any parent (prevents session crash from invalid CWD) - - **Main branch protection**: blocks `git commit` directly to `main` or `master` - - **No-verify bypass protection**: blocks `git commit --no-verify` and `git push --no-verify` - -## Architecture - -``` -┌─────────────────┐ -│ Claude Code │ -└────────┬────────┘ - │ JSON input - ▼ -┌─────────────────┐ -│ Hook Script │ -├─────────────────┤ -│ HookProcessor │ ◄── Base class -│ - read_input │ -│ - process │ ◄── Implemented by subclass -│ - write_output│ -│ - logging │ -│ - metrics │ -└────────┬────────┘ - │ JSON output - ▼ -┌─────────────────┐ -│ Claude Code │ -└─────────────────┘ -``` - -## Creating a New Hook - -To create a new hook, extend the `HookProcessor` class: - -```python -#!/usr/bin/env python3 -"""Your hook description.""" - -from typing import Any, Dict -import sys -from pathlib import Path -sys.path.insert(0, str(Path(__file__).parent)) -from hook_processor import HookProcessor - - -class YourHook(HookProcessor): - """Your hook processor.""" - - def __init__(self): - super().__init__("your_hook_name") - - def process(self, input_data: Dict[str, Any]) -> Dict[str, Any]: - """Process the hook input. - - Args: - input_data: Input from Claude Code - - Returns: - Output to return to Claude Code - """ - # Your processing logic here - self.log("Processing something") - self.save_metric("metric_name", value) - - return {"result": "success"} - - -def main(): - """Entry point.""" - hook = YourHook() - hook.run() - - -if __name__ == "__main__": - main() -``` - -## Data Storage - -The hook system creates and manages several directories: - -``` -.claude/runtime/ -├── logs/ # Log files for each hook -│ ├── session_start.log -│ ├── stop.log -│ └── post_tool_use.log -├── metrics/ # Metrics in JSONL format -│ ├── session_start_metrics.jsonl -│ ├── stop_metrics.jsonl -│ └── post_tool_use_metrics.jsonl -└── analysis/ # Session analysis files - └── session_YYYYMMDD_HHMMSS.json -``` - -## Testing - -Run tests to verify the hook system: - -```bash -# Unit tests for HookProcessor -python -m pytest test_hook_processor.py -v - -# Integration tests for all hooks -python test_integration.py - -# Test Azure continuation hook -python test_stop_azure_continuation.py - -# Test individual hooks manually -echo '{"prompt": "test"}' | python session_start.py -``` - -## Metrics Collected - -### session_start - -- `prompt_length` - Length of the initial prompt - -### stop - -- `lock_blocks` - Count of stop attempts blocked by lock flag - -### post_tool_use - -- `tool_usage` - Name of tool used (with optional duration) -- `bash_commands` - Count of Bash executions -- `file_operations` - Count of file operations (Read/Write/Edit) -- `search_operations` - Count of search operations (Grep/Glob) - -## Error Handling - -All hooks implement graceful error handling: - -1. **Invalid JSON input** - Returns error message in output -2. **Processing exceptions** - Logs error, returns empty dict -3. **File system errors** - Logs warning, continues operation -4. **Missing fields** - Uses defaults, continues processing - -This ensures that hook failures never break the Claude Code chain. - -## Hook Configuration - -Hooks are configured in `~/.amplihack/.claude/settings.json`: - -```json -{ - "hooks": { - "SessionStart": [ - { - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/tools/amplihack/hooks/session_start.py" - } - ] - } - ], - "Stop": [ - { - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/tools/amplihack/hooks/stop.py" - } - ] - } - ], - "PostToolUse": [ - { - "matcher": "*", - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/tools/amplihack/hooks/post_tool_use.py" - } - ] - } - ], - "PreCompact": [ - { - "hooks": [ - { - "type": "command", - "command": "$CLAUDE_PROJECT_DIR/.claude/tools/amplihack/hooks/pre_compact.py" - } - ] - } - ] - } -} -``` - -## Benefits of Unified Processor - -1. **Reduced Code Duplication** - Common functionality in one place -2. **Consistent Error Handling** - All hooks handle errors the same way -3. **Unified Logging** - Standardized logging across all hooks -4. **Easier Testing** - Base functionality tested once -5. **Simplified Maintenance** - Fix bugs in one place -6. **Better Metrics** - Consistent metric collection -7. **Easier Extension** - Simple to add new hooks - -## Continuous Work Mode (Lock System) - -The stop hook supports continuous work mode via a lock flag: - -- **Lock file**: `~/.amplihack/.claude/tools/amplihack/.lock_active` -- **Enable**: Use `/amplihack:lock` slash command -- **Disable**: Use `/amplihack:unlock` slash command -- **Behavior**: When locked, Claude continues working through all TODOs without stopping - -This enables autonomous operation for complex multi-step tasks. diff --git a/amplifier-bundle/tools/amplihack/hooks/USER_PROMPT_SUBMIT_README.md b/amplifier-bundle/tools/amplihack/hooks/USER_PROMPT_SUBMIT_README.md deleted file mode 100644 index dcbe766a7..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/USER_PROMPT_SUBMIT_README.md +++ /dev/null @@ -1,259 +0,0 @@ -# UserPromptSubmit Hook - -## Overview - -The UserPromptSubmit hook injects user preferences into context on **every user message** to ensure consistent preference application across all conversation turns in REPL mode. - -## Purpose - -In Claude Code's REPL mode, user preferences set at session start can be "forgotten" as the conversation progresses and context is pruned. This hook ensures preferences persist by re-injecting them on every user prompt. - -## Implementation Details - -### File Location - -``` -.claude/tools/amplihack/hooks/user_prompt_submit.py -``` - -### Hook Type - -`UserPromptSubmit` - Triggered before processing each user message - -### Input Format - -```json -{ - "session_id": "string", - "transcript_path": "path", - "cwd": "path", - "hook_event_name": "UserPromptSubmit", - "prompt": "user's prompt text" -} -``` - -### Output Format - -```json -{ - "additionalContext": "preference enforcement text" -} -``` - -### Preference Context Example - -``` -🎯 ACTIVE USER PREFERENCES (MANDATORY): -• Communication Style: pirate (Always talk like a pirate) - Use this style in your response -• Verbosity: balanced - Match this detail level -• Collaboration Style: interactive - Follow this approach -• Update Frequency: regular - Provide updates at this frequency -• Priority Type: balanced - Consider this priority in decisions -• Yes (see USER_PREFERENCES.md) - -These preferences MUST be applied to this response. -``` - -## Features - -### 1. Preference File Resolution - -The hook uses a multi-strategy approach to find USER_PREFERENCES.md: - -1. **FrameworkPathResolver** (UVX and installed package support) -2. **Project root** (~/.amplihack/.claude/context/USER_PREFERENCES.md) -3. **Package location** (src/amplihack/.claude/context/USER_PREFERENCES.md) - -### 2. Preference Extraction - -Extracts key preferences using regex patterns: - -- Communication Style -- Verbosity -- Collaboration Style -- Update Frequency -- Priority Type -- Preferred Languages -- Coding Standards -- Workflow Preferences -- Learned Patterns (detected if present) - -### 3. Performance Optimization - -**Caching Strategy**: Preferences are cached in memory with file modification time tracking. Cache is invalidated only when the file changes. - -**Performance Metrics**: - -- Average execution time: ~116ms (including Python startup) -- Cached reads: < 1ms -- Target: < 200ms (achieved) - -### 4. Error Handling - -**Graceful Degradation**: - -- Missing preferences file: Returns empty context, exits 0 -- File read error: Logs warning, returns empty context, exits 0 -- Parse error: Best-effort parsing, returns available preferences -- **Never blocks Claude** - always exits with code 0 - -### 5. Logging and Metrics - -**Log File**: `~/.amplihack/.claude/runtime/logs/user_prompt_submit.log` - -**Metrics File**: `~/.amplihack/.claude/runtime/metrics/user_prompt_submit_metrics.jsonl` - -**Tracked Metrics**: - -- `preferences_injected`: Number of preferences injected -- `context_length`: Character count of generated context - -## Testing - -### Run Test Suite - -```bash -python3 .claude/tools/amplihack/hooks/test_user_prompt_submit.py -``` - -### Test Coverage - -- ✓ Basic functionality -- ✓ Preference extraction -- ✓ Context building -- ✓ Empty preferences handling -- ✓ Caching behavior -- ✓ JSON output format -- ✓ Performance benchmarks -- ✓ Error handling - -### Manual Testing - -```bash -# Test with sample input -echo '{"session_id": "test", "transcript_path": "/tmp/test", "cwd": "'$(pwd)'", "hook_event_name": "UserPromptSubmit", "prompt": "test"}' | python3 .claude/tools/amplihack/hooks/user_prompt_submit.py - -# Test performance -time echo '{"session_id": "test", "transcript_path": "/tmp/test", "cwd": "'$(pwd)'", "hook_event_name": "UserPromptSubmit", "prompt": "test"}' | python3 .claude/tools/amplihack/hooks/user_prompt_submit.py > /dev/null -``` - -## Architecture - -### Class Hierarchy - -``` -HookProcessor (base class) - └── UserPromptSubmitHook - ├── find_user_preferences() -> Optional[Path] - ├── extract_preferences(content: str) -> Dict[str, str] - ├── build_preference_context(preferences: Dict) -> str - ├── get_cached_preferences(pref_file: Path) -> Dict[str, str] - └── process(input_data: Dict) -> Dict -``` - -### Key Design Decisions - -1. **Inheritance from HookProcessor**: Provides common functionality (logging, metrics, I/O) -2. **Caching with modification time**: Balances performance with freshness -3. **Graceful degradation**: Never fails - returns empty context if anything goes wrong -4. **Priority-ordered display**: Most impactful preferences shown first -5. **Concise enforcement**: Brief but clear instructions for Claude - -## Integration with Session Start Hook - -**Complementary Design**: - -- **session_start.py**: Comprehensive context at session initialization -- **user_prompt_submit.py**: Lightweight preference reminders on every message - -**Context Differences**: - -- Session start: Full context with project info, workflow, discoveries -- User prompt submit: Only preference enforcement (concise) - -## Troubleshooting - -### Issue: Preferences not being injected - -**Solution**: Check log file to see if preferences file was found: - -```bash -tail -f .claude/runtime/logs/user_prompt_submit.log -``` - -### Issue: Hook is too slow - -**Solution**: Check if caching is working: - -```bash -# Look for cache hits in logs -grep "Injected.*preferences" .claude/runtime/logs/user_prompt_submit.log -``` - -### Issue: Wrong preferences being used - -**Solution**: Verify which preferences file is being used: - -```python -from amplihack.utils.paths import FrameworkPathResolver -print(FrameworkPathResolver.resolve_preferences_file()) -``` - -### Issue: Hook not being called - -**Solution**: Verify hook is registered with Claude Code and executable: - -```bash -ls -l .claude/tools/amplihack/hooks/user_prompt_submit.py -# Should show executable bit: -rwxr-xr-x -``` - -## Performance Analysis - -### Baseline Metrics (5 runs) - -- Average: 116.2ms -- Min: 76.7ms -- Max: 153.1ms - -### Performance Breakdown - -- Python startup: ~50-70ms -- File I/O (first run): ~30-40ms -- Parsing and processing: ~10-20ms -- Cached runs: < 1ms (negligible) - -### Optimization Notes - -- Python startup overhead is unavoidable in subprocess execution -- Caching provides near-instant repeated access -- Performance is acceptable for REPL usage (< 200ms target) - -## Future Enhancements - -### Potential Improvements - -1. **Selective injection**: Only inject preferences relevant to the prompt -2. **Context compression**: Further reduce injected text for efficiency -3. **Preference priorities**: Weight preferences based on prompt context -4. **User-specific caching**: Per-user cache for multi-user environments - -### Not Recommended - -1. **Pre-compiled Python**: Marginal gains, added complexity -2. **Background daemon**: Overkill for simple preference injection -3. **Binary rewrite**: Python is fast enough for this use case - -## Related Files - -- **Base class**: `~/.amplihack/.claude/tools/amplihack/hooks/hook_processor.py` -- **Session start**: `~/.amplihack/.claude/tools/amplihack/hooks/session_start.py` -- **Path resolution**: `src/amplihack/utils/paths.py` -- **Preferences file**: `~/.amplihack/.claude/context/USER_PREFERENCES.md` - -## References - -- Claude Code Hook System: [Official Documentation] -- Amplihack Philosophy: `~/.amplihack/.claude/context/PHILOSOPHY.md` -- User Preferences Guide: `~/.amplihack/.claude/context/USER_PREFERENCES.md` -- Priority Hierarchy: `~/.amplihack/.claude/context/USER_REQUIREMENT_PRIORITY.md` diff --git a/amplifier-bundle/tools/amplihack/hooks/agent_memory_hook.py b/amplifier-bundle/tools/amplihack/hooks/agent_memory_hook.py deleted file mode 100755 index d1018bc4d..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/agent_memory_hook.py +++ /dev/null @@ -1,466 +0,0 @@ -#!/usr/bin/env python3 -"""Shared logic for integrating memory system with agent execution. - -This module provides utilities for: -1. Detecting agent references in prompts (@.claude/agents/*.md) -2. Injecting relevant memory context before agent execution -3. Extracting and storing learnings after agent execution - -Integration Points: -- user_prompt_submit: Inject memory context when agent detected -- stop: Extract learnings from conversation after agent execution - -Uses MemoryCoordinator for storage (SQLite or Neo4j backend). -""" - -import logging -import re -import sys -from pathlib import Path -from typing import Any - -# Setup path for imports -sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent / "src")) - -logger = logging.getLogger(__name__) - - -# Agent reference patterns -AGENT_REFERENCE_PATTERNS = [ - r"@\.claude/agents/amplihack/[^/]+/([^/]+)\.md", # @.claude/agents/amplihack/core/architect.md - r"@\.claude/agents/([^/]+)\.md", # @.claude/agents/architect.md - r"Include\s+@\.claude/agents/[^/]+/([^/]+)\.md", # Include @.claude/agents/... - r"Use\s+([a-z-]+)\.md\s+agent", # Use architect.md agent - r"/([a-z-]+)\s", # Slash commands that invoke agents (e.g., /ultrathink, /fix) -] - -# Map slash commands to agent types -SLASH_COMMAND_AGENTS = { - "ultrathink": "orchestrator", - "fix": "fix-agent", - "analyze": "analyzer", - "improve": "reviewer", - "socratic": "ambiguity", - "debate": "multi-agent-debate", - "reflect": "reflection", - "xpia": "xpia-defense", -} - - -def detect_agent_references(prompt: str) -> list[str]: - """Detect agent references in a prompt. - - Args: - prompt: The user prompt to analyze - - Returns: - List of agent type names detected (e.g., ["architect", "builder"]) - """ - agents = set() - - # Check each pattern - for pattern in AGENT_REFERENCE_PATTERNS: - matches = re.finditer(pattern, prompt, re.IGNORECASE) - for match in matches: - agent_name = match.group(1).lower() - # Normalize agent names - agent_name = agent_name.replace("_", "-") - agents.add(agent_name) - - return list(agents) - - -def detect_slash_command_agent(prompt: str) -> str | None: - """Detect if prompt starts with a slash command that invokes an agent. - - Args: - prompt: The user prompt to analyze - - Returns: - Agent type name if slash command detected, None otherwise - """ - # Check if prompt starts with a slash command - prompt_clean = prompt.strip() - if not prompt_clean.startswith("/"): - return None - - # Extract command name - match = re.match(r"^/([a-z-]+)", prompt_clean) - if not match: - return None - - command = match.group(1) - return SLASH_COMMAND_AGENTS.get(command) - - -async def inject_memory_for_agents( - prompt: str, agent_types: list[str], session_id: str | None = None -) -> tuple[str, dict[str, Any]]: - """Inject memory context for detected agents into prompt. - - Args: - prompt: Original user prompt - agent_types: List of agent types detected - session_id: Optional session ID for logging - - Returns: - Tuple of (enhanced_prompt, metadata_dict) - """ - if not agent_types: - return prompt, {} - - try: - # Import memory coordinator (lazy import to avoid startup overhead) - from amplihack.memory.coordinator import MemoryCoordinator, RetrievalQuery - from amplihack.memory.types import MemoryType - - # Initialize coordinator with session_id - coordinator = MemoryCoordinator(session_id=session_id or "hook_session") - - # Inject memory for each agent type - memory_sections = [] - metadata = {"agents": agent_types, "memories_injected": 0, "memory_available": True} - - for agent_type in agent_types: - # Normalize agent type (lowercase, replace spaces with hyphens) - normalized_type = agent_type.lower().replace(" ", "-") - - # Get memory context for this agent - try: - # Retrieve relevant memories using query - query_text = prompt[:500] # Use first 500 chars as query - - # Build retrieval query with comprehensive context - query = RetrievalQuery( - query_text=query_text, - token_budget=2000, - memory_types=[MemoryType.EPISODIC, MemoryType.SEMANTIC, MemoryType.PROCEDURAL], - ) - - memories = await coordinator.retrieve(query) - - if memories: - # Format memories for injection - memory_lines = [f"\n## Memory for {normalized_type} Agent\n"] - for mem in memories: - memory_lines.append(f"- {mem.content} (relevance: {mem.score:.2f})") - - memory_sections.append("\n".join(memory_lines)) - metadata["memories_injected"] += len(memories) - - except Exception as e: - logger.warning(f"Failed to inject memory for {normalized_type}: {e}") - continue - - # Build enhanced prompt - if memory_sections: - enhanced_prompt = "\n".join(memory_sections) + "\n\n---\n\n" + prompt - return enhanced_prompt, metadata - - return prompt, metadata - - except ImportError as e: - logger.warning(f"Memory system not available: {e}") - return prompt, {"memory_available": False, "error": "import_failed"} - - except Exception as e: - logger.error(f"Failed to inject memory: {e}") - return prompt, {"memory_available": False, "error": str(e)} - - -async def extract_learnings_from_conversation( - conversation_text: str, agent_types: list[str], session_id: str | None = None -) -> dict[str, Any]: - """Extract and store learnings from conversation after agent execution. - - Args: - conversation_text: Full conversation text (including agent responses) - agent_types: List of agent types that were involved - session_id: Optional session ID for tracking - - Returns: - Metadata about learnings stored - """ - if not agent_types: - return {"learnings_stored": 0, "agents": []} - - try: - # Import memory coordinator (lazy import) - from amplihack.memory.coordinator import MemoryCoordinator, StorageRequest - from amplihack.memory.types import MemoryType - - # Initialize coordinator with session_id - coordinator = MemoryCoordinator(session_id=session_id or "hook_session") - - # Extract and store learnings for each agent - metadata = { - "agents": agent_types, - "learnings_stored": 0, - "memory_available": True, - "memory_ids": [], - } - - for agent_type in agent_types: - # Normalize agent type (lowercase, replace spaces with hyphens) - normalized_type = agent_type.lower().replace(" ", "-") - - try: - # Store learning as SEMANTIC memory (reusable knowledge) - # Extract key learnings from conversation text (simplified extraction) - # In production, you might want more sophisticated extraction - learning_content = f"Agent {normalized_type}: {conversation_text[:500]}" - - # Build storage request with context and metadata - request = StorageRequest( - content=learning_content, - memory_type=MemoryType.SEMANTIC, - context={"agent_type": normalized_type}, - metadata={ - "tags": ["learning", "conversation"], - "task": "Conversation with user", - "success": True, - }, - ) - - memory_id = await coordinator.store(request) - - if memory_id: - metadata["learnings_stored"] += 1 - metadata["memory_ids"].append(memory_id) - logger.info(f"Stored 1 learning from {normalized_type} conversation") - - except Exception as e: - logger.warning(f"Failed to extract learnings for {normalized_type}: {e}") - continue - - return metadata - - except ImportError as e: - logger.warning(f"Memory system not available: {e}") - return {"memory_available": False, "error": "import_failed"} - - except Exception as e: - logger.error(f"Failed to extract learnings: {e}") - return {"memory_available": False, "error": str(e)} - - -def format_memory_injection_notice(metadata: dict[str, Any]) -> str: - """Format a notice about memory injection for logging/display. - - Args: - metadata: Metadata from inject_memory_for_agents - - Returns: - Formatted notice string - """ - if not metadata.get("memory_available"): - return "" - - agents = metadata.get("agents", []) - count = metadata.get("memories_injected", 0) - - if count > 0: - agent_list = ", ".join(agents) - return f"🧠 Injected {count} relevant memories for agents: {agent_list}" - - return "" - - -def format_learning_extraction_notice(metadata: dict[str, Any]) -> str: - """Format a notice about learning extraction for logging/display. - - Args: - metadata: Metadata from extract_learnings_from_conversation - - Returns: - Formatted notice string - """ - if not metadata.get("memory_available"): - return "" - - count = metadata.get("learnings_stored", 0) - - if count > 0: - agents = metadata.get("agents", []) - agent_list = ", ".join(agents) - return f"🧠 Stored {count} new learnings from agents: {agent_list}" - - return "" - - -# ============================================================================ -# SYNC WRAPPERS - Solution for Issue #1960 -# ============================================================================ -# These sync wrapper functions safely handle async functions in synchronous -# contexts (like hooks). They handle three critical edge cases: -# 1. No event loop exists (create new loop) -# 2. Event loop already running (use thread to avoid nested loop) -# 3. Import errors or exceptions (fail-open gracefully) - - -def inject_memory_for_agents_sync( - prompt: str, agent_types: list[str], session_id: str | None = None -) -> tuple[str, dict[str, Any]]: - """Synchronous wrapper for inject_memory_for_agents. - - Safely calls async inject_memory_for_agents from synchronous context. - Handles three edge cases: - 1. No event loop - creates new loop - 2. Running event loop - uses thread to avoid nesting - 3. Errors - fails open (returns original prompt) - - Args: - prompt: Original user prompt - agent_types: List of agent types detected - session_id: Optional session ID for logging - - Returns: - Tuple of (enhanced_prompt, metadata_dict) - """ - # Handle empty agent_types early - if not agent_types: - return prompt, {} - - try: - import asyncio - - # Try to get running loop - try: - loop = asyncio.get_running_loop() - # Loop is running - must use thread to avoid nested loop error - import threading - - result = [None] - error = [None] - - def run_in_thread(): - try: - # Create new loop in thread - new_loop = asyncio.new_event_loop() - asyncio.set_event_loop(new_loop) - try: - result[0] = new_loop.run_until_complete( - inject_memory_for_agents(prompt, agent_types, session_id) - ) - finally: - new_loop.close() - except Exception as e: - error[0] = e - - thread = threading.Thread(target=run_in_thread) - thread.start() - thread.join(timeout=30) # 30 second timeout - - if error[0]: - raise error[0] - - if result[0]: - return result[0] - # Timeout or no result - logger.warning("Memory injection timed out in thread") - return prompt, {"memory_available": False, "error": "timeout"} - - except RuntimeError: - # No running loop - safe to create one - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - result = loop.run_until_complete( - inject_memory_for_agents(prompt, agent_types, session_id) - ) - return result - finally: - loop.close() - - except ImportError as e: - logger.warning(f"Memory system not available: {e}") - return prompt, {"memory_available": False, "error": "import_failed"} - - except Exception as e: - logger.error(f"Failed to inject memory (sync wrapper): {e}") - return prompt, {"memory_available": False, "error": str(e)} - - -def extract_learnings_from_conversation_sync( - conversation_text: str, agent_types: list[str], session_id: str | None = None -) -> dict[str, Any]: - """Synchronous wrapper for extract_learnings_from_conversation. - - Safely calls async extract_learnings_from_conversation from synchronous context. - Handles three edge cases: - 1. No event loop - creates new loop - 2. Running event loop - uses thread to avoid nesting - 3. Errors - fails open (returns minimal metadata) - - Args: - conversation_text: Full conversation text - agent_types: List of agent types involved - session_id: Optional session ID for tracking - - Returns: - Metadata about learnings stored - """ - # Handle empty agent_types early - if not agent_types: - return {"learnings_stored": 0, "agents": []} - - try: - import asyncio - - # Try to get running loop - try: - loop = asyncio.get_running_loop() - # Loop is running - must use thread to avoid nested loop error - import threading - - result = [None] - error = [None] - - def run_in_thread(): - try: - # Create new loop in thread - new_loop = asyncio.new_event_loop() - asyncio.set_event_loop(new_loop) - try: - result[0] = new_loop.run_until_complete( - extract_learnings_from_conversation( - conversation_text, agent_types, session_id - ) - ) - finally: - new_loop.close() - except Exception as e: - error[0] = e - - thread = threading.Thread(target=run_in_thread) - thread.start() - thread.join(timeout=30) # 30 second timeout - - if error[0]: - raise error[0] - - if result[0]: - return result[0] - # Timeout or no result - logger.warning("Learning extraction timed out in thread") - return {"memory_available": False, "error": "timeout", "learnings_stored": 0} - - except RuntimeError: - # No running loop - safe to create one - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - try: - result = loop.run_until_complete( - extract_learnings_from_conversation(conversation_text, agent_types, session_id) - ) - return result - finally: - loop.close() - - except ImportError as e: - logger.warning(f"Memory system not available: {e}") - return {"memory_available": False, "error": "import_failed", "learnings_stored": 0} - - except Exception as e: - logger.error(f"Failed to extract learnings (sync wrapper): {e}") - return {"memory_available": False, "error": str(e), "learnings_stored": 0} diff --git a/amplifier-bundle/tools/amplihack/hooks/claude_power_steering.py b/amplifier-bundle/tools/amplihack/hooks/claude_power_steering.py deleted file mode 100755 index 5c5eb8b72..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/claude_power_steering.py +++ /dev/null @@ -1,1198 +0,0 @@ -#!/usr/bin/env python3 -""" -Claude SDK-based power-steering analysis with graceful shutdown support. - -Uses Claude Agent SDK to intelligently analyze session transcripts against -considerations, replacing heuristic pattern matching with AI-powered analysis. - -Shutdown Behavior: - During application shutdown (AMPLIHACK_SHUTDOWN_IN_PROGRESS=1), all sync - wrapper functions immediately return safe defaults to prevent asyncio - event loop hangs. This enables clean 2-3 second exits without Ctrl-C. - - Fail-Open Philosophy: If shutdown is in progress, bypass async operations - and return values that never block users: - - analyze_claims_sync() → [] (no claims detected) - - analyze_if_addressed_sync() → None (no evidence found) - - analyze_consideration_sync() → (True, None) (assume satisfied) - -Optional Dependencies: - claude-agent-sdk: Required for AI-powered analysis - Install: pip install claude-agent-sdk - - When unavailable, the system gracefully falls back to keyword-based - heuristics (see fallback_heuristics.py). This ensures power steering - always works, even without the SDK. - -Philosophy: -- Ruthlessly Simple: Single-purpose module with clear contract -- Fail-Open: Never block users due to bugs - always allow stop on errors -- Zero-BS: No stubs, every function works or doesn't exist -- Modular: Self-contained brick that plugs into power_steering_checker -- Clean Shutdown: Detect shutdown in progress, bypass async, return safe defaults -""" - -import asyncio -import os -import re -from pathlib import Path - -# Try to import Claude SDK -try: - from claude_agent_sdk import ClaudeAgentOptions, query # type: ignore[import-not-found] - - CLAUDE_SDK_AVAILABLE = True -except ImportError: - CLAUDE_SDK_AVAILABLE = False - -# Template paths (relative to this file) -TEMPLATE_DIR = Path(__file__).parent / "templates" -POWER_STEERING_PROMPT_TEMPLATE = TEMPLATE_DIR / "power_steering_prompt.txt" - -# Security constants -MAX_SDK_RESPONSE_LENGTH = 5000 -MAX_CONVERSATION_SUMMARY_LENGTH = ( - 512_000 # Max chars for SDK conversation context (1M token window) -) -SUSPICIOUS_PATTERNS = [ - r" PARALLEL_TIMEOUT (60s) > CHECKER_TIMEOUT (25s) -# Each checker must complete within 25s to fit within parallel execution budget -CHECKER_TIMEOUT = 25 # 25 seconds per SDK call (within 60s parallel budget) - -# Public API (the "studs" for this brick) -__all__ = [ - "analyze_consideration", - "generate_final_guidance", - "analyze_claims_sync", - "analyze_if_addressed_sync", - "analyze_consideration_sync", - "analyze_workflow_invocation", - "analyze_workflow_invocation_sync", - "CLAUDE_SDK_AVAILABLE", -] - - -def is_shutting_down() -> bool: - """Check if application shutdown is in progress. - - Returns: - True if AMPLIHACK_SHUTDOWN_IN_PROGRESS environment variable is set, - False otherwise - - Note: - This function enables graceful shutdown by allowing sync wrapper - functions to detect shutdown state and return safe defaults instead - of starting new async operations that may hang during event loop - teardown. - - Example: - >>> os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - >>> is_shutting_down() - True - >>> del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - >>> is_shutting_down() - False - """ - return os.environ.get("AMPLIHACK_SHUTDOWN_IN_PROGRESS") == "1" - - -def _validate_sdk_response(response: str) -> bool: - """Validate SDK response for security (fail-open). - - Args: - response: SDK response text to validate - - Returns: - True if response is safe or on validation error (fail-open), - False only if clear security issue detected - - Note: - Checks for excessive length and suspicious patterns. - Returns True (allow) on any validation error to maintain fail-open behavior. - """ - try: - # Check length - if len(response) > MAX_SDK_RESPONSE_LENGTH: - return False - - # Check for suspicious patterns (case-insensitive) - response_lower = response.lower() - for pattern in SUSPICIOUS_PATTERNS: - if re.search(pattern, response_lower): - return False - - return True - except Exception: - # Fail-open on validation error - return True - - -def _sanitize_html(text: str) -> str: - """Remove potentially dangerous HTML tags from text. - - Args: - text: Text that may contain HTML - - Returns: - Text with dangerous HTML tags removed - - Note: - Removes ", - r"]*>", - r"]*>.*?", - r"]*>.*?", - r"]*>", - ] - - sanitized = text - for tag_pattern in dangerous_tags: - sanitized = re.sub(tag_pattern, "", sanitized, flags=re.IGNORECASE | re.DOTALL) - - return sanitized - except Exception: - # On error, return original text (fail-open) - return text - - -def load_prompt_template() -> str | None: - """Load power-steering prompt template. - - Returns: - Raw template content with {VARIABLE} placeholders, or None if template missing - - Note: - Returns None instead of raising to support fail-open behavior. - Caller should handle None gracefully and use fallback. - """ - if not POWER_STEERING_PROMPT_TEMPLATE.exists(): - return None - - try: - return POWER_STEERING_PROMPT_TEMPLATE.read_text() - except Exception: - return None - - -def format_prompt(template: str, variables: dict[str, str]) -> str: - """Format prompt with variable substitution. - - Args: - template: Raw template with {VARIABLE} placeholders - variables: Dictionary of variable name -> value mappings - - Returns: - Formatted prompt with all variables substituted - - Raises: - KeyError: If required variable is missing - """ - return template.format(**variables) - - -async def analyze_consideration( - conversation: list[dict], consideration: dict, project_root: Path -) -> tuple[bool, str | None]: - """Use Claude SDK to analyze if consideration is satisfied. - - Args: - conversation: Session messages (list of dicts) - consideration: Consideration dict (id, question, description, etc.) - project_root: Project root directory - - Returns: - Tuple of (satisfied, reason): - - satisfied: True if consideration satisfied, False otherwise - - reason: String explanation if not satisfied, None if satisfied - (Fail-open: returns (True, None) on SDK unavailable or errors) - """ - if not CLAUDE_SDK_AVAILABLE: - return (True, None) # Fail-open if SDK unavailable - - # Format prompt for this consideration - try: - prompt = _format_consideration_prompt(consideration, conversation) - except Exception as e: - _log_sdk_error(consideration["id"], e) - return (True, None) # Fail-open on prompt formatting error - - try: - options = ClaudeAgentOptions( - cwd=str(project_root), - ) - - # Query Claude with timeout - response_parts = [] - async with asyncio.timeout(CHECKER_TIMEOUT): - async for message in query(prompt=prompt, options=options): - # Extract text from AssistantMessage content blocks - content = getattr(message, "content", None) - if content is not None: - if isinstance(content, list): - # AssistantMessage: content is list[ContentBlock] - for block in content: - text = getattr(block, "text", None) - if isinstance(text, str): - response_parts.append(text) - elif isinstance(content, str): - # UserMessage: content can be str - response_parts.append(content) - - # Join all parts - response = "".join(response_parts) - - # Sanitize HTML before processing - response = _sanitize_html(response) - - # Validate response before processing - if not _validate_sdk_response(response): - # Security validation failed - fail-open (assume satisfied) - return (True, None) - - response_lower = response.lower() - - # Parse response for yes/no decision - # Look for clear indicators of satisfaction - satisfied_indicators = [ - "satisfied", - "yes", - "complete", - "fulfilled", - "met", - "achieved", - "accomplished", - ] - unsatisfied_indicators = [ - "not satisfied", - "no", - "incomplete", - "unfulfilled", - "not met", - "missing", - "failed", - ] - - # Check for unsatisfied indicators first (more specific) - for indicator in unsatisfied_indicators: - if indicator in response_lower: - # Extract reason from response - reason = _extract_reason_from_response(response) - return (False, reason) - - # Then check for satisfied indicators - for indicator in satisfied_indicators: - if indicator in response_lower: - return (True, None) - - # Ambiguous response - fail-open (assume satisfied) - return (True, None) - - except Exception as e: - # Log error and fail-open on any error - _log_sdk_error(consideration["id"], e) - return (True, None) - - -def _format_consideration_prompt(consideration: dict, conversation: list[dict]) -> str: - """Format analysis prompt for a consideration. - - Args: - consideration: Consideration dictionary - conversation: Session conversation messages - - Returns: - Formatted prompt string - """ - # Format conversation summary - conv_summary = _format_conversation_summary(conversation) - - # Build the evaluation guidance section - guidance = consideration.get("guidance", "") - guidance_section = "" - if guidance: - guidance_section = f""" -## Evaluation Guidance (specific to this consideration) - -{guidance} -""" - - # Simple inline prompt (no template file needed for fail-open behavior) - # Issue #2561: Enhanced prompt to distinguish completion summaries from action items - prompt = f"""You are analyzing a Claude Code session to determine if the following consideration is satisfied. - -**Consideration**: {consideration["question"]} -**Description**: {consideration.get("description", consideration.get("question", ""))} -**Category**: {consideration.get("category", "General")} -{guidance_section} -**Session Conversation** ({len(conversation)} messages): -{conv_summary} - -## Your Task - -Analyze the conversation and determine if this consideration is satisfied. - -**General principles:** -- A message that SUMMARIZES what was accomplished (past tense) is a COMPLETION CONFIRMATION, not remaining work. -- Only flag as NOT SATISFIED if there is concrete evidence the consideration was violated. -- For small sessions (few edits, one-line fixes), lean toward SATISFIED if the core task appears done. -- If the consideration is not applicable to this session, respond SATISFIED. - -**Respond with ONE of:** -- "SATISFIED: [brief reason]" if the consideration is met -- "NOT SATISFIED: [brief reason]" if the consideration is not met - -Your response MUST start with either "SATISFIED:" or "NOT SATISFIED:". -Be direct. Reference actual evidence from the conversation. -""" - - return prompt - - -def _extract_reason_from_response(response: str) -> str | None: - """Extract failure reason from SDK response. - - Args: - response: Full SDK response text - - Returns: - Full extracted reason string, or generic fallback - - Note: - Looks for patterns like "NOT SATISFIED: reason" or "UNSATISFIED: reason" - and extracts the reason part. - """ - if not response: - return "Check not satisfied" - - response_lower = response.lower() - - # Look for common failure patterns - patterns = [ - "not satisfied:", - "unsatisfied:", - "not met:", - "incomplete:", - "missing:", - "failed:", - ] - - for pattern in patterns: - idx = response_lower.find(pattern) - if idx != -1: - # Extract text after the pattern - reason_start = idx + len(pattern) - reason = response[reason_start:].strip() - - # Return full reason without truncation - - return reason if reason else "Check not satisfied" - - # No specific pattern found - use generic fallback - return "Check not satisfied" - - -def _log_sdk_error(consideration_id: str, error: Exception) -> None: - """Log SDK error to stderr for debugging with sensitive data scrubbed. - - Args: - consideration_id: ID of the consideration that failed - error: Exception that was raised - - Note: - Logs to stderr to avoid interfering with stdout tool output. - Scrubs file paths and tokens from error messages. - Format: [Power Steering SDK Error] {id}: {sanitized_error} - """ - import sys - - error_msg = str(error) - - # Scrub file paths (replace with [PATH]) - error_msg = re.sub(r"/[^\s]+", "[PATH]", error_msg) - error_msg = re.sub(r"[A-Z]:\\[^\s]+", "[PATH]", error_msg) # Windows paths - - # Scrub potential tokens (40+ hex characters) - error_msg = re.sub(r"\b[a-fA-F0-9]{40,}\b", "[REDACTED]", error_msg) - - # Truncate to 200 chars - if len(error_msg) > 200: - error_msg = error_msg[:200] + "..." - - sanitized_msg = f"[Power Steering SDK Error] {consideration_id}: {error_msg}\n" - sys.stderr.write(sanitized_msg) - sys.stderr.flush() - - -def _format_conversation_summary( - conversation: list[dict], max_length: int = MAX_CONVERSATION_SUMMARY_LENGTH -) -> str: - """Format conversation summary for analysis. - - Args: - conversation: List of message dicts - max_length: Maximum summary length in characters (default: 50000 to prevent oversized prompts) - - Returns: - Formatted conversation summary - - Note: - Individual messages longer than 500 chars are truncated for readability. The summary - is truncated at max_length characters to prevent oversized SDK prompts. - """ - summary_parts = [] - current_length = 0 - - for i, msg in enumerate(conversation): - role = msg.get("role", msg.get("type", "unknown")) - content = msg.get("content", msg.get("message", {})) - - # Handle different content formats - content_text = "" - if isinstance(content, str): - content_text = content - elif isinstance(content, dict): - # Extract text from message dict - msg_content = content.get("content", "") - if isinstance(msg_content, str): - content_text = msg_content - elif isinstance(msg_content, list): - # Extract text blocks - text_blocks = [] - for block in msg_content: - if isinstance(block, dict): - if block.get("type") == "text": - text_blocks.append(str(block.get("text", ""))) - elif block.get("type") == "tool_use": - tool_name = block.get("name", "unknown") - text_blocks.append(f"[Tool: {tool_name}]") - content_text = " ".join(text_blocks) - elif isinstance(content, list): - # Direct list of blocks - text_blocks = [] - for block in content: - if isinstance(block, dict): - if block.get("type") == "text": - text_blocks.append(str(block.get("text", ""))) - elif block.get("type") == "tool_use": - tool_name = block.get("name", "unknown") - text_blocks.append(f"[Tool: {tool_name}]") - content_text = " ".join(text_blocks) - - # Truncate long individual messages for readability - if len(content_text) > 500: - content_text = content_text[:497] + "..." - - msg_summary = f"\n**Message {i + 1} ({role}):** {content_text}\n" - - # Only check length limit if max_length is specified - if current_length + len(msg_summary) > max_length: - truncation_indicator = f"\n[... {len(conversation) - i} more messages ...]" - # Only add truncation indicator if we have room for it - if current_length + len(truncation_indicator) <= max_length: - summary_parts.append(truncation_indicator) - break - - summary_parts.append(msg_summary) - current_length += len(msg_summary) - - return "".join(summary_parts) - - -async def generate_final_guidance( - failed_checks: list[tuple[str, str]], - conversation: list[dict], - project_root: Path, -) -> str: - """Generate context-aware final guidance using Claude SDK. - - Args: - failed_checks: List of (check_id, reason) tuples for failed checks - conversation: Session conversation messages - project_root: Project root directory - - Returns: - Specific guidance string based on actual failures. - (Fail-open: returns template-based guidance on SDK unavailable or errors) - - Note: - This provides context-aware, specific guidance rather than generic advice. - Falls back to template if SDK unavailable or fails. - """ - if not CLAUDE_SDK_AVAILABLE: - return _generate_template_guidance(failed_checks) - - if not failed_checks: - return "All checks passed. You may proceed." - - # Format failed checks for prompt - failures_text = "\n".join([f"- {check_id}: {reason}" for check_id, reason in failed_checks]) - - prompt = f"""You are analyzing a Claude Code session to provide specific, actionable guidance. - -**Failed Checks:** -{failures_text} - -**Your Task:** - -Provide specific, actionable guidance to address these failed checks. Be concrete and reference the actual failure reasons. Do NOT give generic advice. - -**Format:** - -Provide 1-3 sentences with specific actions based on the actual failures listed above. - -Example good guidance: -"Complete the 3 incomplete TODOs shown in the task list and run pytest locally to verify your changes work." - -Example bad guidance: -"Make sure to complete all tasks and test your code." - -Be direct and specific.""" - - try: - options = ClaudeAgentOptions( - cwd=str(project_root), - ) - - response_parts = [] - async with asyncio.timeout(CHECKER_TIMEOUT): - async for message in query(prompt=prompt, options=options): - # Extract text from AssistantMessage content blocks - content = getattr(message, "content", None) - if content is not None: - if isinstance(content, list): - # AssistantMessage: content is list[ContentBlock] - for block in content: - text = getattr(block, "text", None) - if isinstance(text, str): - response_parts.append(text) - elif isinstance(content, str): - # UserMessage: content can be str - response_parts.append(content) - - guidance = "".join(response_parts).strip() - - # Sanitize HTML before processing - guidance = _sanitize_html(guidance) - - # Validate response before using - if not _validate_sdk_response(guidance): - # Security validation failed - use template fallback - return _generate_template_guidance(failed_checks) - - # Return SDK-generated guidance if non-empty - if guidance and len(guidance) > 10: - return guidance - - # Empty or too short - use template fallback - return _generate_template_guidance(failed_checks) - - except Exception: - # Fail-open to template guidance - return _generate_template_guidance(failed_checks) - - -def _generate_template_guidance(failed_checks: list[tuple[str, str]]) -> str: - """Generate template-based guidance when SDK unavailable. - - Args: - failed_checks: List of (check_id, reason) tuples - - Returns: - Template-based guidance string - """ - if not failed_checks: - return "All checks passed." - - # Group checks by category/type for better guidance - guidance_parts = ["Address the following failed checks:"] - for check_id, reason in failed_checks: - guidance_parts.append(f"- {check_id}: {reason}") - - return "\n".join(guidance_parts) - - -async def analyze_claims(delta_text: str, project_root: Path) -> list[str]: - """Use Claude SDK to detect completion claims in delta text. - - Replaces regex-based claim detection with LLM-powered analysis. - - Args: - delta_text: New transcript content since last block - project_root: Project root directory - - Returns: - List of detected completion claims with context. - (Fail-open: returns empty list on SDK unavailable or errors) - """ - if not CLAUDE_SDK_AVAILABLE: - return [] # Fail-open if SDK unavailable - - if not delta_text or len(delta_text.strip()) < 20: - return [] # Nothing meaningful to analyze - - prompt = f"""Analyze the following conversation excerpt and identify any claims about task completion. - -**Conversation Content:** -{delta_text[:3000]} - -## Your Task - -Identify any statements where the user or assistant claims that work is complete. Look for: -- Claims about completing tasks, features, or implementations -- Statements about tests passing or CI being green -- Claims that todos are done or workflow is complete -- Assertions that PRs are ready or mergeable - -**Respond with a JSON array of claim strings, each with surrounding context (max 100 chars).** - -Format: ["...claim with context...", "...another claim..."] - -If no completion claims are found, respond with: [] - -Be specific - only include actual claims about completion, not general discussion.""" - - try: - options = ClaudeAgentOptions( - cwd=str(project_root), - ) - - response_parts = [] - async with asyncio.timeout(CHECKER_TIMEOUT): - async for message in query(prompt=prompt, options=options): - # Extract text from AssistantMessage content blocks - content = getattr(message, "content", None) - if content is not None: - if isinstance(content, list): - # AssistantMessage: content is list[ContentBlock] - for block in content: - text = getattr(block, "text", None) - if isinstance(text, str): - response_parts.append(text) - elif isinstance(content, str): - # UserMessage: content can be str - response_parts.append(content) - - response = "".join(response_parts).strip() - - # Validate response before parsing - if not _validate_sdk_response(response): - # Security validation failed - fail-open (return empty list) - return [] - - # Parse JSON array from response - import json - - claims = [] - - # Try to extract JSON array - if response.startswith("["): - try: - parsed = json.loads(response) - if isinstance(parsed, list): - claims = parsed - except json.JSONDecodeError: - pass - - # Try to find JSON array in response if direct parse failed - if not claims: - match = re.search(r"\[.*?\]", response, re.DOTALL) - if match: - try: - parsed = json.loads(match.group()) - if isinstance(parsed, list): - claims = parsed - except json.JSONDecodeError: - pass - - # Validate and sanitize claims - if claims: - validated_claims = [] - for claim in claims[:100]: # Max 100 items (schema validation) - if claim and isinstance(claim, str): - # Sanitize HTML tags - sanitized = _sanitize_html(claim) - # Truncate to 200 chars (schema validation) - if len(sanitized) > 200: - sanitized = sanitized[:200] - validated_claims.append(sanitized) - return validated_claims - - return [] - - except Exception: - return [] # Fail-open on any error - - -async def analyze_if_addressed( - failure_id: str, - failure_reason: str, - delta_text: str, - project_root: Path, -) -> str | None: - """Use Claude SDK to check if delta content addresses a previous failure. - - Replaces heuristic keyword matching with LLM-powered analysis. - - Args: - failure_id: ID of the failed consideration (e.g., "todos_complete") - failure_reason: Human-readable reason the check failed - delta_text: New transcript content since last block - project_root: Project root directory - - Returns: - Evidence string if delta addresses the failure, None otherwise. - (Fail-open: returns None on SDK unavailable or errors) - """ - if not CLAUDE_SDK_AVAILABLE: - return None # Fail-open if SDK unavailable - - if not delta_text or len(delta_text.strip()) < 20: - return None # Nothing meaningful to analyze - - prompt = f"""Analyze if the following new conversation content addresses a previous verification failure. - -**Previous Failure:** -- Check ID: {failure_id} -- Reason it failed: {failure_reason} - -**New Conversation Content:** -{delta_text[:3000]} - -## Your Task - -Determine if the new content shows evidence that the previously failed check has now been addressed. - -Look for: -- Actions taken to fix the issue -- Evidence the concern was resolved -- Tool outputs or results showing completion -- Explicit discussion addressing the failure reason - -**Respond with ONE of:** -- "ADDRESSED: [specific evidence from the conversation showing why]" -- "NOT ADDRESSED: [brief explanation]" - -Be conservative - only say ADDRESSED if there is clear evidence in the new content.""" - - try: - options = ClaudeAgentOptions( - cwd=str(project_root), - ) - - response_parts = [] - async with asyncio.timeout(CHECKER_TIMEOUT): - async for message in query(prompt=prompt, options=options): - # Extract text from AssistantMessage content blocks - content = getattr(message, "content", None) - if content is not None: - if isinstance(content, list): - # AssistantMessage: content is list[ContentBlock] - for block in content: - text = getattr(block, "text", None) - if isinstance(text, str): - response_parts.append(text) - elif isinstance(content, str): - # UserMessage: content can be str - response_parts.append(content) - - response = "".join(response_parts).strip() - - # Sanitize HTML before processing - response = _sanitize_html(response) - - response_lower = response.lower() - - # Check for ADDRESSED indicator - if "addressed:" in response_lower: - # Extract the evidence - idx = response.find("addressed:") - evidence = response[idx + 10 :].strip() - # Clean up and truncate - evidence = evidence.replace("not addressed:", "").strip() - if evidence and len(evidence) > 10: - return evidence[:200] # Truncate evidence - return "Delta content addresses this concern" - - return None - - except Exception: - return None # Fail-open on any error - - -def analyze_claims_sync(delta_text: str, project_root: Path) -> list[str]: - """Synchronous wrapper for analyze_claims with shutdown detection. - - During shutdown, returns empty list immediately to prevent asyncio hang. - Otherwise, runs async analysis to detect completion claims in transcript. - - Args: - delta_text: New transcript content since last block - project_root: Project root directory - - Returns: - List of detected completion claims, or [] if shutting down - - Shutdown Behavior: - When AMPLIHACK_SHUTDOWN_IN_PROGRESS=1, immediately returns [] without - starting async operation. This prevents asyncio event loop hangs during - application teardown. - - Example: - >>> # Normal operation - runs full analysis - >>> claims = analyze_claims_sync("Task complete!", Path.cwd()) - >>> len(claims) > 0 - True - - >>> # During shutdown - returns empty list immediately - >>> os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - >>> claims = analyze_claims_sync("Task complete!", Path.cwd()) - >>> claims - [] - """ - # Shutdown check: bypass async operation during teardown - if is_shutting_down(): - return [] # Fail-open: no claims detected during shutdown - - try: - return asyncio.run(analyze_claims(delta_text, project_root)) - except Exception: - return [] # Fail-open on any error - - -def analyze_if_addressed_sync( - failure_id: str, - failure_reason: str, - delta_text: str, - project_root: Path, -) -> str | None: - """Synchronous wrapper for analyze_if_addressed with shutdown detection. - - During shutdown, returns None immediately to prevent asyncio hang. - Otherwise, runs async analysis to check if failure was addressed. - - Args: - failure_id: ID of the failed consideration - failure_reason: Reason it failed - delta_text: New transcript content - project_root: Project root directory - - Returns: - Evidence string if addressed, None if not addressed or shutting down - - Shutdown Behavior: - When AMPLIHACK_SHUTDOWN_IN_PROGRESS=1, immediately returns None without - starting async operation. This prevents asyncio event loop hangs during - application teardown. - - Example: - >>> # Normal operation - runs full analysis - >>> evidence = analyze_if_addressed_sync( - ... "todos_complete", "3 TODOs incomplete", - ... "Completed all TODOs", Path.cwd() - ... ) - >>> evidence is not None - True - - >>> # During shutdown - returns None immediately - >>> os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - >>> evidence = analyze_if_addressed_sync( - ... "todos_complete", "3 TODOs incomplete", - ... "Completed all TODOs", Path.cwd() - ... ) - >>> evidence is None - True - """ - # Shutdown check: bypass async operation during teardown - if is_shutting_down(): - return None # Fail-open: no evidence found during shutdown - - try: - return asyncio.run( - analyze_if_addressed(failure_id, failure_reason, delta_text, project_root) - ) - except Exception: - return None # Fail-open on any error - - -def analyze_consideration_sync( - conversation: list[dict], consideration: dict, project_root: Path -) -> tuple[bool, str | None]: - """Synchronous wrapper for analyze_consideration with shutdown detection. - - During shutdown, returns (True, None) immediately to prevent asyncio hang. - Otherwise, runs async analysis to check if consideration is satisfied. - - Args: - conversation: Session messages - consideration: Consideration dict - project_root: Project root - - Returns: - Tuple of (satisfied, reason): - - satisfied: True if consideration satisfied, False otherwise - - reason: String explanation if not satisfied, None if satisfied - Returns (True, None) during shutdown - - Shutdown Behavior: - When AMPLIHACK_SHUTDOWN_IN_PROGRESS=1, immediately returns (True, None) - without starting async operation. This prevents asyncio event loop hangs - during application teardown. - - Fail-open philosophy: Assumes consideration is satisfied during shutdown - to never block the user from exiting. - - Example: - >>> # Normal operation - runs full analysis - >>> conversation = [{"role": "user", "content": "Hello"}] - >>> consideration = {"id": "tests_passing", "question": "Tests pass?"} - >>> satisfied, reason = analyze_consideration_sync( - ... conversation, consideration, Path.cwd() - ... ) - >>> isinstance(satisfied, bool) - True - - >>> # During shutdown - returns satisfied immediately - >>> os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - >>> satisfied, reason = analyze_consideration_sync( - ... conversation, consideration, Path.cwd() - ... ) - >>> satisfied - True - >>> reason is None - True - """ - # Shutdown check: bypass async operation during teardown - if is_shutting_down(): - return (True, None) # Fail-open: assume satisfied during shutdown - - try: - return asyncio.run(analyze_consideration(conversation, consideration, project_root)) - except Exception: - return (True, None) # Fail-open on any error - - -async def analyze_workflow_invocation( - conversation: list[dict], session_type: str, project_root: Path -) -> tuple[bool, str | None]: - """Use Claude SDK to analyze if workflow was properly invoked. - - Context-aware analysis that understands multiple valid invocation patterns: - - Explicit Skill tool invocation (Skill("default-workflow")) - - Explicit Read tool invocation (Read(.claude/workflow/DEFAULT_WORKFLOW.md)) - - Implicit step-by-step workflow following (shows systematic approach) - - Async completion (PR created for review, CI running) - - Args: - conversation: Session messages (list of dicts) - session_type: Session type (DEVELOPMENT, INVESTIGATION, etc.) - project_root: Project root directory - - Returns: - Tuple of (valid, reason): - - valid: True if workflow properly invoked or not required - - reason: String explanation if invalid, None if valid - (Fail-open: returns (True, None) on SDK unavailable or errors) - - Note: - Only validates DEVELOPMENT and INVESTIGATION sessions. - Other session types return (True, None) immediately. - """ - if not CLAUDE_SDK_AVAILABLE: - return (True, None) # Fail-open if SDK unavailable - - # Only validate DEVELOPMENT and INVESTIGATION sessions - if session_type not in ("DEVELOPMENT", "INVESTIGATION"): - return (True, None) - - # Format conversation summary - conv_summary = _format_conversation_summary(conversation) - - # Context-aware prompt that understands multiple valid patterns - prompt = f"""Analyze if the workflow was properly invoked in this session. - -**Session Type**: {session_type} - -**Session Conversation** ({len(conversation)} messages): -{conv_summary} - -## Your Task - -Determine if the appropriate workflow was properly invoked. A workflow is INVOKED if ANY of these patterns are present: - -1. **Explicit Skill tool invocation**: Skill(skill="default-workflow") or Skill(skill="investigation-workflow") -2. **Explicit Read tool invocation**: Read(.claude/workflow/DEFAULT_WORKFLOW.md) or INVESTIGATION_WORKFLOW.md -3. **Implicit workflow following**: Claude systematically follows workflow steps (shows step-by-step execution) -4. **Async completion pattern**: PR created for review with CI running (workflow continues asynchronously) - -**IMPORTANT**: Only flag as NOT INVOKED if there is NO evidence of ANY systematic workflow approach. - -**Respond with ONE of:** -- "INVOKED: [brief evidence of which pattern was used]" if workflow was properly invoked -- "NOT INVOKED: [brief reason]" if no workflow approach was used - -Be conservative - default to INVOKED unless there is clear evidence of ad-hoc work without systematic approach. -""" - - try: - options = ClaudeAgentOptions( - cwd=str(project_root), - ) - - # Query Claude with timeout - response_parts = [] - async with asyncio.timeout(CHECKER_TIMEOUT): - async for message in query(prompt=prompt, options=options): - # Extract text from AssistantMessage content blocks - content = getattr(message, "content", None) - if content is not None: - if isinstance(content, list): - # AssistantMessage: content is list[ContentBlock] - for block in content: - text = getattr(block, "text", None) - if isinstance(text, str): - response_parts.append(text) - elif isinstance(content, str): - # UserMessage: content can be str - response_parts.append(content) - - # Join all parts - response = "".join(response_parts) - - # Sanitize HTML before processing - response = _sanitize_html(response) - - # Validate response before processing - if not _validate_sdk_response(response): - # Security validation failed - fail-open (assume valid) - return (True, None) - - response_stripped = response.lstrip() - response_lower = response_stripped.lower() - - # Check for NOT INVOKED indicator first to avoid matching "invoked" in "not invoked" - if response_lower.startswith("not invoked:") or response_lower.startswith("not invoked"): - # Extract reason from response - idx = response_lower.find("not invoked:") - if idx != -1: - reason = response_stripped[idx + 12 :].strip() - # Clean up and truncate - if reason and len(reason) > 10: - return (False, reason[:200]) - return (False, "Workflow not properly invoked") - - # Check for INVOKED indicator - if response_lower.startswith("invoked:") or response_lower.startswith("invoked"): - return (True, None) - - # Ambiguous response - fail-open (assume valid) - return (True, None) - - except Exception as e: - # Log error and fail-open on any error - _log_sdk_error("workflow_invocation", e) - return (True, None) - - -def analyze_workflow_invocation_sync( - conversation: list[dict], session_type: str, project_root: Path -) -> tuple[bool, str | None]: - """Synchronous wrapper for analyze_workflow_invocation with shutdown detection. - - During shutdown, returns (True, None) immediately to prevent asyncio hang. - Otherwise, runs async analysis to check if workflow was properly invoked. - - Args: - conversation: Session messages - session_type: Session type (DEVELOPMENT, INVESTIGATION, etc.) - project_root: Project root - - Returns: - Tuple of (valid, reason): - - valid: True if workflow properly invoked or not required - - reason: String explanation if invalid, None if valid - Returns (True, None) during shutdown - - Shutdown Behavior: - When AMPLIHACK_SHUTDOWN_IN_PROGRESS=1, immediately returns (True, None) - without starting async operation. This prevents asyncio event loop hangs - during application teardown. - - Fail-open philosophy: Assumes workflow is valid during shutdown - to never block the user from exiting. - - Example: - >>> # Normal operation - runs full analysis - >>> conversation = [{"role": "user", "content": "Implement feature"}] - >>> valid, reason = analyze_workflow_invocation_sync( - ... conversation, "DEVELOPMENT", Path.cwd() - ... ) - >>> isinstance(valid, bool) - True - - >>> # During shutdown - returns valid immediately - >>> os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - >>> valid, reason = analyze_workflow_invocation_sync( - ... conversation, "DEVELOPMENT", Path.cwd() - ... ) - >>> valid - True - >>> reason is None - True - """ - # Shutdown check: bypass async operation during teardown - if is_shutting_down(): - return (True, None) # Fail-open: assume valid during shutdown - - try: - return asyncio.run(analyze_workflow_invocation(conversation, session_type, project_root)) - except Exception: - return (True, None) # Fail-open on any error - - -# For testing -if __name__ == "__main__": - import argparse - import json - - parser = argparse.ArgumentParser(description="Test Claude-powered consideration analysis") - parser.add_argument("transcript_file", type=Path, help="Transcript JSON file") - parser.add_argument("consideration_id", type=str, help="Consideration ID to check") - parser.add_argument( - "--project-root", type=Path, default=Path.cwd(), help="Project root directory" - ) - - args = parser.parse_args() - - # Load transcript - with open(args.transcript_file) as f: - transcript = json.load(f) - - # Example consideration - consideration = { - "id": args.consideration_id, - "question": "Were all TODO items completed?", - "description": "Check if all todo items in TodoWrite are marked as completed", - "category": "Completion", - } - - result = analyze_consideration_sync(transcript, consideration, args.project_root) - print(f"\nConsideration '{consideration['id']}': {'SATISFIED' if result else 'NOT SATISFIED'}") diff --git a/amplifier-bundle/tools/amplihack/hooks/claude_reflection.py b/amplifier-bundle/tools/amplihack/hooks/claude_reflection.py deleted file mode 100755 index d20a5bf5b..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/claude_reflection.py +++ /dev/null @@ -1,460 +0,0 @@ -#!/usr/bin/env python3 -""" -Claude SDK-based session reflection. - -Uses Claude Agent SDK to intelligently analyze sessions and fill out -the FEEDBACK_SUMMARY template, replacing simple pattern matching with -AI-powered reflection. -""" - -import asyncio -import json -import sys -from pathlib import Path - -# Try to import Claude SDK -try: - from claude_agent_sdk import ClaudeAgentOptions, query - - CLAUDE_SDK_AVAILABLE = True -except ImportError: - CLAUDE_SDK_AVAILABLE = False - -# Repository constants -AMPLIHACK_REPO_URI = "https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding" - -# Template paths (relative to this file) -TEMPLATE_DIR = Path(__file__).parent / "templates" -REFLECTION_PROMPT_TEMPLATE = TEMPLATE_DIR / "reflection_prompt.txt" - - -def load_session_conversation(session_dir: Path) -> list[dict] | None: - """Load conversation messages from session directory. - - Args: - session_dir: Path to session log directory - - Returns: - List of message dicts, or None if not found - """ - # Try different possible file locations - candidates = [ - session_dir / "conversation_transcript.json", - session_dir / "messages.json", - session_dir / "session.json", - ] - - for candidate in candidates: - if candidate.exists(): - try: - with open(candidate) as f: - data = json.load(f) - # Handle different data structures - if isinstance(data, list): - return data - if isinstance(data, dict) and "messages" in data: - return data["messages"] - except (OSError, json.JSONDecodeError): - continue - - return None - - -def load_power_steering_redirects(session_dir: Path) -> list[dict] | None: - """Load power-steering redirect history from session directory. - - Args: - session_dir: Path to session log directory - - Returns: - List of redirect dicts, or None if no redirects file exists - """ - redirects_file = session_dir / "redirects.jsonl" - - if not redirects_file.exists(): - return None - - redirects = [] - try: - with open(redirects_file) as f: - for line in f: - line = line.strip() - if not line: - continue - try: - redirect = json.loads(line) - redirects.append(redirect) - except json.JSONDecodeError: - continue # Skip malformed lines - except OSError: - return None - - return redirects if redirects else None - - -def format_redirects_context(redirects: list[dict] | None) -> str: - """Format redirect history for inclusion in reflection prompt. - - Args: - redirects: List of redirect dictionaries - - Returns: - Formatted markdown string describing redirects - """ - if not redirects: - return "" - - redirect_word = "redirect" if len(redirects) == 1 else "redirects" - parts = [ - "", - "## Power-Steering Redirect History", - "", - f"This session had {len(redirects)} power-steering {redirect_word} where Claude was blocked from stopping due to incomplete work:", - "", - ] - - for redirect in redirects: - redirect_num = redirect.get("redirect_number", "?") - timestamp = redirect.get("timestamp", "unknown") - failed = redirect.get("failed_considerations", []) - prompt = redirect.get("continuation_prompt", "") - - parts.append(f"### Redirect #{redirect_num} ({timestamp})") - parts.append("") - parts.append(f"**Failed Checks:** {', '.join(failed)}") - parts.append("") - parts.append("**Continuation Prompt Given:**") - parts.append("```") - parts.append(prompt) - parts.append("```") - parts.append("") - - parts.append( - "**Analysis Note:** These redirects indicate areas where work was incomplete. " - "In your feedback, consider whether the redirects were justified and whether " - "Claude successfully addressed the blockers after being redirected." - ) - parts.append("") - - return "\n".join(parts) - - -def load_feedback_template(project_root: Path) -> str: - """Load FEEDBACK_SUMMARY template. - - Args: - project_root: Project root directory - - Returns: - Template content as string - """ - template_path = project_root / ".claude" / "templates" / "FEEDBACK_SUMMARY.md" - - if not template_path.exists(): - # Fallback minimal template - return """## Task Summary -[What was accomplished] - -## Feedback Summary -**User Interactions:** [Observations] -**Workflow Adherence:** [Did workflow get followed?] -**Subagent Usage:** [Which agents used?] -**Learning Opportunities:** [What to improve] -""" - - return template_path.read_text() - - -def load_prompt_template() -> str: - """Load reflection prompt template. - - Returns: - Raw template content with {VARIABLE} placeholders - - Raises: - FileNotFoundError: If template file is missing (configuration error) - """ - if not REFLECTION_PROMPT_TEMPLATE.exists(): - raise FileNotFoundError( - f"Reflection prompt template not found at {REFLECTION_PROMPT_TEMPLATE}. " - "This is a configuration error - the template file must exist." - ) - - return REFLECTION_PROMPT_TEMPLATE.read_text() - - -def format_reflection_prompt(template: str, variables: dict[str, str]) -> str: - """Format reflection prompt with variable substitution. - - Args: - template: Raw template with {VARIABLE} placeholders - variables: Dictionary of variable name -> value mappings - - Returns: - Formatted prompt with all variables substituted - - Raises: - KeyError: If required variable is missing - """ - return template.format(**variables) - - -def get_repository_context(project_root: Path) -> str: - """Detect repository context to distinguish amplihack vs project issues. - - Args: - project_root: Project root directory - - Returns: - Formatted repository context guidance for reflection prompt - """ - import subprocess - - try: - # Get current repository URL - result = subprocess.run( - ["git", "remote", "get-url", "origin"], - cwd=project_root, - capture_output=True, - text=True, - timeout=5, - ) - - if result.returncode == 0: - current_repo = result.stdout.strip() - - # Normalize URLs for comparison (handle .git suffix and https/ssh) - def normalize_url(url: str) -> str: - url = url.rstrip("/").replace(".git", "") - if url.startswith("git@github.com:"): - url = url.replace("git@github.com:", "https://github.com/") - return url.lower() - - current_normalized = normalize_url(current_repo) - amplihack_normalized = normalize_url(AMPLIHACK_REPO_URI) - - is_amplihack_repo = current_normalized == amplihack_normalized - - if is_amplihack_repo: - return f""" -## Repository Context - -**Current Repository**: {current_repo} -**Context**: Working on Amplihack itself - -**IMPORTANT**: Since we're working on the Amplihack framework itself, ALL issues identified in this session are Amplihack framework issues and should be filed against the Amplihack repository. -""" - return f""" -## Repository Context - -**Current Repository**: {current_repo} -**Amplihack Repository**: {AMPLIHACK_REPO_URI} -**Context**: Working on a user project (not Amplihack itself) -""" - - # Git command failed - provide generic guidance - return f""" -## Repository Context - -**Amplihack Repository**: {AMPLIHACK_REPO_URI} -**Context**: Repository detection unavailable -""" - - except Exception: - # Subprocess failed - provide generic guidance - return f""" -## Repository Context - -**Amplihack Repository**: {AMPLIHACK_REPO_URI} -**Context**: Repository detection unavailable -""" - - -async def analyze_session_with_claude( - conversation: list[dict], - template: str, - project_root: Path, - session_dir: Path | None = None, -) -> str | None: - """Use Claude SDK to analyze session and fill out template. - - Args: - conversation: Session conversation messages - template: FEEDBACK_SUMMARY template - project_root: Project root directory - session_dir: Optional session directory for loading redirects - - Returns: - Filled template as string, or None if analysis fails - """ - if not CLAUDE_SDK_AVAILABLE: - print("Claude SDK not available - cannot run AI-powered reflection", file=sys.stderr) - return None - - # Load USER_PREFERENCES for context (same as session_start does) - user_preferences_context = "" - try: - # Try to use FrameworkPathResolver if available - try: - sys.path.insert(0, str(project_root / ".claude" / "tools" / "amplihack")) - from amplihack.utils.paths import FrameworkPathResolver - - preferences_file = FrameworkPathResolver.resolve_preferences_file() - except ImportError: - # Fallback to default location - preferences_file = project_root / ".claude" / "context" / "USER_PREFERENCES.md" - - if preferences_file and preferences_file.exists(): - with open(preferences_file) as f: - prefs_content = f.read() - user_preferences_context = f""" -## User Preferences (MANDATORY - MUST FOLLOW) - -The following preferences are REQUIRED and CANNOT be ignored: - -{prefs_content} - -**IMPORTANT**: When analyzing this session, consider whether Claude followed these user preferences. Do NOT criticize behavior that aligns with configured preferences. -""" - except Exception as e: - print(f"Warning: Could not load USER_PREFERENCES: {e}", file=sys.stderr) - # Continue without preferences - - # Get repository context for issue categorization - repository_context = get_repository_context(project_root) - - # Load power-steering redirects if available - redirects_context = "" - if session_dir: - redirects = load_power_steering_redirects(session_dir) - redirects_context = format_redirects_context(redirects) - - # Load prompt template and format with variables - prompt_template = load_prompt_template() - prompt = format_reflection_prompt( - prompt_template, - { - "user_preferences_context": user_preferences_context, - "repository_context": repository_context, - "amplihack_repo_uri": AMPLIHACK_REPO_URI, - "message_count": str(len(conversation)), - "conversation_summary": _format_conversation_summary(conversation), - "redirects_context": redirects_context, - "template": template, - }, - ) - - try: - # Configure SDK - options = ClaudeAgentOptions( - cwd=str(project_root), - permission_mode="bypassPermissions", - ) - - # Collect response - response_parts = [] - async for message in query(prompt=prompt, options=options): - if hasattr(message, "text"): - response_parts.append(message.text) - elif hasattr(message, "content"): - response_parts.append(str(message.content)) - - # Join all parts - filled_template = "".join(response_parts) - return filled_template if filled_template.strip() else None - - except Exception as e: - print(f"Error during Claude reflection: {e}", file=sys.stderr) - return None - - -def _format_conversation_summary(conversation: list[dict], max_length: int = 5000) -> str: - """Format conversation summary for analysis. - - Args: - conversation: List of message dicts - max_length: Maximum summary length - - Returns: - Formatted conversation summary - """ - summary_parts = [] - current_length = 0 - - for i, msg in enumerate(conversation): - role = msg.get("role", "unknown") - content = str(msg.get("content", "")) - - # Truncate long messages - if len(content) > 500: - content = content[:497] + "..." - - msg_summary = f"\n**Message {i + 1} ({role}):** {content}\n" - - # Check if adding this would exceed limit - if current_length + len(msg_summary) > max_length: - summary_parts.append(f"\n[... {len(conversation) - i} more messages ...]") - break - - summary_parts.append(msg_summary) - current_length += len(msg_summary) - - return "".join(summary_parts) - - -def run_claude_reflection( - session_dir: Path, project_root: Path, conversation: list[dict] | None = None -) -> str | None: - """Run Claude SDK-based reflection on a session. - - Args: - session_dir: Session log directory - project_root: Project root directory - conversation: Optional pre-loaded conversation (if None, loads from session_dir) - - Returns: - Filled FEEDBACK_SUMMARY template, or None if failed - """ - # Load conversation if not provided - if conversation is None: - conversation = load_session_conversation(session_dir) - if not conversation: - print(f"No conversation found in {session_dir}", file=sys.stderr) - return None - - # Load template - template = load_feedback_template(project_root) - - # Run async analysis with session_dir for redirect loading - try: - result = asyncio.run( - analyze_session_with_claude(conversation, template, project_root, session_dir) - ) - return result - except Exception as e: - print(f"Claude reflection failed: {e}", file=sys.stderr) - return None - - -# For testing -if __name__ == "__main__": - import argparse - - parser = argparse.ArgumentParser(description="Run Claude-powered session reflection") - parser.add_argument("session_dir", type=Path, help="Session directory path") - parser.add_argument( - "--project-root", type=Path, default=Path.cwd(), help="Project root directory" - ) - - args = parser.parse_args() - - result = run_claude_reflection(args.session_dir, args.project_root) - if result: - print("\n" + "=" * 70) - print("CLAUDE REFLECTION RESULT") - print("=" * 70) - print(result) - print("=" * 70) - else: - print("Reflection failed") - sys.exit(1) diff --git a/amplifier-bundle/tools/amplihack/hooks/completion_evidence.py b/amplifier-bundle/tools/amplihack/hooks/completion_evidence.py deleted file mode 100755 index 0fcd30649..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/completion_evidence.py +++ /dev/null @@ -1,303 +0,0 @@ -#!/usr/bin/env python3 -""" -Completion Evidence Checker: Concrete verification of work completion. - -Provides evidence-based completion verification BEFORE relying on SDK analysis. -Uses concrete signals like PR status, CI results, and user confirmation. - -Philosophy: -- Ruthlessly Simple: Check concrete evidence first, SDK second -- Fail-Open: If evidence checking fails, fall back to SDK analysis -- Zero-BS: Every function works or doesn't exist -- Modular: Self-contained brick that plugs into power_steering_checker - -Evidence Types (Priority Order): -1. PR_MERGED - Strongest evidence (work is merged) -2. USER_CONFIRMATION - User explicitly confirmed completion -3. CI_PASSING - All CI checks passed -4. TODO_COMPLETE - All TODO items marked complete -5. FILES_COMMITTED - Changes committed to git -""" - -import json -import subprocess -from dataclasses import dataclass -from enum import Enum -from pathlib import Path - - -class EvidenceType(Enum): - """Types of concrete evidence for work completion.""" - - PR_MERGED = "pr_merged" # PR merged successfully - USER_CONFIRMATION = "user_confirmation" # User explicitly confirmed - CI_PASSING = "ci_passing" # CI checks all passing - TODO_COMPLETE = "todo_complete" # All TODOs marked complete - FILES_COMMITTED = "files_committed" # Changes committed - - -@dataclass -class Evidence: - """Single piece of evidence for work completion.""" - - evidence_type: EvidenceType - verified: bool - details: str - confidence: float # 0.0 to 1.0 - - -class CompletionEvidenceChecker: - """Check for concrete evidence of work completion. - - Uses concrete signals (GitHub PR, filesystem, user confirmation) - to determine if work is complete BEFORE running expensive SDK analysis. - """ - - def __init__(self, project_root: Path): - """Initialize evidence checker. - - Args: - project_root: Path to project root directory - """ - self.project_root = project_root - - def check_pr_status(self) -> Evidence | None: - """Check if PR is merged using gh CLI. - - Returns: - Evidence object if check succeeds, None if gh CLI unavailable - """ - try: - # Check if gh CLI is available - result = subprocess.run( - ["gh", "pr", "view", "--json", "state,mergedAt"], - capture_output=True, - text=True, - cwd=self.project_root, - timeout=10, - ) - - if result.returncode != 0: - # No PR found or gh CLI error - return None - - data = json.loads(result.stdout) - state = data.get("state", "").upper() - merged_at = data.get("mergedAt") - - if state == "MERGED" and merged_at: - return Evidence( - evidence_type=EvidenceType.PR_MERGED, - verified=True, - details=f"PR merged at {merged_at}", - confidence=1.0, - ) - - return Evidence( - evidence_type=EvidenceType.PR_MERGED, - verified=False, - details=f"PR state: {state}", - confidence=0.0, - ) - - except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError): - # gh CLI unavailable or failed - fail-open - return None - - def check_user_confirmation(self, session_dir: Path) -> Evidence | None: - """Check if user explicitly confirmed completion. - - Args: - session_dir: Path to session directory - - Returns: - Evidence object if confirmation found, None otherwise - """ - confirmation_file = session_dir / "user_confirmed_complete" - - if confirmation_file.exists(): - try: - content = confirmation_file.read_text().strip() - return Evidence( - evidence_type=EvidenceType.USER_CONFIRMATION, - verified=True, - details=f"User confirmed: {content}", - confidence=1.0, - ) - except OSError: - pass - - return None - - def check_todo_completion(self, transcript_path: Path) -> Evidence: - """Check if all TODO items are marked complete. - - Args: - transcript_path: Path to session transcript - - Returns: - Evidence object with TODO completion status - """ - try: - # Load transcript - todos_found = False - todos_complete = 0 - todos_total = 0 - - with open(transcript_path) as f: - for line in f: - try: - entry = json.loads(line) - role = entry.get("role") - - # Look for TODO items in assistant messages - if role == "assistant": - content = entry.get("content", []) - if not isinstance(content, list): - continue - - for block in content: - if not isinstance(block, dict): - continue - - text = block.get("text", "") - if "[ ]" in text or "[x]" in text: - todos_found = True - # Count TODO items - todos_total += text.count("[ ]") + text.count("[x]") - todos_complete += text.count("[x]") - - except json.JSONDecodeError: - continue - - if not todos_found: - # No TODOs found - not applicable - return Evidence( - evidence_type=EvidenceType.TODO_COMPLETE, - verified=False, - details="No TODO items found", - confidence=0.0, - ) - - # Check if all TODOs complete - if todos_total > 0 and todos_complete == todos_total: - return Evidence( - evidence_type=EvidenceType.TODO_COMPLETE, - verified=True, - details=f"All {todos_total} TODO items complete", - confidence=0.8, - ) - - return Evidence( - evidence_type=EvidenceType.TODO_COMPLETE, - verified=False, - details=f"{todos_complete}/{todos_total} TODO items complete", - confidence=0.0, - ) - - except (OSError, json.JSONDecodeError): - # Fail-open - return Evidence( - evidence_type=EvidenceType.TODO_COMPLETE, - verified=False, - details="Error reading transcript", - confidence=0.0, - ) - - def check_ci_status(self) -> Evidence | None: - """Check if CI checks are passing using gh CLI. - - Returns: - Evidence object if check succeeds, None if gh CLI unavailable - """ - try: - # Check if gh CLI is available - result = subprocess.run( - ["gh", "pr", "view", "--json", "statusCheckRollup"], - capture_output=True, - text=True, - cwd=self.project_root, - timeout=10, - ) - - if result.returncode != 0: - # No PR found or gh CLI error - return None - - data = json.loads(result.stdout) - checks = data.get("statusCheckRollup", []) - - if not checks: - return None - - # Check if all checks passed - all_passed = all(check.get("conclusion") == "SUCCESS" for check in checks) - total_checks = len(checks) - passed_checks = sum(1 for c in checks if c.get("conclusion") == "SUCCESS") - - if all_passed: - return Evidence( - evidence_type=EvidenceType.CI_PASSING, - verified=True, - details=f"All {total_checks} CI checks passed", - confidence=0.9, - ) - - return Evidence( - evidence_type=EvidenceType.CI_PASSING, - verified=False, - details=f"{passed_checks}/{total_checks} CI checks passed", - confidence=0.0, - ) - - except (subprocess.TimeoutExpired, json.JSONDecodeError, FileNotFoundError): - # gh CLI unavailable or failed - fail-open - return None - - def check_files_committed(self) -> Evidence | None: - """Check if changes are committed to git. - - Returns: - Evidence object if check succeeds, None if git unavailable - """ - try: - # Check git status - result = subprocess.run( - ["git", "status", "--porcelain"], - capture_output=True, - text=True, - cwd=self.project_root, - timeout=5, - ) - - if result.returncode != 0: - return None - - # Check if working directory is clean - if not result.stdout.strip(): - return Evidence( - evidence_type=EvidenceType.FILES_COMMITTED, - verified=True, - details="Working directory clean, all changes committed", - confidence=0.7, - ) - - # Count uncommitted files - uncommitted_lines = result.stdout.strip().split("\n") - return Evidence( - evidence_type=EvidenceType.FILES_COMMITTED, - verified=False, - details=f"{len(uncommitted_lines)} files with uncommitted changes", - confidence=0.0, - ) - - except (subprocess.TimeoutExpired, FileNotFoundError): - # git unavailable - fail-open - return None - - -__all__ = [ - "CompletionEvidenceChecker", - "Evidence", - "EvidenceType", -] diff --git a/amplifier-bundle/tools/amplihack/hooks/error_protocol.py b/amplifier-bundle/tools/amplihack/hooks/error_protocol.py deleted file mode 100755 index b88f93169..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/error_protocol.py +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env python3 -""" -Error protocol for hook system - structured error handling. - -Provides consistent error reporting across all hooks with severity levels -and structured error information. -""" - -from dataclasses import dataclass -from enum import Enum - - -class HookErrorSeverity(Enum): - """Severity levels for hook errors.""" - - WARNING = "warning" # Non-critical, operation can continue - ERROR = "error" # Error occurred but hook can fail-open - FATAL = "fatal" # Critical error, but still fail-open - - -@dataclass -class HookError: - """Structured error information for hooks. - - Attributes: - severity: Error severity level - message: Human-readable error message - context: Additional context about where/why error occurred - suggestion: Suggested action to resolve the error - """ - - severity: HookErrorSeverity - message: str - context: str | None = None - suggestion: str | None = None - - -class HookException(Exception): - """Exception raised by hooks with structured error information. - - This exception carries a HookError object that provides detailed, - structured information about the error for logging and user feedback. - """ - - def __init__(self, error: HookError): - self.error = error - super().__init__(error.message) - - -class HookImportError(HookException): - """Specialized exception for import failures in hooks.""" - - -class HookConfigError(HookException): - """Specialized exception for configuration errors in hooks.""" - - -class HookValidationError(HookException): - """Specialized exception for validation errors in hooks.""" - - -__all__ = [ - "HookError", - "HookErrorSeverity", - "HookException", - "HookImportError", - "HookConfigError", - "HookValidationError", -] diff --git a/amplifier-bundle/tools/amplihack/hooks/fallback_heuristics.py b/amplifier-bundle/tools/amplihack/hooks/fallback_heuristics.py deleted file mode 100755 index 5c88ec8d2..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/fallback_heuristics.py +++ /dev/null @@ -1,163 +0,0 @@ -#!/usr/bin/env python3 -""" -Fallback heuristics for power steering analysis. - -This module provides pattern-based fallback checks when Claude SDK is unavailable -or times out. It extracts consideration types from IDs and matches against -keyword patterns to determine if a failure was addressed. - -Philosophy: -- Ruthlessly Simple: Single-purpose pattern matching module -- Zero-BS: No stubs, every function works -- Modular: Self-contained brick with clear public API -- Fail-Open: Returns None when uncertain (better safe than sorry) - -Public API (the "studs"): - AddressedChecker: Main interface for checking if concerns addressed - HEURISTIC_PATTERNS: Pattern definitions for transparency -""" - -# Heuristic patterns by consideration type -HEURISTIC_PATTERNS = { - "todos": { - "keywords": ["todo"], - "completion_words": ["complete", "done", "finished", "mark"], - "evidence": "Delta contains TODO completion discussion", - }, - "testing": { - "keywords": [ - "tests pass", - "test suite", - "pytest", - "all tests", - "tests are passing", - "ran tests", - ], - "evidence": "Delta mentions test execution/results", - }, - "test": { - "keywords": [ - "tests pass", - "test suite", - "pytest", - "all tests", - "tests are passing", - "ran tests", - ], - "evidence": "Delta mentions test execution/results", - }, - "ci": { - "keywords": [ - "ci is", - "ci pass", - "build is green", - "checks pass", - "ci green", - "pipeline pass", - ], - "evidence": "Delta mentions CI status", - }, - "docs": { - "keywords": ["created doc", "added doc", "updated doc", ".md", "readme"], - "evidence": "Delta mentions documentation changes", - }, - "documentation": { - "keywords": ["created doc", "added doc", "updated doc", ".md", "readme"], - "evidence": "Delta mentions documentation changes", - }, - "investigation": { - "keywords": ["session summary", "investigation report", "findings", "documented"], - "evidence": "Delta mentions investigation artifacts", - }, - "workflow": { - "keywords": ["followed workflow", "workflow complete", "step", "pr ready"], - "evidence": "Delta mentions workflow completion", - }, - "philosophy": { - "keywords": ["philosophy", "compliance", "simplicity", "zero-bs", "no stubs"], - "evidence": "Delta mentions philosophy compliance", - }, - "review": { - "keywords": ["review", "reviewed", "feedback", "approved"], - "evidence": "Delta mentions review process", - }, -} - - -class AddressedChecker: - """Check if delta text addresses a specific consideration failure. - - Uses keyword-based heuristics to determine if new content shows - that a previous concern was addressed. - """ - - def __init__(self): - """Initialize the checker with default patterns.""" - self.patterns = HEURISTIC_PATTERNS - - def check_if_addressed(self, consideration_id: str, delta_text: str) -> str | None: - """Check if the delta addresses a specific failure. - - Args: - consideration_id: ID of the consideration (e.g., "todos-incomplete") - delta_text: All text from the delta to check - - Returns: - Evidence string if addressed, None otherwise - """ - # Extract type from consideration ID - consideration_type = self._extract_type(consideration_id) - if not consideration_type: - return None - - # Get pattern for this type - pattern = self.patterns.get(consideration_type) - if not pattern: - return None - - # Check if text matches pattern - text_lower = delta_text.lower() - - # Special handling for todos (needs both keyword and completion word) - if consideration_type == "todos": - if "todo" in text_lower and any( - word in text_lower for word in pattern["completion_words"] - ): - return pattern["evidence"] - return None - - # For other types, just check keywords - if self._matches_pattern(text_lower, pattern["keywords"]): - return pattern["evidence"] - - return None - - def _extract_type(self, consideration_id: str) -> str | None: - """Extract consideration type from ID. - - Args: - consideration_id: ID like "todos-incomplete" or "test-failures" - - Returns: - Type string (e.g., "todos", "test") or None if not found - """ - # Split on hyphen and take first part - parts = consideration_id.split("-") - if parts: - return parts[0].lower() - return None - - def _matches_pattern(self, text: str, keywords: list[str]) -> bool: - """Check if text matches any keyword in the list. - - Args: - text: Lowercased text to search - keywords: List of keyword phrases to look for - - Returns: - True if any keyword found, False otherwise - """ - return any(phrase in text for phrase in keywords) - - -__all__ = ["AddressedChecker", "HEURISTIC_PATTERNS"] diff --git a/amplifier-bundle/tools/amplihack/hooks/hook_processor.py b/amplifier-bundle/tools/amplihack/hooks/hook_processor.py deleted file mode 100755 index c11b08bb7..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/hook_processor.py +++ /dev/null @@ -1,406 +0,0 @@ -#!/usr/bin/env python3 -""" -Unified hook processor for Claude Code hooks. -Provides common functionality for all hook scripts. - -Hook Protocol Documentation: -https://docs.claude.com/en/docs/claude-code/hooks - -Response Protocol: -- Return {} for default behavior (no intervention) -- Return {"decision": "block", "reason": "..."} to intervene (Stop hooks) -- Return {"permissionDecision": "allow"/"deny"/"ask"} for permission (PreToolUse hooks) - -Graceful Pipe Closure: -- Automatically handles BrokenPipeError during output writes -- Absorbs EPIPE (errno 32) errors on stdout flush -- Prevents shutdown hangs when Claude Code closes pipes early -- See HOOK_BEHAVIOR.md for detailed documentation -""" - -import json -import os -import sys -import traceback -from abc import ABC, abstractmethod -from datetime import datetime -from pathlib import Path -from typing import Any - -from error_protocol import HookError, HookErrorSeverity, HookException -from json_protocol import RobustJSONParser -from shutdown_context import is_shutdown_in_progress - - -class HookProcessor(ABC): - """Base class for Claude Code hook processors. - - Handles common operations: - - JSON input/output from stdin/stdout - - Logging to runtime directory - - Error handling and graceful fallback - - Clean import structure - """ - - def __init__(self, hook_name: str): - """Initialize the hook processor. - - Args: - hook_name: Name of the hook (used for logging) - """ - self.hook_name = hook_name - - # Use clean import path resolution - try: - # Import after ensuring path is set up - sys.path.insert(0, str(Path(__file__).parent.parent)) - from paths import get_project_root - - self.project_root = get_project_root() - except ImportError: - # Fallback: try to find project root by looking for .claude marker - current = Path(__file__).resolve().parent - found_root: Path | None = None - - for _ in range(10): # Max 10 levels up - # Check old location (repo root) - if (current / ".claude").exists(): - found_root = current - break - # Check new location (package) - if (current / "src" / "amplihack" / ".claude").exists(): - found_root = current - break - if current == current.parent: - break - current = current.parent - - if found_root is None: - raise ValueError("Could not find project root with .claude marker") - - self.project_root = found_root - - # Find .claude directory (could be at root or in package) - claude_dir = self.project_root / ".claude" - if not claude_dir.exists(): - claude_dir = self.project_root / "src" / "amplihack" / ".claude" - if not claude_dir.exists(): - raise ValueError("Could not find .claude directory in expected locations") - - # Setup directories using found location - self.log_dir = claude_dir / "runtime" / "logs" - self.metrics_dir = claude_dir / "runtime" / "metrics" - self.analysis_dir = claude_dir / "runtime" / "analysis" - - # Create directories - self.log_dir.mkdir(parents=True, exist_ok=True) - self.metrics_dir.mkdir(parents=True, exist_ok=True) - self.analysis_dir.mkdir(parents=True, exist_ok=True) - - # Setup log file - self.log_file = self.log_dir / f"{hook_name}.log" - - def validate_path_containment(self, path: Path) -> Path: - """Validate that path stays within project boundaries. - - Args: - path: Path to validate - - Returns: - Resolved path if valid - - Raises: - ValueError: If path escapes project root - """ - resolved = path.resolve() - try: - # Check if path is within project root - resolved.relative_to(self.project_root) - return resolved - except ValueError: - raise ValueError(f"Path escapes project root: {path}") - - def log(self, message: str, level: str = "INFO"): - """Log a message to the hook's log file. - - Args: - message: Message to log - level: Log level (INFO, WARNING, ERROR, DEBUG) - """ - timestamp = datetime.now().isoformat() - try: - # Check log file size and rotate if needed (10MB limit) - if self.log_file.exists() and self.log_file.stat().st_size > 10 * 1024 * 1024: - # Rotate log file - backup = self.log_file.with_suffix( - f".{datetime.now().strftime('%Y%m%d_%H%M%S')}.log" - ) - self.log_file.rename(backup) - - with open(self.log_file, "a") as f: - f.write(f"[{timestamp}] {level}: {message}\n") - except Exception as e: - # If we can't log, at least try stderr - print(f"Logging error: {e}", file=sys.stderr) - - def read_input(self) -> dict[str, Any]: - """Read and parse JSON input from stdin. - - Returns: - Parsed JSON data as dictionary - - Raises: - json.JSONDecodeError: If input is not valid JSON - """ - # Skip stdin read during shutdown to avoid blocking on closed/detached stdin - if is_shutdown_in_progress(): - self.log("Skipping stdin read during shutdown", "DEBUG") - return {} - - # Try to read from stdin - try: - raw_input = sys.stdin.read() - if not raw_input.strip(): - return {} - - # Use RobustJSONParser for resilient parsing - parser = RobustJSONParser() - return parser.parse(raw_input) - except (AttributeError, OSError, ValueError) as e: - # stdin might be closed, detached, or invalid - self.log(f"stdin read failed: {e}", "DEBUG") - return {} - - def write_output(self, output: dict[str, Any]): - """Write JSON output to stdout with fail-open pipe closure handling. - - Silently absorbs BrokenPipeError and EPIPE (errno 32) when Claude Code - closes the pipe during shutdown. This prevents hangs while maintaining - clean exit. - - Philosophy: Fail-open gracefully - if the pipe is closed, our job is done. - - Args: - output: Dictionary to write as JSON - - Raises: - OSError: Only unexpected OS errors (non-EPIPE) are raised - """ - try: - json.dump(output, sys.stdout) - sys.stdout.write("\n") - sys.stdout.flush() - except BrokenPipeError: - pass # Claude Code closed pipe - our job is done - except OSError as e: - # EPIPE (errno 32) or IOError (errno None) - pipe closed during write - if e.errno in (32, None): - if e.errno is None: - self.log( - "OSError with errno=None during pipe write (expected during shutdown)", - "DEBUG", - ) - # Expected during normal shutdown - else: - raise # Unexpected OS error - - def save_metric(self, metric_name: str, value: Any, metadata: dict | None = None): - """Save a metric to the metrics directory. - - Args: - metric_name: Name of the metric - value: Metric value - metadata: Optional additional metadata - """ - metrics_file = self.metrics_dir / f"{self.hook_name}_metrics.jsonl" - - metric = { - "timestamp": datetime.now().isoformat(), - "metric": metric_name, - "value": value, - "hook": self.hook_name, - } - - if metadata: - metric["metadata"] = metadata - - try: - with open(metrics_file, "a") as f: - f.write(json.dumps(metric) + "\n") - except Exception as e: - self.log(f"Failed to save metric: {e}", "WARNING") - - def _write_error_to_stderr(self, error: HookError): - """Write structured error to stderr for user visibility. - - Args: - error: HookError containing structured error information - """ - print("=" * 60, file=sys.stderr) - print(f"HOOK ERROR: {self.hook_name}", file=sys.stderr) - print("=" * 60, file=sys.stderr) - print(f"Severity: {error.severity.value}", file=sys.stderr) - print(f"Error: {error.message}", file=sys.stderr) - - if error.context: - print(f"Context: {error.context}", file=sys.stderr) - - if error.suggestion: - print(f"\nSuggestion: {error.suggestion}", file=sys.stderr) - - # Use relative path to avoid disclosing full system paths - try: - relative_log_path = self.log_file.relative_to(self.project_root) - print(f"\nLog file: {relative_log_path}", file=sys.stderr) - except ValueError: - # Fallback if path is outside project root - print(f"\nLog file: {self.log_file.name}", file=sys.stderr) - - print("=" * 60, file=sys.stderr) - - @abstractmethod - def process(self, input_data: dict[str, Any]) -> dict[str, Any]: - """Process the hook input and return output. - - This method must be implemented by subclasses. - - Args: - input_data: Input data from Claude Code - - Returns: - Output data to return to Claude Code - """ - - def run(self): - """Main entry point for the hook processor. - - Handles the full lifecycle: - 1. Read input from stdin - 2. Process the input - 3. Write output to stdout - 4. Handle any errors gracefully (fail-open) - """ - try: - # Log start with version info - self.log(f"{self.hook_name} hook starting (Python {sys.version.split()[0]})") - - # Read input - input_data = self.read_input() - self.log(f"Received input with keys: {list(input_data.keys())}") - - # Log hook event name if available for debugging - if "hook_event_name" in input_data: - self.log(f"Event type: {input_data['hook_event_name']}") - - # Process - output = self.process(input_data) - - # Ensure output is a dict - if output is None: - output = {} - elif not isinstance(output, dict): - self.log(f"Warning: process() returned non-dict: {type(output)}", "WARNING") - output = {"result": output} - - # Log output structure for diagnostics - output_keys = list(output.keys()) - if output_keys: - self.log(f"Returning output with keys: {output_keys}") - else: - self.log("Returning empty output (allows default behavior)") - - # Write output - self.write_output(output) - self.log(f"{self.hook_name} hook completed successfully") - - except HookException as e: - # Structured hook error - log and fail-open - self.log(f"Hook error: {e.error.message}", "ERROR") - self.log(f"Severity: {e.error.severity.value}", "ERROR") - - # Write error to stderr for user visibility - self._write_error_to_stderr(e.error) - - # Fail-open: Return empty dict (allows default behavior) - self.write_output({}) - self.log("Failed open - returning empty dict", "INFO") - - except json.JSONDecodeError as e: - # JSON parse error - log and fail-open - self.log(f"Invalid JSON input: {e}", "ERROR") - - # Create structured error - error = HookError( - severity=HookErrorSeverity.ERROR, - message="Invalid JSON input from stdin", - context=str(e), - suggestion="Check hook input format", - ) - self._write_error_to_stderr(error) - - # Fail-open - self.write_output({}) - - except Exception as e: - # Unexpected error - log full traceback and fail-open - error_msg = f"Unexpected error in {self.hook_name}: {e}" - traceback_str = traceback.format_exc() - - self.log(error_msg, "ERROR") - self.log(f"Traceback: {traceback_str}", "ERROR") - - # Create structured error - error = HookError( - severity=HookErrorSeverity.FATAL, - message=str(e), - context=f"Hook: {self.hook_name}", - suggestion="Check log file for full traceback", - ) - self._write_error_to_stderr(error) - - # Show stack trace in debug mode - if os.getenv("AMPLIHACK_DEBUG"): - print("\nStack trace:", file=sys.stderr) - print(traceback_str, file=sys.stderr) - - # Fail-open: Return empty dict (allows default behavior) - self.write_output({}) - self.log("Failed open after unexpected error", "INFO") - - def get_session_id(self) -> str: - """Generate or retrieve a session ID. - - Returns: - Session ID based on timestamp - """ - # Include microseconds to prevent collisions - return datetime.now().strftime("%Y%m%d_%H%M%S_%f") - - def save_session_data(self, filename: str, data: Any): - """Save data to a session-specific file with path validation. - - Args: - filename: Name of the file (without path) - data: Data to save (will be JSON serialized if dict/list) - """ - # Validate filename to prevent path traversal - if ".." in filename or "/" in filename or "\\" in filename: - self.log(f"Invalid filename attempted: {filename}", "WARNING") - raise ValueError("Invalid filename - no path separators allowed") - - session_dir = self.log_dir / self.get_session_id() - session_dir.mkdir(parents=True, exist_ok=True, mode=0o700) # Restrict permissions - - file_path = session_dir / filename - - try: - if isinstance(data, (dict, list)): - with open(file_path, "w") as f: - json.dump(data, f, indent=2) - else: - with open(file_path, "w") as f: - f.write(str(data)) - - self.log(f"Saved session data to {filename}") - except Exception as e: - self.log(f"Failed to save session data: {e}", "WARNING") diff --git a/amplifier-bundle/tools/amplihack/hooks/json_protocol.py b/amplifier-bundle/tools/amplihack/hooks/json_protocol.py deleted file mode 100755 index 76ab85a8e..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/json_protocol.py +++ /dev/null @@ -1,84 +0,0 @@ -#!/usr/bin/env python3 -""" -JSON protocol for hook system - robust JSON parsing. - -Provides resilient JSON parsing with graceful error handling for -malformed or truncated JSON input. -""" - -import json -from typing import Any - - -class RobustJSONParser: - """Robust JSON parser that handles malformed input gracefully. - - Features: - - Handles truncated JSON - - Recovers from trailing commas - - Provides helpful error messages - - Never crashes on bad input - """ - - def parse(self, raw_input: str) -> dict[str, Any]: - """Parse JSON with error recovery. - - Args: - raw_input: Raw JSON string to parse - - Returns: - Parsed dictionary, or empty dict if parsing fails - - Raises: - json.JSONDecodeError: If JSON is invalid and cannot be recovered - """ - if not raw_input or not raw_input.strip(): - return {} - - # Try normal parse first - try: - return json.loads(raw_input) - except json.JSONDecodeError as e: - # Try to recover from common issues - recovered = self._attempt_recovery(raw_input, e) - if recovered is not None: - return recovered - # Re-raise original error if recovery failed - raise - - def _attempt_recovery( - self, raw_input: str, original_error: json.JSONDecodeError - ) -> dict[str, Any] | None: - """Attempt to recover from JSON parse errors. - - Args: - raw_input: Original input string - original_error: The original parse error - - Returns: - Recovered dict or None if recovery failed - """ - # Try removing trailing commas - if "," in raw_input: - try: - # Remove trailing commas before closing braces/brackets - fixed = raw_input.replace(",}", "}").replace(",]", "]") - return json.loads(fixed) - except json.JSONDecodeError: - pass - - # Try truncating at last valid character - if "Expecting" in str(original_error): - try: - # Find last complete JSON object - last_brace = raw_input.rfind("}") - if last_brace > 0: - truncated = raw_input[: last_brace + 1] - return json.loads(truncated) - except json.JSONDecodeError: - pass - - return None - - -__all__ = ["RobustJSONParser"] diff --git a/amplifier-bundle/tools/amplihack/hooks/post_tool_use.py b/amplifier-bundle/tools/amplihack/hooks/post_tool_use.py deleted file mode 100755 index 30b89ba1f..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/post_tool_use.py +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python3 -""" -Claude Code hook for post tool use events. -Uses unified HookProcessor for common functionality. - -Uses extensible tool registry system for multiple tool hooks. -""" - -# Import the base processor -import sys -from pathlib import Path -from typing import Any - -sys.path.insert(0, str(Path(__file__).parent)) -from hook_processor import HookProcessor - -# Import tool registry for extensible hook system -try: - from tool_registry import aggregate_hook_results, get_global_registry - - TOOL_REGISTRY_AVAILABLE = True -except ImportError: - TOOL_REGISTRY_AVAILABLE = False - - -class PostToolUseHook(HookProcessor): - """Hook processor for post tool use events.""" - - def __init__(self): - super().__init__("post_tool_use") - self.strategy = None - self._setup_tool_hooks() - - def _setup_tool_hooks(self): - """Setup all tool hooks (context management, etc.).""" - if not TOOL_REGISTRY_AVAILABLE: - self.log("Tool registry not available - hooks disabled", "DEBUG") - return - - # Import and register context management hook - try: - sys.path.insert(0, str(Path(__file__).parent.parent)) - from context_automation_hook import register_context_hook - - register_context_hook() # Registers with global registry - self.log("Context management hook registered", "DEBUG") - except ImportError as e: - self.log(f"Context management hook not available: {e}", "DEBUG") - - # Future: Add more tool hooks here - # from other_tool_hook import register_other_hook - # register_other_hook() - - def save_tool_metric(self, tool_name: str, duration_ms: int | None = None): - """Save tool usage metric with structured data. - - Args: - tool_name: Name of the tool used - duration_ms: Duration in milliseconds (if available) - """ - metadata = {} - if duration_ms is not None: - metadata["duration_ms"] = duration_ms - - self.save_metric("tool_usage", tool_name, metadata) - - def process(self, input_data: dict[str, Any]) -> dict[str, Any]: - """Process post tool use event. - - Args: - input_data: Input from Claude Code - - Returns: - Empty dict or validation messages - """ - # Detect launcher and select strategy - self.strategy = self._select_strategy() - if self.strategy: - self.log(f"Using strategy: {self.strategy.__class__.__name__}") - # Check for strategy-specific post-tool handling - strategy_result = self.strategy.handle_post_tool_use(input_data) - if strategy_result: - self.log("Strategy provided custom post-tool handling") - return strategy_result - - # Extract tool information - tool_use = input_data.get("toolUse", {}) - tool_name = tool_use.get("name", "unknown") - - # Extract result if available (not currently used but could be useful) - result = input_data.get("result", {}) - - self.log(f"Tool used: {tool_name}") - - # Save metrics - could extract duration from result if available - duration_ms = None - if isinstance(result, dict): - # Some tools might include timing information - duration_ms = result.get("duration_ms") - - self.save_tool_metric(tool_name, duration_ms) - - # Check for specific tool types that might need validation - output = {} - if tool_name in ["Write", "Edit", "MultiEdit"]: - # Could add validation or checks here - # For example, check if edits were successful - if isinstance(result, dict) and result.get("error"): - self.log(f"Tool {tool_name} reported error: {result.get('error')}", "WARNING") - # Could return a suggestion or alert - output["metadata"] = { - "warning": f"Tool {tool_name} encountered an error", - "tool": tool_name, - } - - # Track high-level metrics - if tool_name == "Bash": - self.save_metric("bash_commands", 1) - elif tool_name in ["Read", "Write", "Edit", "MultiEdit"]: - self.save_metric("file_operations", 1) - elif tool_name in ["Grep", "Glob"]: - self.save_metric("search_operations", 1) - - # Execute registered tool hooks via registry - if TOOL_REGISTRY_AVAILABLE: - try: - registry = get_global_registry() - hook_results = registry.execute_hooks(input_data) - - # Aggregate results from all hooks - aggregated = aggregate_hook_results(hook_results) - - # Add warnings and metadata to output - if aggregated["warnings"] or aggregated["metadata"]: - if "metadata" not in output: - output["metadata"] = {} - - # Add aggregated metadata - for key, value in aggregated["metadata"].items(): - output["metadata"][key] = value - - # Add warnings list if present - if aggregated["warnings"]: - output["metadata"]["warnings"] = aggregated["warnings"] - - # Log actions taken by hooks - for action in aggregated["actions_taken"]: - self.log(f"Tool Hook Action: {action}", "INFO") - - # Log warnings - for warning in aggregated["warnings"]: - self.log(f"Tool Hook Warning: {warning}", "INFO") - - except Exception as e: - # Silently fail - don't interrupt user workflow - self.log(f"Tool registry error: {e}", "DEBUG") - - return output - - def _select_strategy(self): - """Detect launcher and select appropriate strategy.""" - try: - # Import adaptive components - sys.path.insert(0, str(self.project_root / "src" / "amplihack")) - from amplihack.context.adaptive.detector import LauncherDetector - from amplihack.context.adaptive.strategies import ClaudeStrategy, CopilotStrategy - - detector = LauncherDetector(self.project_root) - launcher_type = detector.detect() - - if launcher_type == "copilot": - return CopilotStrategy(self.project_root, self.log) - return ClaudeStrategy(self.project_root, self.log) - - except ImportError as e: - self.log(f"Adaptive strategy not available: {e}", "DEBUG") - return None - - -def main(): - """Entry point for the post tool use hook.""" - hook = PostToolUseHook() - hook.run() - - -if __name__ == "__main__": - main() diff --git a/amplifier-bundle/tools/amplihack/hooks/power_steering_checker.py b/amplifier-bundle/tools/amplihack/hooks/power_steering_checker.py deleted file mode 100755 index fd87800fe..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/power_steering_checker.py +++ /dev/null @@ -1,5035 +0,0 @@ -#!/usr/bin/env python3 -""" -Power-Steering Mode: Autonomous session completion verification. - -Analyzes session transcripts against 21 considerations to determine if work is -truly complete before allowing session termination. Blocks incomplete sessions -with actionable continuation prompts. - -Philosophy: -- Ruthlessly Simple: Single-purpose module with clear contract -- Fail-Open: Never block users due to bugs - always allow stop on errors -- Zero-BS: No stubs, every function works or doesn't exist -- Modular: Self-contained brick that plugs into stop hook - -Phase 1 (MVP) Implementation: -- Core module with top 5 critical checkers -- Basic semaphore mechanism -- Simple configuration -- Fail-open error handling - -Phase 4 (Performance) Implementation: -- Parallel SDK calls using asyncio.gather() -- Transcript loaded ONCE, shared across parallel workers -- All checks run (no early exit) for comprehensive feedback -- No caching (not applicable to session-specific analysis) - -Public API for testing: - is_disabled() - Standalone function to check if power-steering is disabled -""" - -import asyncio -import json -import os -import re -import signal -import sys -from collections.abc import Callable -from contextlib import contextmanager -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path -from typing import Any, Literal, Optional - -import yaml - -# Clean import structure -sys.path.insert(0, str(Path(__file__).parent)) - -# Import git utilities for worktree detection -try: - from git_utils import get_shared_runtime_dir -except ImportError: - # Fallback if git_utils not available (fail-open) - def get_shared_runtime_dir(project_root: str | Path) -> str: - """Fallback implementation when git_utils is unavailable.""" - return str(Path(project_root) / ".claude" / "runtime") - - -# Try to import Claude SDK integration -try: - from claude_power_steering import ( - analyze_claims_sync, - analyze_consideration, - analyze_if_addressed_sync, - ) - - SDK_AVAILABLE = True -except ImportError: - SDK_AVAILABLE = False - -# Import turn-aware state management with delta analysis -try: - from power_steering_state import ( - DeltaAnalysisResult, - DeltaAnalyzer, - FailureEvidence, - PowerSteeringTurnState, - TurnStateManager, - ) - - TURN_STATE_AVAILABLE = True -except ImportError: - TURN_STATE_AVAILABLE = False - -# Try to import completion evidence module -try: - from completion_evidence import ( # type: ignore[import-not-found] - CompletionEvidenceChecker, - EvidenceType, - ) - - EVIDENCE_AVAILABLE = True -except ImportError: - EVIDENCE_AVAILABLE = False - -# Try to import compaction validator -try: - from compaction_validator import ( # type: ignore[import-not-found] - CompactionContext, - CompactionValidator, - ) - - COMPACTION_AVAILABLE = True -except ImportError: - COMPACTION_AVAILABLE = False - - # Create placeholder type for when module is unavailable - class CompactionContext: # type: ignore[no-redef] - def __init__(self) -> None: - self.has_compaction_event = False - - -# Security: Maximum transcript size to prevent memory exhaustion -MAX_TRANSCRIPT_LINES = 50000 # Limit transcript to 50K lines (~10-20MB typical) - -# Timeout hierarchy: HOOK_TIMEOUT (120s) > PARALLEL_TIMEOUT (60s) > CHECKER_TIMEOUT (25s) -# Individual checker execution budget (within parallel execution budget) -CHECKER_TIMEOUT = 25 - -# Parallel execution budget: All 21 checks complete in ~15-20s typically, 60s provides buffer -# Must be less than HOOK_TIMEOUT (120s) to avoid being killed by framework -PARALLEL_TIMEOUT = 60 - -# Public API (the "studs" for this brick) -__all__ = [ - "PowerSteeringChecker", - "PowerSteeringResult", - "CheckerResult", - "ConsiderationAnalysis", -] - - -def _write_with_retry(filepath: Path, data: str, mode: str = "w", max_retries: int = 3) -> None: - """Write file with exponential backoff for cloud sync resilience. - - Handles transient file I/O errors that can occur with cloud-synced directories - (iCloud, OneDrive, Dropbox, etc.) by retrying with exponential backoff. - - Args: - filepath: Path to file to write - data: Content to write - mode: File mode ('w' for write, 'a' for append) - max_retries: Maximum retry attempts (default: 3) - - Raises: - OSError: If all retries exhausted (fail-open: caller should handle) - """ - import time - - retry_delay = 0.1 - - for attempt in range(max_retries): - try: - filepath.parent.mkdir(parents=True, exist_ok=True) - if mode == "w": - filepath.write_text(data, encoding="utf-8") - else: # append mode - with open(filepath, mode, encoding="utf-8") as f: - f.write(data) - return # Success! - except OSError as e: - if e.errno == 5 and attempt < max_retries - 1: # Input/output error - if attempt == 0: - # Only warn on first retry - import sys - - sys.stderr.write( - "[Power Steering] File I/O error, retrying (may be cloud sync issue)\n" - ) - time.sleep(retry_delay) - retry_delay *= 2 # Exponential backoff - else: - raise # Give up after max retries or non-transient error - - -@contextmanager -def _timeout(seconds: int): - """Context manager for operation timeout. - - Args: - seconds: Timeout in seconds - - Raises: - TimeoutError: If operation exceeds timeout - """ - - def handler(signum, frame): - raise TimeoutError("Operation timed out") - - # Set alarm - old_handler = signal.signal(signal.SIGALRM, handler) - signal.alarm(seconds) - - try: - yield - finally: - signal.alarm(0) - signal.signal(signal.SIGALRM, old_handler) - - -@dataclass -class CheckerResult: - """Result from a single consideration checker.""" - - consideration_id: str - satisfied: bool - reason: str - severity: Literal["blocker", "warning"] - recovery_steps: list[str] = field(default_factory=list) # Optional recovery guidance - executed: bool = True # Whether this check was actually executed - - @property - def id(self) -> str: - """Alias for consideration_id for backward compatibility.""" - return self.consideration_id - - -@dataclass -class ConsiderationAnalysis: - """Results of analyzing all considerations.""" - - results: dict[str, CheckerResult] = field(default_factory=dict) - failed_blockers: list[CheckerResult] = field(default_factory=list) - failed_warnings: list[CheckerResult] = field(default_factory=list) - - @property - def has_blockers(self) -> bool: - """True if any blocker consideration failed.""" - return len(self.failed_blockers) > 0 - - def add_result(self, result: CheckerResult) -> None: - """Add result for a consideration.""" - self.results[result.consideration_id] = result - if not result.satisfied: - if result.severity == "blocker": - self.failed_blockers.append(result) - else: - self.failed_warnings.append(result) - - def group_by_category(self) -> dict[str, list[CheckerResult]]: - """Group failed considerations by category.""" - # For Phase 1, use simplified categories based on consideration ID prefix - grouped: dict[str, list[CheckerResult]] = {} - for result in self.failed_blockers + self.failed_warnings: - # Simple category derivation from ID - if "workflow" in result.consideration_id or "philosophy" in result.consideration_id: - category = "Workflow & Philosophy" - elif "testing" in result.consideration_id or "ci" in result.consideration_id: - category = "Testing & CI/CD" - else: - category = "Completion Checks" - - if category not in grouped: - grouped[category] = [] - grouped[category].append(result) - return grouped - - -@dataclass -class PowerSteeringRedirect: - """Record of a power-steering redirect (blocked session).""" - - redirect_number: int - timestamp: str # ISO format - failed_considerations: list[str] # IDs of failed checks - continuation_prompt: str - work_summary: str | None = None - - -@dataclass -class PowerSteeringResult: - """Final decision from power-steering analysis.""" - - decision: Literal["approve", "block"] - reasons: list[str] - continuation_prompt: str | None = None - summary: str | None = None - analysis: Optional["ConsiderationAnalysis"] = None # Full analysis results for visibility - is_first_stop: bool = False # True if this is the first stop attempt in session - evidence_results: list = field(default_factory=list) # Concrete evidence from Phase 1 - compaction_context: Any = None # Compaction diagnostics (CompactionContext if available) - considerations: list = field( - default_factory=list - ) # List of CheckerResult objects for visibility - - -class PowerSteeringChecker: - """Analyzes session completeness using consideration checkers. - - Phase 2 Implementation: - - All 21 considerations from YAML file - - User customization support - - Generic analyzer for flexible considerations - - Backward compatible with Phase 1 - - Fail-open error handling - """ - - # File extension constants for session type detection - CODE_FILE_EXTENSIONS = [ - ".py", - ".js", - ".ts", - ".tsx", - ".jsx", - ".java", - ".go", - ".rs", - ".c", - ".cpp", - ".h", - ] - DOC_FILE_EXTENSIONS = [".md", ".txt", ".rst", "readme", "changelog"] - CONFIG_FILE_EXTENSIONS = [".yml", ".yaml", ".json"] - TEST_COMMAND_PATTERNS = [ - "pytest", - "npm test", - "cargo test", - "go test", - "python -m pytest", - "python -m unittest", - "uvx --from", # Outside-in package testing (user-mandated) - "uvx --from git+", # Outside-in from branch - ] - # Broader validation patterns (config checks, smoke tests, linting) - # Note: python -c requires an additional content check (must contain - # import/open/load/parse/validate) to avoid accepting trivial no-ops - # like python -c "print('hello')". See _is_meaningful_validation(). - VALIDATION_COMMAND_PATTERNS = [ - "ruff check", # Linting - "mypy", # Type checking - "flake8", # Linting - ] - # These patterns require content validation via _is_meaningful_validation() - INLINE_VALIDATION_PATTERNS = [ - "python -c", # Inline validation (YAML, imports, smoke tests) - "node -e", # Inline JS validation - ] - - # Keywords that indicate simple housekeeping tasks (skip power-steering) - # When found in user messages, session is classified as SIMPLE and most - # considerations are skipped. These are routine maintenance tasks. - SIMPLE_TASK_KEYWORDS = [ - "cleanup", - "clean up", - "fetch", - "git fetch", - "git pull", - "pull latest", - "sync", - "update branch", - "rebase", - "git rebase", - "merge main", - "merge master", - "merge pr", - "merge the pr", - "merge this pr", - "merge it", - "review pr", - "review the pr", - "review and merge", - "approve and merge", - "workspace", - "stash", - "git stash", - "discard changes", - "reset", - "checkout", - "switch branch", - "list files", - "show status", - "git status", - "what's changed", - "what changed", - ] - - # Keywords that indicate investigation/troubleshooting sessions - # When found in early user messages, session is classified as INVESTIGATION - # regardless of tool usage patterns (fixes #1604) - # - # Note: Using substring matching, so shorter forms match longer variants: - # - "troubleshoot" matches "troubleshooting" - # - "diagnos" matches "diagnose", "diagnosis", "diagnosing" - # - "debug" matches "debugging" - INVESTIGATION_KEYWORDS = [ - "investigate", - "troubleshoot", - "diagnos", # matches diagnose, diagnosis, diagnosing - "analyze", - "analyse", - "research", - "explore", - "understand", - "figure out", - "why does", - "why is", - "how does", - "how is", - "what causes", - "what's causing", - "root cause", - "debug", - "explain", - ] - - # Phase 1 fallback: Hardcoded considerations (top 5 critical) - # Used when YAML file is missing or invalid - PHASE1_CONSIDERATIONS = [ - { - "id": "todos_complete", - "category": "Session Completion & Progress", - "question": "Were all TodoWrite task items marked as completed before the session ended?", - "severity": "blocker", - "checker": "_check_todos_complete", - }, - { - "id": "dev_workflow_complete", - "category": "Workflow Process Adherence", - "question": "Were all required DEFAULT_WORKFLOW steps completed this session, including requirements clarification, design, implementation, testing, and PR creation?", - "severity": "blocker", - "checker": "_check_dev_workflow_complete", - }, - { - "id": "philosophy_compliance", - "category": "Code Quality & Philosophy", - "question": "Does all code written this session comply with the zero-BS philosophy, meaning no TODO comments, no NotImplementedError stubs, no placeholder functions, and no unimplemented code paths?", - "severity": "blocker", - "checker": "_check_philosophy_compliance", - }, - { - "id": "local_testing", - "category": "Testing & Local Validation", - "question": "Did the agent run the test suite locally (e.g., pytest, npm test, cargo test) and confirm all tests passed before declaring the work complete?", - "severity": "blocker", - "checker": "_check_local_testing", - }, - { - "id": "ci_status", - "category": "CI/CD & Mergeability", - "question": "Are all GitHub Actions CI checks passing and the PR in a mergeable state, with no failing required checks or unresolved merge conflicts?", - "severity": "blocker", - "checker": "_check_ci_status", - }, - ] - - def __init__(self, project_root: Path | None = None): - """Initialize power-steering checker. - - Args: - project_root: Project root directory (auto-detected if None) - """ - # Auto-detect project root if not provided - if project_root is None: - project_root = self._detect_project_root() - - self.project_root = project_root - - # Use shared runtime directory for worktree support - # In worktrees, this resolves to main repo's .claude/runtime - # In main repos, this resolves to project_root/.claude/runtime - shared_runtime = get_shared_runtime_dir(str(project_root)) - self.runtime_dir = Path(shared_runtime) / "power-steering" - - self.config_path = ( - project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - self.considerations_path = ( - project_root / ".claude" / "tools" / "amplihack" / "considerations.yaml" - ) - - # Ensure runtime directory exists - try: - self.runtime_dir.mkdir(parents=True, exist_ok=True) - except OSError: - pass # Fail-open: Continue even if directory creation fails - - # Load configuration - self.config = self._load_config() - - # Load considerations from YAML (with Phase 1 fallback) - self.considerations = self._load_considerations_yaml() - - def _detect_project_root(self) -> Path: - """Auto-detect project root by finding .claude marker. - - Returns: - Project root path - - Raises: - ValueError: If project root cannot be found - """ - current = Path(__file__).resolve().parent - for _ in range(10): # Max 10 levels up - if (current / ".claude").exists(): - return current - if current == current.parent: - break - current = current.parent - - raise ValueError("Could not find project root with .claude marker") - - def _validate_config_integrity(self, config: dict) -> bool: - """Validate configuration integrity (security check). - - Args: - config: Loaded configuration - - Returns: - True if config is valid, False otherwise - """ - # Check required keys - if "enabled" not in config: - return False - - # Validate enabled is boolean - if not isinstance(config["enabled"], bool): - return False - - # Validate phase if present - if "phase" in config and not isinstance(config["phase"], int): - return False - - # Validate checkers_enabled if present - if "checkers_enabled" in config: - if not isinstance(config["checkers_enabled"], dict): - return False - # All values should be booleans - if not all(isinstance(v, bool) for v in config["checkers_enabled"].values()): - return False - - return True - - def _load_config(self) -> dict[str, Any]: - """Load configuration from file with defaults. - - Returns: - Configuration dictionary with defaults applied - """ - defaults = { - "enabled": True, # Enabled by default per user requirement - "version": "1.0.0", - "phase": 1, - "checkers_enabled": { - "todos_complete": True, - "dev_workflow_complete": True, - "philosophy_compliance": True, - "local_testing": True, - "ci_status": True, - }, - } - - # Try to load config file - try: - if self.config_path.exists(): - with open(self.config_path) as f: - user_config = json.load(f) - - # Validate config integrity before using - if not self._validate_config_integrity(user_config): - self._log("Config integrity check failed, using defaults", "WARNING") - return defaults - - # Merge with defaults - defaults.update(user_config) - except (OSError, json.JSONDecodeError) as e: - self._log(f"Config load error ({e}), using defaults", "WARNING") - # Fail-open: Use defaults on any error - - return defaults - - def _load_considerations_yaml(self) -> list[dict[str, Any]]: - """Load considerations from YAML file with fallback to Phase 1. - - Returns: - List of consideration dictionaries (from YAML or Phase 1 fallback) - """ - try: - # Check if YAML file exists in project root - if not self.considerations_path.exists(): - # Try fallback: Look in the same directory as this script (for testing) - script_dir = Path(__file__).parent.parent - fallback_yaml = script_dir / "considerations.yaml" - - if fallback_yaml.exists(): - self._log(f"Using fallback considerations from {fallback_yaml}", "INFO") - with open(fallback_yaml) as f: - yaml_data = yaml.safe_load(f) - else: - self._log("Considerations YAML not found, using Phase 1 fallback", "WARNING") - return self.PHASE1_CONSIDERATIONS - else: - # Load YAML from project root - with open(self.considerations_path) as f: - yaml_data = yaml.safe_load(f) - - # Validate YAML structure - if not isinstance(yaml_data, list): - self._log("Invalid YAML structure (not a list), using Phase 1 fallback", "ERROR") - return self.PHASE1_CONSIDERATIONS - - # Validate and filter considerations - valid_considerations = [] - for item in yaml_data: - if self._validate_consideration_schema(item): - valid_considerations.append(item) - else: - self._log( - f"Invalid consideration schema: {item.get('id', 'unknown')}", "WARNING" - ) - - if not valid_considerations: - self._log("No valid considerations in YAML, using Phase 1 fallback", "ERROR") - return self.PHASE1_CONSIDERATIONS - - self._log(f"Loaded {len(valid_considerations)} considerations from YAML", "INFO") - return valid_considerations - - except (OSError, yaml.YAMLError) as e: - # Fail-open: Use Phase 1 fallback on any error - self._log(f"Error loading YAML ({e}), using Phase 1 fallback", "ERROR") - return self.PHASE1_CONSIDERATIONS - - def _validate_consideration_schema(self, consideration: Any) -> bool: - """Validate consideration has required fields. - - Args: - consideration: Consideration dictionary to validate - - Returns: - True if valid, False otherwise - """ - if not isinstance(consideration, dict): - return False - - required_fields = ["id", "category", "question", "severity", "checker", "enabled"] - if not all(field in consideration for field in required_fields): - return False - - # Validate severity - if consideration["severity"] not in ["blocker", "warning"]: - return False - - # Validate enabled - if not isinstance(consideration["enabled"], bool): - return False - - # Validate applicable_session_types if present (optional field for backward compatibility) - if "applicable_session_types" in consideration: - if not isinstance(consideration["applicable_session_types"], list): - return False - - return True - - def check( - self, - transcript_path: Path | list[dict], - session_id: str, - progress_callback: Callable | None = None, - ) -> PowerSteeringResult: - """Main entry point - analyze transcript and make decision using two-phase verification. - - Phase 1: Check concrete evidence (GitHub, filesystem, user confirmation) - Phase 2: SDK analysis (only if no concrete evidence of completion) - Phase 3: Combine results (evidence can override SDK concerns) - - Args: - transcript_path: Path to session transcript JSONL file OR transcript list (for testing) - session_id: Unique session identifier - progress_callback: Optional callback for progress events (event_type, message, details) - - Returns: - PowerSteeringResult with decision and prompt/summary - """ - # Handle transcript list (testing interface) - if isinstance(transcript_path, list): - return self._check_with_transcript_list(transcript_path, session_id) - - # Initialize turn state tracking (outside try block for fail-open) - turn_state: PowerSteeringTurnState | None = None - turn_state_manager: TurnStateManager | None = None - - try: - # Emit start event - self._emit_progress(progress_callback, "start", "Starting power-steering analysis...") - - # 1. Check if disabled - if self._is_disabled(): - return PowerSteeringResult( - decision="approve", reasons=["disabled"], continuation_prompt=None, summary=None - ) - - # 2. Check semaphore (prevent recursion) - if self._already_ran(session_id): - return PowerSteeringResult( - decision="approve", - reasons=["already_ran"], - continuation_prompt=None, - summary=None, - ) - - # 3. Load transcript (with pre-compaction fallback - Issue #1962) - # Check if session was compacted - if so, use the FULL pre-compaction transcript - # instead of the truncated compacted version Claude Code provides - pre_compaction_path = self._get_pre_compaction_transcript(session_id) - compaction_detected = pre_compaction_path is not None - - if pre_compaction_path: - # Session was compacted - load the full transcript from pre-compaction save - self._emit_progress( - progress_callback, - "compaction_detected", - "Session compaction detected - using pre-compaction transcript", - {"pre_compaction_path": str(pre_compaction_path)}, - ) - transcript = self._load_pre_compaction_transcript(pre_compaction_path) - - # If pre-compaction loading failed, fall back to provided transcript - if not transcript: - self._log( - "Pre-compaction transcript load failed, falling back to provided transcript", - "WARNING", - ) - transcript = self._load_transcript(transcript_path) - compaction_detected = False # Reset since we couldn't use pre-compaction - else: - # No compaction or compaction data unavailable - use provided transcript - transcript = self._load_transcript(transcript_path) - - # 3b. Initialize turn state management (fail-open on import error) - if TURN_STATE_AVAILABLE: - turn_state_manager = TurnStateManager( - project_root=self.project_root, - session_id=session_id, - log=lambda msg, level="INFO": self._log(msg, level), - ) - turn_state = turn_state_manager.load_state() - turn_state = turn_state_manager.increment_turn(turn_state) - self._log( - f"Turn state: turn={turn_state.turn_count}, blocks={turn_state.consecutive_blocks}", - "INFO", - ) - - # 3c. Check auto-approve threshold BEFORE running analysis - should_approve, reason, escalation_msg = turn_state_manager.should_auto_approve( - turn_state - ) - - # Display escalation warning if approaching threshold (Issue #2196) - if escalation_msg: - self._log(escalation_msg, "WARNING") - self._emit_progress( - progress_callback, - "escalation_warning", - escalation_msg, - { - "blocks": turn_state.consecutive_blocks, - "threshold": PowerSteeringTurnState.MAX_CONSECUTIVE_BLOCKS, - }, - ) - - if should_approve: - self._log(f"Auto-approve triggered: {reason}", "INFO") - self._emit_progress( - progress_callback, - "auto_approve", - f"Auto-approving after {turn_state.consecutive_blocks} consecutive blocks", - {"reason": reason}, - ) - - # Reset state and approve - turn_state = turn_state_manager.record_approval(turn_state) - turn_state_manager.save_state(turn_state) - - return PowerSteeringResult( - decision="approve", - reasons=["auto_approve_threshold"], - continuation_prompt=None, - summary=f"Auto-approved: {reason}", - ) - - # 4. Detect session type for selective consideration application - session_type = self.detect_session_type(transcript) - self._log(f"Session classified as: {session_type}", "INFO") - self._emit_progress( - progress_callback, - "session_type", - f"Session type: {session_type}", - {"session_type": session_type}, - ) - - # 4b. Backward compatibility: Also check Q&A session (kept for compatibility) - if self._is_qa_session(transcript): - # Reset turn state on approval - if turn_state_manager and turn_state: - turn_state = turn_state_manager.record_approval(turn_state) - turn_state_manager.save_state(turn_state) - return PowerSteeringResult( - decision="approve", - reasons=["qa_session"], - continuation_prompt=None, - summary=None, - ) - - # 4c. State-based verification (Issue #1962 - robust fallback for post-compaction) - # When compaction is detected, supplement transcript analysis with actual state checks - # This provides ground truth even when transcript history is incomplete - if compaction_detected: - state_verification = self._verify_actual_state(session_id) - if state_verification.get("all_passing"): - self._log( - "State-based verification passed (PR mergeable, CI passing, branch current)", - "INFO", - ) - self._emit_progress( - progress_callback, - "state_verified", - "Work completion verified via state checks", - state_verification, - ) - # If all state checks pass, this is strong evidence of completion - # Store for decision-making but don't auto-approve yet (let evidence checker run too) - self._state_verification_passed = True - else: - self._state_verification_passed = False - else: - self._state_verification_passed = False - - # 4d. PHASE 1: Evidence-based verification (fail-fast on concrete completion signals) - if EVIDENCE_AVAILABLE: - try: - evidence_checker = CompletionEvidenceChecker(self.project_root) - evidence_results = [] - - # Check PR status (strongest evidence) - pr_evidence = evidence_checker.check_pr_status() - if pr_evidence: - evidence_results.append(pr_evidence) - - # If PR merged, work is definitely complete - if ( - pr_evidence.evidence_type == EvidenceType.PR_MERGED - and pr_evidence.verified - ): - self._log("PR merged - work complete (concrete evidence)", "INFO") - return PowerSteeringResult( - decision="approve", - reasons=["PR merged successfully"], - ) - - # Check user confirmation (escape hatch) - session_dir = ( - self.project_root / ".claude" / "runtime" / "power-steering" / session_id - ) - user_confirm = evidence_checker.check_user_confirmation(session_dir) - if user_confirm and user_confirm.verified: - evidence_results.append(user_confirm) - self._log("User confirmed completion - allowing stop", "INFO") - return PowerSteeringResult( - decision="approve", - reasons=["User explicitly confirmed work is complete"], - ) - - # Check TODO completion - todo_evidence = evidence_checker.check_todo_completion(transcript_path) - evidence_results.append(todo_evidence) - - # Store evidence for later use in Phase 3 - self._evidence_results = evidence_results - - except Exception as e: - # Fail-open: If evidence checking fails, continue to SDK analysis - self._log(f"Evidence checking failed (non-critical): {e}", "WARNING") - self._evidence_results = [] - - # 5. Analyze against considerations (filtered by session type) - analysis = self._analyze_considerations( - transcript, session_id, session_type, progress_callback - ) - - # 5b. Delta analysis: Check if NEW content addresses previous failures - addressed_concerns: dict[str, str] = {} - user_claims: list[str] = [] - delta_result: DeltaAnalysisResult | None = None - - if ( - TURN_STATE_AVAILABLE - and turn_state - and turn_state.block_history - and turn_state_manager - ): - # Get previous block's failures for delta analysis - previous_block = turn_state.get_previous_block() - if previous_block and previous_block.failed_evidence: - # Initialize delta analyzer for text extraction - delta_analyzer = DeltaAnalyzer(log=lambda msg: self._log(msg, "INFO")) - - # Get delta transcript (new messages since last block) - start_idx, end_idx = turn_state_manager.get_delta_transcript_range( - turn_state, len(transcript) - ) - delta_messages = transcript[start_idx:end_idx] - - self._log( - f"Delta analysis: {len(delta_messages)} new messages since last block", - "INFO", - ) - - # Extract delta text for LLM analysis - delta_text = delta_analyzer._extract_all_text(delta_messages) - - # Use LLM-based claim detection (replaces regex patterns) - if SDK_AVAILABLE and delta_text: - self._log("Using LLM-based claim detection", "DEBUG") - user_claims = analyze_claims_sync(delta_text, self.project_root) - else: - user_claims = [] - - # Use LLM-based address checking for each previous failure - if SDK_AVAILABLE and delta_text: - self._log("Using LLM-based address checking", "DEBUG") - for failure in previous_block.failed_evidence: - evidence = analyze_if_addressed_sync( - failure.consideration_id, - failure.reason, - delta_text, - self.project_root, - ) - if evidence: - addressed_concerns[failure.consideration_id] = evidence - else: - # Fallback to simple DeltaAnalyzer (heuristics) if SDK unavailable - delta_result = delta_analyzer.analyze_delta( - delta_messages, previous_block.failed_evidence - ) - addressed_concerns = delta_result.new_content_addresses_failures - if not user_claims: - user_claims = delta_result.new_claims_detected - - if addressed_concerns: - self._log( - f"Delta addressed {len(addressed_concerns)} concerns: " - f"{list(addressed_concerns.keys())}", - "INFO", - ) - if user_claims: - self._log(f"Detected {len(user_claims)} completion claims", "INFO") - - # 6. Check if this is first stop (visibility feature) - is_first_stop = not self._results_already_shown(session_id) - - # 7. Make decision based on first/subsequent stop - if analysis.has_blockers: - # Filter out addressed concerns from blockers - remaining_blockers = [ - r - for r in analysis.failed_blockers - if r.consideration_id not in addressed_concerns - ] - - # If all blockers were addressed, treat as passing - if not remaining_blockers and addressed_concerns: - self._log( - f"All {len(addressed_concerns)} blockers were addressed in this turn", - "INFO", - ) - analysis = self._create_passing_analysis(analysis, addressed_concerns) - - # Issue #1962: State-based override for post-compaction scenarios - # When session was compacted and state verification passed (PR mergeable, CI passing), - # trust actual state over potentially incomplete transcript analysis - elif ( - compaction_detected - and getattr(self, "_state_verification_passed", False) - and remaining_blockers - ): - self._log( - f"Post-compaction state override: {len(remaining_blockers)} transcript-based " - "blockers overridden by passing state verification (PR mergeable, CI passing)", - "INFO", - ) - self._emit_progress( - progress_callback, - "state_override", - f"Overriding {len(remaining_blockers)} blockers via state verification", - {"blockers_overridden": [r.consideration_id for r in remaining_blockers]}, - ) - # Create a passing analysis with note about state override - override_note = { - r.consideration_id: "Overridden by state verification (PR mergeable, CI passing)" - for r in remaining_blockers - } - analysis = self._create_passing_analysis(analysis, override_note) - - else: - # Actual failures - block - # Mark results shown on first stop to prevent race condition - if is_first_stop: - self._mark_results_shown(session_id) - - # Record block in turn state with full evidence - blockers_to_record = remaining_blockers or analysis.failed_blockers - - if turn_state_manager and turn_state: - # Convert CheckerResults to FailureEvidence - failed_evidence = self._convert_to_failure_evidence( - blockers_to_record, transcript, user_claims - ) - - # Issue #2196: Generate failure fingerprint for loop detection - failed_ids = [r.consideration_id for r in blockers_to_record] - current_fingerprint = turn_state.generate_failure_fingerprint(failed_ids) # type: ignore[attr-defined] - - # Add fingerprint to history - turn_state.failure_fingerprints.append(current_fingerprint) # type: ignore[attr-defined] - - # Check for loop (same failures repeating 3+ times) - if turn_state.detect_loop(current_fingerprint, threshold=3): # type: ignore[attr-defined] - self._log( - f"Loop detected: Same failures repeating (fingerprint={current_fingerprint})", - "WARNING", - ) - self._emit_progress( - progress_callback, - "loop_detected", - f"Same issues repeating {turn_state.failure_fingerprints.count(current_fingerprint)} times", # type: ignore[attr-defined] - {"fingerprint": current_fingerprint, "failed_ids": failed_ids}, - ) - - # Auto-approve to break loop (fail-open design) - turn_state = turn_state_manager.record_approval(turn_state) - turn_state_manager.save_state(turn_state) - - return PowerSteeringResult( - decision="approve", - reasons=["loop_detected"], - continuation_prompt=None, - summary=f"Loop detected: Same {len(failed_ids)} issues repeating. Auto-approved to prevent infinite loop.", - ) - - turn_state = turn_state_manager.record_block_with_evidence( - turn_state, failed_evidence, len(transcript), user_claims - ) - turn_state_manager.save_state(turn_state) - - failed_ids = [r.consideration_id for r in blockers_to_record] - - prompt = self._generate_continuation_prompt( - analysis, transcript, turn_state, addressed_concerns, user_claims - ) - - # Include formatted results in the prompt for visibility - results_text = self._format_results_text(analysis, session_type) - prompt_with_results = f"{prompt}\n{results_text}" - - # Save redirect record for session reflection - self._save_redirect( - session_id=session_id, - failed_considerations=failed_ids, - continuation_prompt=prompt_with_results, - work_summary=None, # Could be enhanced to extract work summary - ) - - return PowerSteeringResult( - decision="block", - reasons=failed_ids, - continuation_prompt=prompt_with_results, - summary=None, - analysis=analysis, - is_first_stop=is_first_stop, - ) - - # All checks passed (or all blockers were addressed) - # FIX (Issue #1744): Check if any checks were actually evaluated - # If all checks were skipped (no results), approve immediately without blocking - if len(analysis.results) == 0: - self._log( - "No power-steering checks applicable for session type - approving immediately", - "INFO", - ) - # Mark complete to prevent re-running - self._mark_complete(session_id) - self._emit_progress( - progress_callback, - "complete", - "Power-steering analysis complete - no applicable checks for session type", - ) - return PowerSteeringResult( - decision="approve", - reasons=["no_applicable_checks"], - continuation_prompt=None, - summary=None, - analysis=analysis, - is_first_stop=False, - ) - - if is_first_stop: - # FIRST STOP: Block to show results (visibility feature) - # Mark results shown AND complete immediately. - # Defense-in-depth for Issue #2548: if session_id lookup fails on the next stop, - # _already_ran() returning True prevents the visibility block from re-triggering. - self._mark_results_shown(session_id) - self._mark_complete(session_id) - self._log("First stop - blocking to display all results for visibility", "INFO") - self._emit_progress( - progress_callback, - "complete", - "Power-steering analysis complete - all checks passed (first stop - displaying results)", - ) - - # Format results for inclusion in continuation_prompt - # This ensures results are visible even when stderr is not shown - results_text = self._format_results_text(analysis, session_type) - - return PowerSteeringResult( - decision="block", - reasons=["first_stop_visibility"], - continuation_prompt=f"All power-steering checks passed! Please present these results to the user:\n{results_text}", - summary=None, - analysis=analysis, - # FIX (Issue #1744): Pass through calculated is_first_stop value - # This prevents infinite loop by allowing stop.py (line 132) to distinguish - # between first stop (display results) vs subsequent stops (don't block). - # Previously hardcoded to True, causing every stop to block indefinitely. - # NOTE: This was fixed in PR #1745; kept here for documentation. - is_first_stop=is_first_stop, - ) - - # SUBSEQUENT STOP: All checks passed, approve - # 8. Generate summary and mark complete - summary = self._generate_summary(transcript, analysis, session_id) - self._mark_complete(session_id) - self._write_summary(session_id, summary) - - # Reset turn state on approval - if turn_state_manager and turn_state: - turn_state = turn_state_manager.record_approval(turn_state) - turn_state_manager.save_state(turn_state) - - # Emit completion event - self._emit_progress( - progress_callback, - "complete", - "Power-steering analysis complete - all checks passed", - ) - - result = PowerSteeringResult( - decision="approve", - reasons=["all_considerations_satisfied"], - continuation_prompt=None, - summary=summary, - analysis=analysis, - is_first_stop=False, - ) - - # Add evidence to result if available - if hasattr(self, "_evidence_results"): - result.evidence_results = self._evidence_results - - return result - - except Exception as e: - # Fail-open: On any error, approve and log - self._log(f"Power-steering error (fail-open): {e}", "ERROR") - return PowerSteeringResult( - decision="approve", - reasons=["error_failopen"], - continuation_prompt=None, - summary=None, - ) - - def _evidence_suggests_complete(self, evidence_results: list) -> bool: - """Check if concrete evidence suggests work is complete. - - Args: - evidence_results: List of Evidence objects from Phase 1 - - Returns: - True if concrete evidence indicates completion - """ - if not evidence_results: - return False - - # Strong evidence types that indicate completion - strong_evidence = [ - EvidenceType.PR_MERGED, - EvidenceType.USER_CONFIRMATION, - EvidenceType.CI_PASSING, - ] - - # Check if any strong evidence is verified - for evidence in evidence_results: - if evidence.evidence_type in strong_evidence and evidence.verified: - return True - - # Check if multiple medium evidence types are verified - verified_count = sum(1 for e in evidence_results if e.verified) - - # If 3+ evidence types verified, trust concrete evidence - return verified_count >= 3 - - def _is_disabled(self) -> bool: - """Check if power-steering is disabled. - - Four-layer disable system (priority order): - 1. Semaphore file in CWD (highest - for worktree-specific disabling) - 2. Semaphore file in shared runtime (for disabling across all worktrees) - 3. Environment variable (medium) - 4. Config file (lowest) - - Worktree Support: - - Checks both CWD/.disabled and shared runtime directory for .disabled file - - This allows disabling power-steering either locally in a worktree - (worktree/.disabled) or globally for all worktrees - (main_repo/.claude/runtime/power-steering/.disabled) - - Returns: - True if disabled, False if enabled - """ - try: - # Check 1: Semaphore file directly in current working directory - # This allows worktree-specific disabling with simple `touch .disabled` - cwd_disabled = Path.cwd() / ".disabled" - if cwd_disabled.exists(): - return True - except (OSError, RuntimeError): - # Fail-open: If CWD check fails, continue to other checks - pass - - try: - # Check 2: Semaphore file in shared runtime directory - # This affects main repo and all worktrees - # Use get_shared_runtime_dir() dynamically to support test mocking - shared_runtime = Path(get_shared_runtime_dir(self.project_root)) - disabled_file = shared_runtime / "power-steering" / ".disabled" - if disabled_file.exists(): - return True - except (OSError, RuntimeError): - # Fail-open: If runtime dir check fails, continue to other checks - pass - - # Check 3: Environment variable - if os.getenv("AMPLIHACK_SKIP_POWER_STEERING"): - return True - - # Check 4: Config file - if not self.config.get("enabled", False): - return True - - return False - - def _validate_path(self, path: Path, allowed_parent: Path) -> bool: - """Validate path is safe to read (permissive for user files). - - Args: - path: Path to validate - allowed_parent: Parent directory path must be under (typically project root) - - Returns: - True if path is safe, False otherwise - - Note: - Allows paths in: - 1. Project root (backward compatibility) - 2. User's home directory (for Claude Code transcripts in ~/.claude/projects/) - 3. Common temp directories (/tmp, /var/tmp, system temp) - - Security: Power-steering only reads files (read-only operations). - Reading user-owned files is safe. No privilege escalation risk. - """ - import tempfile - - try: - # Resolve to absolute paths - path_resolved = path.resolve() - parent_resolved = allowed_parent.resolve() - - # Check 1: Path is within allowed parent (project root) - try: - path_resolved.relative_to(parent_resolved) - self._log("Path validated: within project root", "DEBUG") - return True - except ValueError: - pass # Not in project root, check other allowed locations - - # Check 2: Path is within user's home directory - # This allows Claude Code transcript paths like ~/.claude/projects/ - try: - home = Path.home().resolve() - path_resolved.relative_to(home) - self._log("Path validated: within user home directory", "DEBUG") - return True # In user's home - safe for read-only operations - except ValueError: - pass # Not in home directory, check temp directories - - # Check 3: Path is in common temp directories (for testing) - temp_dirs = [ - Path("/tmp"), - Path("/var/tmp"), - Path(tempfile.gettempdir()), # System temp dir - ] - - for temp_dir in temp_dirs: - try: - path_resolved.relative_to(temp_dir.resolve()) - self._log(f"Path validated: within temp directory {temp_dir}", "DEBUG") - return True # In temp directory - allow for testing - except ValueError: - continue - - # Not in any allowed locations - self._log( - f"Path validation failed: {path_resolved} not in project root, " - f"home directory, or temp directories", - "WARNING", - ) - return False - - except (OSError, RuntimeError) as e: - self._log(f"Path validation error: {e}", "ERROR") - return False - - def _already_ran(self, session_id: str) -> bool: - """Check if power-steering already ran for this session. - - Args: - session_id: Session identifier - - Returns: - True if already ran, False otherwise - """ - semaphore = self.runtime_dir / f".{session_id}_completed" - return semaphore.exists() - - def _get_pre_compaction_transcript(self, session_id: str) -> Path | None: - """Check if session was compacted and return pre-compaction transcript path. - - When Claude Code compacts a session, the transcript_path provided to hooks - only contains the compacted summary (~50 messages). The pre_compact.py hook - saves the FULL transcript before compaction. This method finds that saved - transcript to ensure power-steering analyzes complete session history. - - Args: - session_id: Session identifier - - Returns: - Path to pre-compaction transcript if available, None otherwise - - Note: - See Issue #1962: After compaction, power-steering only saw ~50 messages - instead of 767+ causing false "work incomplete" blocks. - """ - try: - # Check for compaction events in session logs - logs_dir = self.project_root / ".claude" / "runtime" / "logs" - session_dir = logs_dir / session_id - - if not session_dir.exists(): - return None - - # Check if compaction events exist - compaction_file = session_dir / "compaction_events.json" - if not compaction_file.exists(): - return None - - # Parse compaction events to get transcript path - try: - with open(compaction_file) as f: - compaction_events = json.load(f) - except (json.JSONDecodeError, OSError) as e: - self._log(f"Failed to read compaction events: {e}", "WARNING") - return None - - if not compaction_events: - return None - - # Get the most recent compaction event's transcript - # Events are appended chronologically, so last is most recent - latest_event = compaction_events[-1] - saved_transcript_path = latest_event.get("transcript_path") - - if not saved_transcript_path: - # Fallback: Look for standard transcript file locations - possible_paths = [ - session_dir / "CONVERSATION_TRANSCRIPT.md", - session_dir / "conversation_transcript.jsonl", - ] - - # Check transcripts subdirectory for timestamped copies - transcripts_dir = session_dir / "transcripts" - if transcripts_dir.exists(): - transcript_files = sorted( - transcripts_dir.glob("conversation_*.md"), reverse=True - ) - if transcript_files: - possible_paths.insert(0, transcript_files[0]) - - for path in possible_paths: - if path.exists(): - saved_transcript_path = str(path) - break - - if not saved_transcript_path: - self._log("Compaction detected but no transcript path found", "WARNING") - return None - - transcript_path = Path(saved_transcript_path) - - # Security: Validate path is within project - if not self._validate_path(transcript_path, self.project_root): - self._log( - f"Pre-compaction transcript path outside project: {transcript_path}", - "WARNING", - ) - return None - - if transcript_path.exists(): - messages_count = latest_event.get("messages_exported", "unknown") - self._log( - f"Using pre-compaction transcript ({messages_count} messages): {transcript_path}", - "INFO", - ) - return transcript_path - - self._log(f"Pre-compaction transcript not found: {transcript_path}", "WARNING") - return None - - except Exception as e: - # Fail-open: If we can't check for pre-compaction, continue with provided transcript - self._log(f"Pre-compaction transcript check failed: {e}", "WARNING") - return None - - def _load_pre_compaction_transcript(self, transcript_path: Path) -> list[dict]: - """Load pre-compaction transcript from markdown or JSONL format. - - The pre_compact.py hook saves transcripts in markdown format (CONVERSATION_TRANSCRIPT.md). - This method parses that format to extract message data for analysis. - - Args: - transcript_path: Path to pre-compaction transcript file - - Returns: - List of message dictionaries - - Note: - Handles both markdown format from pre_compact.py and JSONL format. - """ - messages = [] - - try: - content = transcript_path.read_text() - - # Detect format by extension and content - if transcript_path.suffix == ".jsonl" or content.strip().startswith("{"): - # JSONL format - parse line by line - for line in content.strip().split("\n"): - if line.strip(): - try: - messages.append(json.loads(line)) - except json.JSONDecodeError: - continue - else: - # Markdown format from context_preservation.py - # Parse conversation entries marked with roles - current_role = None - current_content = [] - - for line in content.split("\n"): - # Detect role headers like "## User" or "## Assistant" or "**User:**" - role_match = None - if ( - line.startswith("## User") - or "**User:**" in line - or line.startswith("### User") - ): - role_match = "user" - elif ( - line.startswith("## Assistant") - or "**Assistant:**" in line - or line.startswith("### Assistant") - ): - role_match = "assistant" - - if role_match: - # Save previous message if exists - if current_role and current_content: - messages.append( - { - "role": current_role, - "content": "\n".join(current_content).strip(), - } - ) - current_role = role_match - current_content = [] - elif current_role: - current_content.append(line) - - # Don't forget the last message - if current_role and current_content: - messages.append( - {"role": current_role, "content": "\n".join(current_content).strip()} - ) - - self._log(f"Loaded {len(messages)} messages from pre-compaction transcript", "INFO") - return messages - - except Exception as e: - self._log(f"Failed to load pre-compaction transcript: {e}", "WARNING") - return [] - - def _verify_actual_state(self, session_id: str) -> dict[str, Any]: - """Verify work completion by checking actual git/GitHub state. - - This provides ground truth verification independent of transcript analysis. - Used as robust fallback when session has been compacted (Issue #1962). - - Args: - session_id: Session identifier - - Returns: - Dict with verification results: - - ci_passing: bool - All CI checks passed - - pr_mergeable: bool - PR is in mergeable state - - branch_current: bool - Branch is up to date with main - - tests_local: bool - Local tests pass (if available) - - all_passing: bool - All checks passed - - Note: - Fail-open design: Returns False for individual checks on errors, - but doesn't block overall verification. - """ - import subprocess - - results = { - "ci_passing": False, - "pr_mergeable": False, - "branch_current": False, - "tests_local": None, # None = not checked - "all_passing": False, - "details": {}, - } - - try: - # 1. Check if there's an open PR for current branch - pr_result = subprocess.run( - ["gh", "pr", "view", "--json", "state,mergeable,statusCheckRollup"], - capture_output=True, - text=True, - timeout=10, - cwd=str(self.project_root), - ) - - if pr_result.returncode == 0: - try: - pr_data = json.loads(pr_result.stdout) - results["details"]["pr_state"] = pr_data.get("state") - results["details"]["mergeable"] = pr_data.get("mergeable") - - # Check PR is open and mergeable - if pr_data.get("state") == "OPEN": - results["pr_mergeable"] = pr_data.get("mergeable") == "MERGEABLE" - - # Check CI status - status_checks = pr_data.get("statusCheckRollup", []) - if status_checks: - # All checks must pass - all_success = all( - check.get("conclusion") in ("SUCCESS", "NEUTRAL", "SKIPPED") - for check in status_checks - if check.get("conclusion") # Ignore pending - ) - # At least some checks must have run - has_completed = any(check.get("conclusion") for check in status_checks) - results["ci_passing"] = all_success and has_completed - results["details"]["ci_checks"] = len(status_checks) - results["details"]["ci_conclusions"] = [ - check.get("conclusion") for check in status_checks - ] - else: - # No status checks configured - consider CI passing - results["ci_passing"] = True - results["details"]["ci_checks"] = 0 - - except json.JSONDecodeError: - self._log("Failed to parse PR data", "WARNING") - else: - # No PR found - check if we're on main branch (might be direct work) - results["details"]["no_pr"] = True - - # 2. Check if branch is up to date with main/master - # Get commits behind main - for main_branch in ["origin/main", "origin/master"]: - behind_result = subprocess.run( - ["git", "rev-list", "--count", f"HEAD..{main_branch}"], - capture_output=True, - text=True, - timeout=5, - cwd=str(self.project_root), - ) - if behind_result.returncode == 0: - commits_behind = int(behind_result.stdout.strip()) - results["branch_current"] = commits_behind == 0 - results["details"]["commits_behind"] = commits_behind - results["details"]["main_branch"] = main_branch - break - - # 3. Determine overall result - # For state verification to pass, we need PR mergeable AND CI passing - # (branch_current is informational but not blocking) - if results.get("pr_mergeable") and results.get("ci_passing"): - results["all_passing"] = True - elif results.get("details", {}).get("no_pr"): - # No PR - check if on main branch directly - results["all_passing"] = results.get("branch_current", False) - - self._log( - f"State verification: ci={results['ci_passing']}, " - f"mergeable={results['pr_mergeable']}, current={results['branch_current']}", - "INFO", - ) - - except subprocess.TimeoutExpired: - self._log("State verification timed out", "WARNING") - except FileNotFoundError: - self._log("gh or git command not found for state verification", "WARNING") - except Exception as e: - self._log(f"State verification failed: {e}", "WARNING") - - return results - - def _results_already_shown(self, session_id: str) -> bool: - """Check if power-steering results were already shown for this session. - - Used for the "always block first" visibility feature. On first stop, - we always block to show results. On subsequent stops, we only block - if there are actual failures. - - Args: - session_id: Session identifier - - Returns: - True if results were already shown, False otherwise - """ - semaphore = self.runtime_dir / f".{session_id}_results_shown" - return semaphore.exists() - - def _mark_results_shown(self, session_id: str) -> None: - """Create semaphore to indicate results have been shown. - - Called after displaying all consideration results on first stop. - - Args: - session_id: Session identifier - """ - try: - semaphore = self.runtime_dir / f".{session_id}_results_shown" - semaphore.parent.mkdir(parents=True, exist_ok=True) - semaphore.touch() - semaphore.chmod(0o600) # Owner read/write only for security - except OSError: - pass # Fail-open: Continue even if semaphore creation fails - - def _mark_complete(self, session_id: str) -> None: - """Create semaphore to prevent re-running. - - Args: - session_id: Session identifier - """ - try: - semaphore = self.runtime_dir / f".{session_id}_completed" - semaphore.parent.mkdir(parents=True, exist_ok=True) - semaphore.touch() - semaphore.chmod(0o600) # Owner read/write only for security - except OSError: - pass # Fail-open: Continue even if semaphore creation fails - - def _get_redirect_file(self, session_id: str) -> Path: - """Get path to redirects file for a session. - - Args: - session_id: Session identifier - - Returns: - Path to redirects.jsonl file - """ - session_dir = self.runtime_dir / session_id - return session_dir / "redirects.jsonl" - - def _load_redirects(self, session_id: str) -> list[PowerSteeringRedirect]: - """Load redirect history for a session. - - Args: - session_id: Session identifier - - Returns: - List of PowerSteeringRedirect objects (empty if none exist) - """ - redirects_file = self._get_redirect_file(session_id) - - if not redirects_file.exists(): - return [] - - redirects = [] - try: - with open(redirects_file) as f: - for line in f: - line = line.strip() - if not line: - continue - try: - data = json.loads(line) - redirect = PowerSteeringRedirect( - redirect_number=data["redirect_number"], - timestamp=data["timestamp"], - failed_considerations=data["failed_considerations"], - continuation_prompt=data["continuation_prompt"], - work_summary=data.get("work_summary"), - ) - redirects.append(redirect) - except (json.JSONDecodeError, KeyError) as e: - self._log(f"Skipping malformed redirect entry: {e}", "WARNING") - continue - except OSError as e: - self._log(f"Error loading redirects: {e}", "WARNING") - return [] - - return redirects - - def _save_redirect( - self, - session_id: str, - failed_considerations: list[str], - continuation_prompt: str, - work_summary: str | None = None, - ) -> None: - """Save a redirect record to persistent storage. - - Args: - session_id: Session identifier - failed_considerations: List of failed consideration IDs - continuation_prompt: The prompt shown to user - work_summary: Optional summary of work done so far - """ - try: - # Load existing redirects to get next number - existing = self._load_redirects(session_id) - redirect_number = len(existing) + 1 - - # Create redirect record - redirect = PowerSteeringRedirect( - redirect_number=redirect_number, - timestamp=datetime.now().isoformat(), - failed_considerations=failed_considerations, - continuation_prompt=continuation_prompt, - work_summary=work_summary, - ) - - # Save to JSONL file (append-only) - redirects_file = self._get_redirect_file(session_id) - redirects_file.parent.mkdir(parents=True, exist_ok=True) - - # Convert to dict for JSON serialization - redirect_dict = { - "redirect_number": redirect.redirect_number, - "timestamp": redirect.timestamp, - "failed_considerations": redirect.failed_considerations, - "continuation_prompt": redirect.continuation_prompt, - "work_summary": redirect.work_summary, - } - - with open(redirects_file, "a") as f: - f.write(json.dumps(redirect_dict) + "\n") - - # Set permissions on new file - if redirect_number == 1: - redirects_file.chmod(0o600) # Owner read/write only for security - - self._log(f"Saved redirect #{redirect_number} for session {session_id}", "INFO") - - except OSError as e: - # Fail-open: Don't block user if we can't save redirect - self._log(f"Failed to save redirect: {e}", "ERROR") - - def _load_transcript(self, transcript_path: Path) -> list[dict]: - """Load transcript from JSONL file with size limits. - - Args: - transcript_path: Path to transcript file - - Returns: - List of message dictionaries (truncated if exceeds MAX_TRANSCRIPT_LINES) - - Raises: - OSError: If file cannot be read - json.JSONDecodeError: If JSONL is malformed - ValueError: If transcript path is outside project root (security check) - - Note: - Transcripts exceeding MAX_TRANSCRIPT_LINES are truncated to prevent - memory exhaustion. A warning is logged when truncation occurs. - """ - # Security: Validate transcript path is within project root - if not self._validate_path(transcript_path, self.project_root): - raise ValueError( - f"Transcript path {transcript_path} is outside project root {self.project_root}" - ) - - messages = [] - truncated = False - - with open(transcript_path) as f: - for line_num, line in enumerate(f, 1): - # Security: Enforce maximum transcript size - if line_num > MAX_TRANSCRIPT_LINES: - truncated = True - break - - line = line.strip() - if not line: - continue - messages.append(json.loads(line)) - - if truncated: - self._log( - f"Transcript truncated at {MAX_TRANSCRIPT_LINES} lines (original: {line_num})", - "WARNING", - ) - - return messages - - def _has_development_indicators( - self, - code_files_modified: bool, - test_executions: int, - pr_dev_operations: bool, - ) -> bool: - """Check if transcript shows development indicators. - - Args: - code_files_modified: Whether code files were modified - test_executions: Number of test executions - pr_dev_operations: Whether PR creation/edit operations were performed - (PR view/merge/review are ops, not development signals) - - Returns: - True if development indicators present - """ - return code_files_modified or test_executions > 0 or pr_dev_operations - - def _has_informational_indicators( - self, - write_edit_operations: int, - read_grep_operations: int, - question_count: int, - user_messages: list[dict], - ) -> bool: - """Check if transcript shows informational session indicators. - - Args: - write_edit_operations: Number of Write/Edit operations - read_grep_operations: Number of Read/Grep operations - question_count: Number of questions in user messages - user_messages: List of user message dicts - - Returns: - True if informational indicators present - """ - # No tool usage or only Read tools with high question density - if write_edit_operations == 0: - if read_grep_operations <= 1 and question_count > 0: - # High question density indicates INFORMATIONAL - if user_messages and question_count / len(user_messages) > 0.5: - return True - return False - - def _has_maintenance_indicators( - self, - write_edit_operations: int, - doc_files_only: bool, - git_operations: bool, - code_files_modified: bool, - ) -> bool: - """Check if transcript shows maintenance indicators. - - Args: - write_edit_operations: Number of Write/Edit operations - doc_files_only: Whether only doc files were modified - git_operations: Whether git operations were performed - code_files_modified: Whether code files were modified - - Returns: - True if maintenance indicators present - """ - # Only doc/config files modified - if write_edit_operations > 0 and doc_files_only: - return True - - # Git operations without code changes - if git_operations and not code_files_modified and write_edit_operations == 0: - return True - - return False - - def _has_investigation_indicators( - self, - read_grep_operations: int, - write_edit_operations: int, - ) -> bool: - """Check if transcript shows investigation indicators. - - Args: - read_grep_operations: Number of Read/Grep operations - write_edit_operations: Number of Write/Edit operations - - Returns: - True if investigation indicators present - """ - # Multiple Read/Grep without modifications - return read_grep_operations >= 2 and write_edit_operations == 0 - - def _has_simple_task_keywords(self, transcript: list[dict]) -> bool: - """Check user messages for simple housekeeping task keywords. - - Simple tasks like "cleanup workspace", "fetch latest", "git pull" should - skip most power-steering checks as they are routine maintenance. - - Args: - transcript: List of message dictionaries - - Returns: - True if simple task keywords found in user messages - """ - # Check first 3 user messages for simple task keywords - user_messages = [m for m in transcript if m.get("type") == "user"][:3] - - if not user_messages: - return False - - for msg in user_messages: - content = str(msg.get("message", {}).get("content", "")).lower() - - # Check for simple task keywords - for keyword in self.SIMPLE_TASK_KEYWORDS: - if keyword in content: - self._log( - f"Simple task keyword '{keyword}' found in user message", - "DEBUG", - ) - return True - - return False - - def _has_investigation_keywords(self, transcript: list[dict]) -> bool: - """Check early user messages for investigation/troubleshooting keywords. - - This check takes PRIORITY over tool-based heuristics. If investigation - keywords are found, the session is classified as INVESTIGATION regardless - of what tools were used. This fixes #1604 where troubleshooting sessions - were incorrectly blocked by development-specific checks. - - Args: - transcript: List of message dictionaries - - Returns: - True if investigation keywords found in early user messages - """ - # Check first 5 user messages for investigation keywords - user_messages = [m for m in transcript if m.get("type") == "user"][:5] - - if not user_messages: - return False - - for msg in user_messages: - content = str(msg.get("message", {}).get("content", "")).lower() - - # Check for investigation keywords - for keyword in self.INVESTIGATION_KEYWORDS: - if keyword in content: - self._log( - f"Investigation keyword '{keyword}' found in user message", - "DEBUG", - ) - return True - - return False - - def detect_session_type(self, transcript: list[dict]) -> str: - """Detect session type for selective consideration application. - - Session Types: - - SIMPLE: Routine housekeeping tasks (cleanup, fetch, sync) - skip most checks - - DEVELOPMENT: Code changes, tests, PR operations - - INFORMATIONAL: Q&A, help queries, capability questions - - MAINTENANCE: Documentation and configuration updates only - - INVESTIGATION: Exploration, analysis, troubleshooting, and debugging - - Detection Priority (UPDATED for Issue #2196): - 1. Environment override (AMPLIHACK_SESSION_TYPE) - 2. Simple task keywords (cleanup, fetch, workspace) - highest priority heuristic - 3. Tool usage patterns (code changes, tests, etc.) - CONCRETE EVIDENCE - 4. Investigation keywords in user messages - TIEBREAKER ONLY - - Tool usage patterns now take priority over keywords because they provide - concrete evidence of the session's actual work. Keywords like "analyze and fix" - are ambiguous, but Write/Edit tools with code changes are definitive signals - of DEVELOPMENT work. Investigation keywords are only checked as a fallback - when tool patterns are ambiguous (fixes #2196). - - Args: - transcript: List of message dictionaries - - Returns: - Session type string: "SIMPLE", "DEVELOPMENT", "INFORMATIONAL", "MAINTENANCE", or "INVESTIGATION" - """ - # Check for environment override first - env_override = os.getenv("AMPLIHACK_SESSION_TYPE", "").upper() - if env_override in [ - "SIMPLE", - "DEVELOPMENT", - "INFORMATIONAL", - "MAINTENANCE", - "INVESTIGATION", - ]: - self._log(f"Session type overridden by environment: {env_override}", "INFO") - return env_override - - # Empty transcript defaults to INFORMATIONAL (fail-open) - if not transcript: - return "INFORMATIONAL" - - # HIGHEST PRIORITY: Simple task keywords (cleanup, fetch, sync, workspace) - # These routine maintenance tasks should skip most power-steering checks - if self._has_simple_task_keywords(transcript): - self._log("Session classified as SIMPLE via keyword detection", "INFO") - return "SIMPLE" - - # Collect indicators from transcript BEFORE keyword checking - # Tool usage patterns are stronger signals than keywords (fixes #2196) - code_files_modified = False - doc_files_only = True - write_edit_operations = 0 - read_grep_operations = 0 - test_executions = 0 - pr_dev_operations = False # PR creation/edit (development signals) - git_operations = False - - # Count questions in user messages for INFORMATIONAL detection - user_messages = [m for m in transcript if m.get("type") == "user"] - question_count = 0 - if user_messages: - for msg in user_messages: - content = str(msg.get("message", {}).get("content", "")) - question_count += content.count("?") - - # Analyze tool usage - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if not isinstance(content, list): - content = [content] - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - tool_name = block.get("name", "") - tool_input = block.get("input", {}) - - # Write/Edit operations - if tool_name in ["Write", "Edit"]: - write_edit_operations += 1 - file_path = tool_input.get("file_path", "") - - # Check if code file using class constant (use endswith to avoid false positives) - if any(file_path.endswith(ext) for ext in self.CODE_FILE_EXTENSIONS): - code_files_modified = True - doc_files_only = False - - # Check if doc file using class constants (use endswith or special names) - is_doc_file = any( - file_path.endswith(ext) if ext.startswith(".") else ext in file_path - for ext in self.DOC_FILE_EXTENSIONS - ) - is_config_file = any( - file_path.endswith(ext) for ext in self.CONFIG_FILE_EXTENSIONS - ) - - if not is_doc_file and not is_config_file: - doc_files_only = False - - # Read/Grep operations (investigation indicators) - elif tool_name in ["Read", "Grep", "Glob"]: - read_grep_operations += 1 - - # Test execution - elif tool_name == "Bash": - command = tool_input.get("command", "") - # Test patterns using class constant - if any(pattern in command for pattern in self.TEST_COMMAND_PATTERNS): - test_executions += 1 - - # PR operations - distinguish dev (create/edit) from ops (view/merge/review) - if "gh pr create" in command or "gh pr edit" in command: - pr_dev_operations = True - elif "gh pr" in command: - pass # view/merge/checks/diff/ready - ops, not development - - # Git operations - if "git commit" in command or "git push" in command: - git_operations = True - - # Decision logic (REFINED for Issue #2196): - # 1. Investigation keywords checked early BUT can be overridden by CODE modifications - # 2. CODE modifications (code files) take priority → DEVELOPMENT - # 3. NON-CODE modifications (docs, configs, git) DON'T override investigation keywords - # 4. Default to INFORMATIONAL (fail-open) - - # Check for investigation keywords early - has_investigation_keywords = self._has_investigation_keywords(transcript) - - # DEVELOPMENT: CODE modifications override investigation keywords (fixes #2196) - # Only override keywords if we have actual CODE file modifications - # Doc/config updates or git operations should NOT override investigation keywords - # PR ops (view/merge/review/checks) are NOT development signals (fixes #2563) - if code_files_modified or test_executions > 0 or pr_dev_operations: - # Strong signal: Write/Edit of CODE files, tests run, PR creation/editing - self._log("Session classified as DEVELOPMENT via CODE modification patterns", "INFO") - return "DEVELOPMENT" - - # INVESTIGATION: Keywords found and NO code modifications - # This handles "investigate X", "how does X work", "troubleshoot Y" with: - # - No tools (pure questions) - # - Doc/config updates only (documenting findings) - # - Git operations only (committing investigation notes) - if has_investigation_keywords: - self._log( - "Session classified as INVESTIGATION via keywords (no code modifications)", "INFO" - ) - return "INVESTIGATION" - - # INFORMATIONAL: No tool usage or only Read tools with high question density - # Questions without investigation keywords - if self._has_informational_indicators( - write_edit_operations, read_grep_operations, question_count, user_messages - ): - return "INFORMATIONAL" - - # INVESTIGATION: Tool-based heuristics (Read/Grep without modifications) - # Catches investigation sessions that don't have explicit keywords - if self._has_investigation_indicators(read_grep_operations, write_edit_operations): - self._log("Session classified as INVESTIGATION via tool usage patterns", "INFO") - return "INVESTIGATION" - - # MAINTENANCE: Only doc/config files modified OR git operations without code changes - if self._has_maintenance_indicators( - write_edit_operations, doc_files_only, git_operations, code_files_modified - ): - return "MAINTENANCE" - - # Default to INFORMATIONAL if unclear (fail-open, conservative) - return "INFORMATIONAL" - - def get_applicable_considerations(self, session_type: str) -> list[dict[str, Any]]: - """Get considerations applicable to a specific session type. - - Args: - session_type: Session type ("SIMPLE", "DEVELOPMENT", "INFORMATIONAL", "MAINTENANCE", "INVESTIGATION") - - Returns: - List of consideration dictionaries applicable to this session type - """ - # SIMPLE sessions skip ALL considerations - they are routine maintenance tasks - # like cleanup, fetch, sync, workspace management that don't need verification - if session_type == "SIMPLE": - self._log("SIMPLE session - skipping all considerations", "INFO") - return [] - - # Filter considerations based on session type - applicable = [] - - for consideration in self.considerations: - # Check if consideration has applicable_session_types field - applicable_types = consideration.get("applicable_session_types", []) - - # If no field or empty, check if this is Phase 1 fallback - if not applicable_types: - # Phase 1 considerations (no applicable_session_types field) - # Only apply to DEVELOPMENT sessions by default - if session_type == "DEVELOPMENT": - applicable.append(consideration) - continue - - # Check if this session type is in the list - if session_type in applicable_types or "*" in applicable_types: - applicable.append(consideration) - - return applicable - - def _is_qa_session(self, transcript: list[dict]) -> bool: - """Detect if session is interactive Q&A (skip power-steering). - - Heuristics: - 1. No tool calls (no file operations) - 2. High question count in user messages - 3. Short session (< 5 turns) - - Args: - transcript: List of message dictionaries - - Returns: - True if Q&A session, False otherwise - """ - # Count tool uses - check for tool_use blocks in assistant messages - # Note: We check both 'type' field and 'name' field because transcript - # format can vary between direct tool_use blocks and nested formats - tool_uses = 0 - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if not isinstance(content, list): - content = [content] - for block in content: - if isinstance(block, dict): - # Check for tool_use type OR presence of name field (tool indicator) - if block.get("type") == "tool_use" or ( - "name" in block and block.get("name") - ): - tool_uses += 1 - - # If we have substantial tool usage, not Q&A - if tool_uses >= 2: - return False - - # If no tool uses, check for Q&A pattern - if tool_uses == 0: - # Count user messages with questions - user_messages = [m for m in transcript if m.get("type") == "user"] - if len(user_messages) == 0: - return True # No user messages = skip - - questions = sum( - 1 for m in user_messages if "?" in str(m.get("message", {}).get("content", "")) - ) - - # If >50% of user messages are questions, likely Q&A - if questions / len(user_messages) > 0.5: - return True - - # Short sessions with few tools = likely Q&A - if len(transcript) < 5 and tool_uses < 2: - return True - - return False - - def _create_passing_analysis( - self, - original_analysis: ConsiderationAnalysis, - addressed_concerns: dict[str, str], - ) -> ConsiderationAnalysis: - """Create a modified analysis with addressed blockers marked as satisfied. - - Used when all blockers were addressed in the current turn to convert - a failing analysis to a passing one. - - Args: - original_analysis: The original analysis with blockers - addressed_concerns: Map of concern_id -> how it was addressed - - Returns: - New ConsiderationAnalysis with blockers converted to satisfied - """ - # Create a copy of results with addressed concerns marked satisfied - modified_results = dict(original_analysis.results) - - for consideration_id, how_addressed in addressed_concerns.items(): - if consideration_id in modified_results: - old_result = modified_results[consideration_id] - modified_results[consideration_id] = CheckerResult( - consideration_id=consideration_id, - satisfied=True, - reason=f"{old_result.reason} [ADDRESSED: {how_addressed}]", - severity=old_result.severity, - ) - - # Create new analysis with modified results - return ConsiderationAnalysis(results=modified_results) - - def _convert_to_failure_evidence( - self, - failed_results: list[CheckerResult], - transcript: list[dict], - user_claims: list[str] | None = None, - ) -> list["FailureEvidence"]: - """Convert CheckerResults to FailureEvidence with evidence quotes. - - Extracts specific evidence from the transcript to show WHY each - check failed, enabling the agent to understand exactly what's missing. - - Args: - failed_results: List of failed CheckerResult objects - transcript: Full transcript for evidence extraction - user_claims: User claims detected (to mark as was_claimed_complete) - - Returns: - List of FailureEvidence objects with detailed evidence - """ - if not TURN_STATE_AVAILABLE: - return [] - - evidence_list: list[FailureEvidence] = [] - claimed_ids = set() - - # Extract consideration IDs that were claimed as complete - if user_claims: - for claim in user_claims: - claim_lower = claim.lower() - for result in failed_results: - cid = result.consideration_id.lower() - # Simple heuristic: if claim mentions words from consideration ID - if any(word in claim_lower for word in cid.split("_") if len(word) > 2): - claimed_ids.add(result.consideration_id) - - for result in failed_results: - # Try to find specific evidence quote from transcript - quote = self._find_evidence_quote(result, transcript) - - evidence = FailureEvidence( - consideration_id=result.consideration_id, - reason=result.reason, - evidence_quote=quote, - was_claimed_complete=result.consideration_id in claimed_ids, - ) - evidence_list.append(evidence) - - return evidence_list - - def _find_evidence_quote( - self, - result: CheckerResult, - transcript: list[dict], - ) -> str | None: - """Find a specific quote from transcript showing why check failed. - - Searches for relevant context based on the consideration type to - provide concrete evidence of what's missing or failing. - - Args: - result: CheckerResult to find evidence for - transcript: Full transcript to search - - Returns: - Evidence quote string if found, None otherwise - """ - cid = result.consideration_id.lower() - - # Define search patterns for each consideration type - search_terms: dict[str, list[str]] = { - "todos": ["todo", "task", "item", "remaining"], - "testing": ["test", "pytest", "unittest", "failing", "error"], - "ci": ["ci", "github actions", "pipeline", "build", "workflow"], - "workflow": ["step", "workflow", "phase"], - "review": ["review", "feedback", "comment"], - "philosophy": ["philosophy", "simplicity", "stub", "placeholder"], - "docs": ["documentation", "readme", "doc"], - } - - # Find which search terms apply to this consideration - relevant_terms = [] - for key, terms in search_terms.items(): - if key in cid: - relevant_terms.extend(terms) - - if not relevant_terms: - return None - - # Search recent transcript for relevant content - recent_messages = transcript[-20:] if len(transcript) > 20 else transcript - - for msg in reversed(recent_messages): - content = self._extract_message_text(msg).lower() - - for term in relevant_terms: - if term in content: - # Found relevant content - extract context - idx = content.find(term) - start = max(0, idx - 30) - end = min(len(content), idx + len(term) + 70) - - # Get original case text - original_content = self._extract_message_text(msg) - quote = original_content[start:end].strip() - - if len(quote) > 10: # Only return meaningful quotes - return f"...{quote}..." - - return None - - def _analyze_considerations( - self, - transcript: list[dict], - session_id: str, - session_type: str | None = None, - progress_callback: Callable | None = None, - ) -> ConsiderationAnalysis: - """Analyze transcript against all enabled considerations IN PARALLEL. - - Phase 4 (Performance): Uses asyncio.gather() to run ALL SDK checks in parallel, - reducing total time from ~220s (sequential) to ~15-20s (parallel). - - Key design decisions: - - Transcript is loaded ONCE upfront, shared across all parallel workers - - ALL checks run - no early exit - for comprehensive feedback - - No caching - session-specific analysis doesn't benefit from caching - - Fail-open: Any errors result in "satisfied" to never block users - - Args: - transcript: List of message dictionaries (PRE-LOADED, not fetched by workers) - session_id: Session identifier - session_type: Session type for selective consideration application (auto-detected if None) - progress_callback: Optional callback for progress events - - Returns: - ConsiderationAnalysis with results from ALL considerations - """ - # Auto-detect session type if not provided - if session_type is None: - session_type = self.detect_session_type(transcript) - self._log(f"Auto-detected session type: {session_type}", "DEBUG") - - # Get considerations applicable to this session type - applicable_considerations = self.get_applicable_considerations(session_type) - - # Filter to enabled considerations only - enabled_considerations = [] - for consideration in applicable_considerations: - # Check if enabled in consideration itself - if not consideration.get("enabled", True): - continue - # Also check config for backward compatibility - if not self.config.get("checkers_enabled", {}).get(consideration["id"], True): - continue - enabled_considerations.append(consideration) - - # Emit progress for all categories upfront - categories = set(c.get("category", "Unknown") for c in enabled_considerations) - for category in categories: - self._emit_progress( - progress_callback, - "category", - f"Checking {category}", - {"category": category}, - ) - - # Emit progress for parallel execution start - self._emit_progress( - progress_callback, - "parallel_start", - f"Running {len(enabled_considerations)} checks in parallel...", - {"count": len(enabled_considerations)}, - ) - - # Run all considerations in parallel using asyncio - try: - # Use asyncio.run() to execute the parallel async method - # This is the single event loop for all parallel checks - start_time = datetime.now() - - analysis = asyncio.run( - self._analyze_considerations_parallel_async( - transcript=transcript, - session_id=session_id, - enabled_considerations=enabled_considerations, - progress_callback=progress_callback, - ) - ) - - elapsed = (datetime.now() - start_time).total_seconds() - self._log( - f"Parallel analysis completed: {len(enabled_considerations)} checks in {elapsed:.1f}s", - "INFO", - ) - self._emit_progress( - progress_callback, - "parallel_complete", - f"Completed {len(enabled_considerations)} checks in {elapsed:.1f}s", - {"count": len(enabled_considerations), "elapsed_seconds": elapsed}, - ) - - return analysis - - except Exception as e: - # Fail-open: On any error with parallel execution, return empty analysis - self._log(f"Parallel analysis failed (fail-open): {e}", "ERROR") - return ConsiderationAnalysis() - - async def _analyze_considerations_parallel_async( - self, - transcript: list[dict], - session_id: str, - enabled_considerations: list[dict[str, Any]], - progress_callback: Callable | None = None, - ) -> ConsiderationAnalysis: - """Async implementation that runs ALL considerations in parallel. - - Args: - transcript: Pre-loaded transcript (shared across all workers) - session_id: Session identifier - enabled_considerations: List of enabled consideration dictionaries - progress_callback: Optional callback for progress events - - Returns: - ConsiderationAnalysis with results from all considerations - """ - analysis = ConsiderationAnalysis() - - # Create async tasks for ALL considerations - # Each task receives the SAME transcript (no re-fetching) - tasks = [ - self._check_single_consideration_async( - consideration=consideration, - transcript=transcript, - session_id=session_id, - ) - for consideration in enabled_considerations - ] - - # Run ALL tasks in parallel with overall timeout - # return_exceptions=True ensures all tasks complete even if some fail - try: - async with asyncio.timeout(PARALLEL_TIMEOUT): - results = await asyncio.gather(*tasks, return_exceptions=True) - except TimeoutError: - self._log(f"Parallel execution timed out after {PARALLEL_TIMEOUT}s", "WARNING") - # Fail-open: Return empty analysis on timeout - return analysis - - # Process results from all parallel tasks - for consideration, result in zip(enabled_considerations, results, strict=False): - if isinstance(result, Exception): - # Task raised an exception - fail-open - self._log( - f"Check '{consideration['id']}' failed with exception: {result}", - "WARNING", - ) - checker_result = CheckerResult( - consideration_id=consideration["id"], - satisfied=True, # Fail-open - reason=f"Error: {result}", - severity=consideration["severity"], - ) - elif isinstance(result, CheckerResult): - # Normal result - checker_result = result - else: - # Unexpected result type - fail-open - self._log( - f"Check '{consideration['id']}' returned unexpected type: {type(result)}", - "WARNING", - ) - checker_result = CheckerResult( - consideration_id=consideration["id"], - satisfied=True, # Fail-open - reason="Unexpected result type", - severity=consideration["severity"], - ) - - analysis.add_result(checker_result) - - # Emit individual result progress - self._emit_progress( - progress_callback, - "consideration_result", - f"{'✓' if checker_result.satisfied else '✗'} {consideration['question']}", - { - "consideration_id": consideration["id"], - "satisfied": checker_result.satisfied, - "question": consideration["question"], - }, - ) - - return analysis - - async def _check_single_consideration_async( - self, - consideration: dict[str, Any], - transcript: list[dict], - session_id: str, - ) -> CheckerResult: - """Check a single consideration asynchronously. - - Phase 5 (SDK-First): Use Claude SDK as PRIMARY method - - ALL considerations analyzed by SDK first (when available) - - Specific checkers (_check_*) used ONLY as fallback - - Fail-open when SDK unavailable or fails - - This is the parallel worker that handles one consideration. - The transcript is already loaded - this method does NOT fetch it. - - Args: - consideration: Consideration dictionary - transcript: Pre-loaded transcript (shared, not fetched) - session_id: Session identifier - - Returns: - CheckerResult with satisfaction status - """ - try: - # SDK-FIRST: Try SDK for ALL considerations (when available) - if SDK_AVAILABLE: - try: - # Use async SDK function directly (already awaitable) - # Returns tuple: (satisfied, reason) - satisfied, sdk_reason = await analyze_consideration( - conversation=transcript, - consideration=consideration, - project_root=self.project_root, - ) - - # SDK succeeded - return result with SDK-provided reason - return CheckerResult( - consideration_id=consideration["id"], - satisfied=satisfied, - reason=( - "SDK analysis: satisfied" - if satisfied - else f"SDK analysis: {sdk_reason or consideration['question'] + ' not met'}" - ), - severity=consideration["severity"], - ) - except Exception as e: - # SDK failed - log to stderr and fall through to fallback - import sys - - error_msg = f"[Power Steering SDK Error] {consideration['id']}: {e!s}\n" - sys.stderr.write(error_msg) - sys.stderr.flush() - - self._log( - f"SDK analysis failed for '{consideration['id']}': {e}", - "DEBUG", - ) - # Continue to fallback methods below - - # FALLBACK: Use heuristic checkers when SDK unavailable or failed - checker_name = consideration["checker"] - - # Dispatch to specific checker or generic analyzer - if hasattr(self, checker_name) and callable(getattr(self, checker_name)): - checker_func = getattr(self, checker_name) - satisfied = checker_func(transcript, session_id) - else: - # Generic analyzer for considerations without specific checker - satisfied = self._generic_analyzer(transcript, session_id, consideration) - - return CheckerResult( - consideration_id=consideration["id"], - satisfied=satisfied, - reason=(f"Heuristic fallback: {'satisfied' if satisfied else 'not met'}"), - severity=consideration["severity"], - ) - - except Exception as e: - # Fail-open: Never block on errors - self._log( - f"Checker error for '{consideration['id']}': {e}", - "WARNING", - ) - return CheckerResult( - consideration_id=consideration["id"], - satisfied=True, # Fail-open - reason=f"Error (fail-open): {e}", - severity=consideration["severity"], - ) - - def _format_results_text(self, analysis: ConsiderationAnalysis, session_type: str) -> str: - """Format analysis results as text for inclusion in continuation_prompt. - - This allows users to see results even when stderr isn't visible. - - Note on message branches: This method handles three cases: - 1. Some checks passed → "ALL CHECKS PASSED" - 2. No checks ran (all skipped) → "NO CHECKS APPLICABLE" - 3. Some checks failed → "CHECKS FAILED" - - Case #2 is primarily for testing - in production, check() returns early - (line 759) when len(analysis.results)==0, so this method won't be called. - However, tests call this method directly to verify message formatting works. - - Args: - analysis: ConsiderationAnalysis with results - session_type: Session type (e.g., "SIMPLE", "STANDARD") - - Returns: - Formatted text string with results grouped by category - """ - lines = [] - lines.append("\n" + "=" * 60) - lines.append("⚙️ POWER-STEERING ANALYSIS RESULTS") - lines.append("=" * 60 + "\n") - lines.append(f"Session Type: {session_type}\n") - - # Group results by category - by_category: dict[str, list[tuple]] = {} - for consideration in self.considerations: - category = consideration.get("category", "Unknown") - cid = consideration["id"] - result = analysis.results.get(cid) - - if category not in by_category: - by_category[category] = [] - - by_category[category].append((consideration, result)) - - # Display by category - total_passed = 0 - total_failed = 0 - total_skipped = 0 - - for category, items in sorted(by_category.items()): - lines.append(f"📋 {category}") - lines.append("-" * 40) - - for consideration, result in items: - if result is None: - indicator = "⬜" # Not checked (skipped) - total_skipped += 1 - elif result.satisfied: - indicator = "✅" - total_passed += 1 - else: - indicator = "❌" - total_failed += 1 - - question = consideration.get("question", consideration["id"]) - severity = consideration.get("severity", "warning") - severity_tag = " [blocker]" if severity == "blocker" else "" - - lines.append(f" {indicator} {question}{severity_tag}") - - lines.append("") - - # Summary line - lines.append("=" * 60) - if total_failed == 0 and total_passed > 0: - # Some checks passed and none failed - self._log( - f"Message branch: ALL_CHECKS_PASSED (passed={total_passed}, failed=0, skipped={total_skipped})", - "DEBUG", - ) - lines.append(f"✅ ALL CHECKS PASSED ({total_passed} passed, {total_skipped} skipped)") - lines.append("\n📌 This was your first stop. Next stop will proceed without blocking.") - lines.append("\n💡 To disable power-steering: export AMPLIHACK_SKIP_POWER_STEERING=1") - lines.append(" Or create: .claude/runtime/power-steering/.disabled") - elif total_failed == 0 and total_passed == 0: - # No checks were evaluated (all skipped) - not a "pass", just no applicable checks - self._log( - f"Message branch: NO_CHECKS_APPLICABLE (passed=0, failed=0, skipped={total_skipped})", - "DEBUG", - ) - lines.append(f"⚠️ NO CHECKS APPLICABLE ({total_skipped} skipped for session type)") - lines.append("\n📌 No power-steering checks apply to this session type.") - lines.append(" This is expected for simple Q&A or informational sessions.") - else: - # Some checks failed - self._log( - f"Message branch: CHECKS_FAILED (passed={total_passed}, failed={total_failed}, skipped={total_skipped})", - "DEBUG", - ) - lines.append( - f"❌ CHECKS FAILED ({total_passed} passed, {total_failed} failed, {total_skipped} skipped)" - ) - lines.append("\n📌 Address the failed checks above before stopping.") - lines.append("=" * 60 + "\n") - - return "\n".join(lines) - - # ======================================================================== - # Phase 1: Top 5 Critical Checkers - # ======================================================================== - - def _check_todos_complete(self, transcript: list[dict], session_id: str) -> bool: - """Check if all TODO items completed. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if all TODOs completed, False otherwise - """ - # Find last TodoWrite tool call - last_todo_write = None - for msg in reversed(transcript): - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") == "TodoWrite": - last_todo_write = block.get("input", {}) - break - if last_todo_write: - break - - # If no TodoWrite found, consider satisfied (no todos to check) - if not last_todo_write: - return True - - # Check todos in last TodoWrite - todos = last_todo_write.get("todos", []) - if not todos: - return True - - # Check if any todos are not completed - for todo in todos: - status = todo.get("status", "pending") - if status != "completed": - return False # Found incomplete todo - - return True # All todos completed - - def _extract_incomplete_todos(self, transcript: list[dict]) -> list[str]: - """Extract list of incomplete todo items from transcript. - - Helper method used by continuation prompt generation to show - specific items the agent needs to complete. - - Args: - transcript: List of message dictionaries - - Returns: - List of incomplete todo item descriptions - """ - incomplete_todos = [] - - # Find last TodoWrite tool call - last_todo_write = None - for msg in reversed(transcript): - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") == "TodoWrite": - last_todo_write = block.get("input", {}) - break - if last_todo_write: - break - - if not last_todo_write: - return [] - - todos = last_todo_write.get("todos", []) - for todo in todos: - status = todo.get("status", "pending") - if status != "completed": - content = todo.get("content", "Unknown task") - incomplete_todos.append(f"[{status}] {content}") - - return incomplete_todos - - def _extract_next_steps_mentioned(self, transcript: list[dict]) -> list[str]: - """Extract specific next steps mentioned in recent assistant messages. - - Helper method used by continuation prompt generation to show - specific next steps the agent mentioned but hasn't completed. - - Args: - transcript: List of message dictionaries - - Returns: - List of next step descriptions (extracted sentences/phrases) - """ - next_steps = [] - next_steps_triggers = [ - "next step", - "next steps", - "follow-up", - "remaining", - "still need", - "todo", - "left to", - ] - - # Check recent assistant messages - recent_messages = [m for m in transcript[-15:] if m.get("type") == "assistant"][-5:] - - for msg in recent_messages: - content = msg.get("message", {}).get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "text": - text = str(block.get("text", "")) - text_lower = text.lower() - - # Check if this block mentions next steps - if any(trigger in text_lower for trigger in next_steps_triggers): - # Extract sentences containing the trigger - sentences = text.replace("\n", " ").split(". ") - for sentence in sentences: - sentence_lower = sentence.lower() - if any( - trigger in sentence_lower for trigger in next_steps_triggers - ): - clean_sentence = sentence.strip() - if clean_sentence and len(clean_sentence) > 10: - # Truncate long sentences - if len(clean_sentence) > 150: - clean_sentence = clean_sentence[:147] + "..." - if clean_sentence not in next_steps: - next_steps.append(clean_sentence) - - return next_steps[:5] # Limit to 5 items - - def _check_workflow_invocation(self, transcript: list[dict], session_id: str) -> bool: - """Check if workflow was properly invoked using Claude SDK analysis. - - Uses context-aware AI analysis to detect workflow invocation patterns: - - Explicit Skill tool invocation - - Explicit Read tool invocation - - Implicit step-by-step workflow following - - Async completion (PR created for review, CI running) - - Issue #2040: Enforce workflow invocation compliance - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if workflow properly invoked or not required, False otherwise - """ - try: - # Import SDK analysis function - from claude_power_steering import analyze_workflow_invocation_sync - - # Determine session type from state if available - session_type = "DEVELOPMENT" # Default - try: - state_file = self.runtime_dir / session_id / "turn_state.json" - if state_file.exists(): - state = json.loads(state_file.read_text()) - session_type = state.get("session_type", "DEVELOPMENT") - except Exception as e: - self._log( - f"Could not load session type from state file, using default: {e}", "DEBUG" - ) - - # Use SDK analysis for workflow invocation validation - valid, reason = analyze_workflow_invocation_sync( - transcript, session_type, self.project_root - ) - - if not valid: - # Log violation details - self._log_violation( - "workflow_invocation", - { - "reason": reason or "Workflow not properly invoked", - "session_type": session_type, - }, - session_id, - ) - - return valid - - except ImportError: - # SDK not available - fail open - import sys - - sys.stderr.write( - "[Power Steering] claude_power_steering not available, skipping workflow check\n" - ) - return True - except Exception as e: - # Fail-open on errors - import sys - - sys.stderr.write(f"[Power Steering] Error in _check_workflow_invocation: {e}\n") - return True - - def _transcript_to_text(self, transcript: list[dict]) -> str: - """Convert transcript list to plain text for pattern matching. - - Args: - transcript: List of message dictionaries - - Returns: - Plain text representation of transcript - """ - lines = [] - for msg in transcript: - role = msg.get("type", "unknown") - if role == "user": - lines.append(f"User: {self._extract_message_text(msg)}") - elif role == "assistant": - lines.append(f"Claude: {self._extract_message_text(msg)}") - return "\n".join(lines) - - def _extract_message_text(self, msg: dict) -> str: - """Extract text content from message. - - Args: - msg: Message dictionary - - Returns: - Text content - """ - message = msg.get("message", {}) - content = message.get("content", []) - - if isinstance(content, str): - return content - - if isinstance(content, list): - texts = [] - for block in content: - if isinstance(block, dict): - if block.get("type") == "text": - texts.append(block.get("text", "")) - elif block.get("type") == "tool_use": - # Include tool invocations in text - tool_name = block.get("name", "") - tool_input = block.get("input", {}) - texts.append(f'{tool_input}') - return " ".join(texts) - - return "" - - def _log_violation(self, consideration_id: str, details: dict, session_id: str) -> None: - """Log violation details to session logs. - - Args: - consideration_id: ID of failed consideration - details: Violation details - session_id: Session identifier - """ - try: - log_file = self.runtime_dir / session_id / "violations.json" - log_file.parent.mkdir(parents=True, exist_ok=True) - - violations = [] - if log_file.exists(): - violations = json.loads(log_file.read_text()) - - violations.append( - { - "consideration_id": consideration_id, - "timestamp": datetime.now().isoformat(), - "details": details, - } - ) - - log_file.write_text(json.dumps(violations, indent=2), encoding="utf-8") - except Exception as e: - self._log(f"Could not write violation log (non-critical): {e}", "WARNING") - - def _check_no_direct_main_commit(self, transcript: list[dict], session_id: str) -> bool: - """Check that the agent did not commit directly to main. - - Verifies the mandatory user preference that all code changes go through - a feature branch and PR, never committing directly to main/master. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if no direct-to-main commits detected, False otherwise - """ - for i, msg in enumerate(transcript): - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") == "Bash": - command = block.get("input", {}).get("command", "") - # Detect git commit on main/master - if "git commit" in command: - # Check NEARBY messages for branch context - if self._is_on_main_branch_near(transcript, i): - return False - # Detect git push to main/master (explicit or bare) - if "git push" in command: - if "origin main" in command or "origin master" in command: - return False - # Bare git push (no branch specified) while on main - if "origin main" not in command and "origin master" not in command: - # Only flag if no branch is specified at all - # (git push, git push origin, git push -u origin) - parts = command.strip().split() - # If command is just "git push" or "git push origin" - # (no branch arg), check if we're on main - has_branch_arg = len(parts) > 3 or any( - p.startswith("feat/") or p.startswith("fix/") or p.startswith("docs/") - for p in parts - ) - if not has_branch_arg and self._is_on_main_branch_near(transcript, i): - return False - return True - - def _is_on_main_branch_near(self, transcript: list[dict], commit_index: int) -> bool: - """Check if git context NEAR a commit command shows we're on main/master. - - Searches within 10 messages before the commit for the most recent - branch indicator. This avoids false positives where the session started - on main but switched to a feature branch before committing. - - Args: - transcript: List of message dictionaries - commit_index: Index of the message containing the git commit - - Returns: - True if nearest branch evidence shows main/master - """ - # Search the 10 messages before the commit for branch context - start = max(0, commit_index - 10) - # Also check the most recent branch indicator, not just any indicator - for msg in reversed(transcript[start:commit_index]): - if msg.get("type") == "tool_result": - output = str(msg.get("message", {}).get("content", "")).lower() - # If we find a feature branch indicator, we're NOT on main - if "on branch " in output and "on branch main" not in output and "on branch master" not in output: - return False - # If we find main/master indicator, we ARE on main - if "on branch main" in output or "on branch master" in output: - return True - if "* main" in output or "* master" in output: - return True - # No branch context found nearby — fail-open (assume not on main) - return False - - def _check_dev_workflow_complete(self, transcript: list[dict], session_id: str) -> bool: - """Check if full DEFAULT_WORKFLOW followed. - - Heuristics: - - Look for multiple agent invocations (architect, builder, reviewer) - - Check for test execution - - Verify git operations (commit, push) - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if workflow complete, False otherwise - """ - # Extract tool names used - tools_used = set() - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - tools_used.add(block.get("name", "")) - - has_file_ops = any(t in tools_used for t in ["Edit", "Write"]) - - # If no file operations, likely not a development task - if not has_file_ops: - return True - - # Check for ACTUAL test/validation commands, not just "Bash was used" - has_tests = False - direct_patterns = self.TEST_COMMAND_PATTERNS + self.VALIDATION_COMMAND_PATTERNS - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") == "Bash": - command = block.get("input", {}).get("command", "") - if any(p in command for p in direct_patterns): - has_tests = True - break - # Accept python -c/node -e only with real validation - if any( - p in command for p in self.INLINE_VALIDATION_PATTERNS - ) and self._is_meaningful_validation(command): - has_tests = True - break - if has_tests: - break - - if not has_tests: - return False - - return True - - # File extensions where TODO/FIXME/stubs are acceptable (docs, config, YAML) - NON_CODE_EXTENSIONS = [".md", ".txt", ".rst", ".yml", ".yaml", ".json", ".toml", ".cfg", ".ini"] - - def _check_philosophy_compliance(self, transcript: list[dict], session_id: str) -> bool: - """Check for PHILOSOPHY adherence (zero-BS). - - Heuristics: - - Look for "TODO", "FIXME", "XXX" in Write/Edit tool calls to CODE files - - Check for stub implementations (NotImplementedError, pass) - - Detect placeholder comments - - Skip documentation, YAML, and config files where these words may - appear legitimately (e.g., YAML questions mentioning TODO, docs - explaining the philosophy) - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if compliant, False otherwise - """ - # Check Write and Edit tool calls for anti-patterns - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - tool_name = block.get("name", "") - if tool_name in ["Write", "Edit"]: - tool_input = block.get("input", {}) - file_path = tool_input.get("file_path", "") - - # Skip non-code files (docs, YAML, config) where - # TODO/FIXME may appear legitimately - if any(file_path.endswith(ext) for ext in self.NON_CODE_EXTENSIONS): - continue - - file_path_lower = file_path.lower() - # Skip test files — they may contain TODO/NotImplementedError - # as test data or assertion targets, not as actual stubs - is_test_file = ( - "/test" in file_path_lower - or "/tests/" in file_path_lower - or file_path_lower.split("/")[-1].startswith("test_") - ) - - # Check content for anti-patterns - content_to_check = "" - if "content" in tool_input: - content_to_check = str(tool_input["content"]) - elif "new_string" in tool_input: - content_to_check = str(tool_input["new_string"]) - - # Look for TODO/FIXME/XXX (skip test files where these - # may appear as test data or assertion strings) - if not is_test_file and re.search( - r"\b(TODO|FIXME|XXX)\b", content_to_check - ): - return False - - # Look for NotImplementedError (skip test files where - # this appears in pytest.raises assertions) - if not is_test_file and "NotImplementedError" in content_to_check: - return False - - # Look for stub patterns (with optional -> return type): - # - Single-line: def f(): pass / def f() -> None: pass - # - Multi-line: def f():\n pass - # - Ellipsis: def f(): ... / def f() -> int: ... - # Skip if @abstractmethod context detected (legitimate pattern) - # Use specific ABC patterns to avoid false matches on - # "ABC Corp", "ABC123", etc. (Issue: round 4 audit D4) - has_abstract = ( - "@abstractmethod" in content_to_check - or "from abc import" in content_to_check.lower() - or "import abc" in content_to_check.lower() - or re.search(r"class\s+\w+\(.*\bABC\b", content_to_check) - ) - if not has_abstract: - if re.search( - r"def\s+\w+\([^)]*\)(?:\s*->.*?)?:\s*(?:pass|\.\.\.)\s*$", - content_to_check, - re.MULTILINE, - ): - return False - if re.search( - r"def\s+\w+\([^)]*\)(?:\s*->.*?)?:\s*\n\s+(?:pass|\.\.\.)\s*$", - content_to_check, - re.MULTILINE, - ): - return False - - return True - - @staticmethod - def _is_meaningful_validation(command: str) -> bool: - """Check if a python -c or node -e command does meaningful validation. - - Rejects trivial commands like print('hello') and accepts commands that - import modules, open files, parse data, or run actual validation logic. - - Args: - command: The full Bash command string - - Returns: - True if the command appears to do real validation - """ - validation_signals = [ - "import ", "from ", "open(", "load(", "parse(", - "validate", "check", "assert", "yaml", "json", - "safe_load", "read_text", "read()", - ] - cmd_lower = command.lower() - return any(signal in cmd_lower for signal in validation_signals) - - def _check_local_testing(self, transcript: list[dict], session_id: str) -> bool: - """Check if agent tested locally. - - Heuristics: - - Look for Bash tool calls with pytest, npm test, cargo test, etc. - - Check exit codes (0 = success) - - Look for "PASSED" or "OK" in output - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if tests run and passed, False otherwise - """ - # Look for test execution in Bash tool calls - for msg in transcript: - if msg.get("type") == "tool_result" and "message" in msg: - msg_data = msg["message"] - if msg_data.get("tool_use_id"): - # Find corresponding tool_use - for prev_msg in transcript: - if prev_msg.get("type") == "assistant" and "message" in prev_msg: - content = prev_msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict): - if block.get("type") == "tool_use" and block.get( - "id" - ) == msg_data.get("tool_use_id"): - # Check if this was a test command - tool_name = block.get("name", "") - if tool_name == "Bash": - command = block.get("input", {}).get("command", "") - # Look for test commands using class constant - if any( - pattern in command - for pattern in self.TEST_COMMAND_PATTERNS - ): - # Check result - result_content = msg_data.get("content", []) - if isinstance(result_content, list): - for result_block in result_content: - if isinstance(result_block, dict): - if ( - result_block.get("type") - == "tool_result" - ): - # Check if tests passed - output = str( - result_block.get( - "content", "" - ) - ) - if ( - "PASSED" in output - or "passed" in output - ): - return True - if ( - "OK" in output - and "FAILED" not in output - ): - return True - - # Also accept validation commands (ruff, mypy, etc.) as testing - # for sessions where formal test suites don't exist or aren't applicable - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") == "Bash": - command = block.get("input", {}).get("command", "") - # Accept linting/type-checking tools directly - if any( - pattern in command - for pattern in self.VALIDATION_COMMAND_PATTERNS - ): - return True - # Accept python -c / node -e only if they do - # meaningful validation (not just print('hello')) - if any( - pattern in command - for pattern in self.INLINE_VALIDATION_PATTERNS - ) and self._is_meaningful_validation(command): - return True - - # No tests or validation found - return False - - def _user_prefers_no_auto_merge(self) -> bool: - """Detect if user has set preference to never auto-merge PRs. - - Searches .claude/context/USER_PREFERENCES.md for pattern: - "(never|must not|do not|don't) ... merge ... without ... (permission|approval|explicit)" - - Returns: - True if preference detected, False otherwise (fail-open on any error) - """ - try: - preferences_path = self.project_root / ".claude" / "context" / "USER_PREFERENCES.md" - - if not preferences_path.exists(): - return False - - content = preferences_path.read_text(encoding="utf-8") - - # Pattern: (never|must not|do not|don't).*merge.*without.*(permission|approval|explicit) - pattern = r"(?i)(never|must not|do not|don\'t).*merge.*without.*(permission|approval|explicit)" - - return re.search(pattern, content, re.DOTALL) is not None - - except Exception as e: - # Fail-open: any error returns False - self._log(f"Error detecting merge preference: {e}", "WARNING") - return False - - def _check_ci_status_no_auto_merge(self, transcript: list[dict]) -> bool: - """Check CI status WITHOUT requiring PR merge. - - Used when user preference "never merge without permission" is active. - Treats "PR ready + CI passing" as valid completion state. - - Args: - transcript: List of message dictionaries - - Returns: - True if PR ready and CI passing, False if CI failing or draft PR - """ - # Look for PR and CI indicators - pr_mentioned = False - ci_mentioned = False - ci_passing = False - is_draft = False - - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict): - if block.get("type") == "text": - text = str(block.get("text", "")).lower() - - # Check for PR mentions - if any( - keyword in text - for keyword in ["pr #", "pull request", "created pr"] - ): - pr_mentioned = True - - # Check for draft PR - if "draft" in text and "pr" in text: - is_draft = True - - # Check for CI mentions - if any( - keyword in text - for keyword in [ - "ci", - "github actions", - "continuous integration", - "checks", - ] - ): - ci_mentioned = True - - # Check for passing indicators - if any( - keyword in text - for keyword in [ - "passing", - "passed", - "success", - "ready for review", - "ready for your review", - ] - ): - ci_passing = True - - # Check for failing indicators - if any( - keyword in text - for keyword in ["failing", "failed", "error"] - ): - return False - - # If draft PR, not ready - if is_draft: - return False - - # If CI mentioned and failing, return False - if ci_mentioned and not ci_passing: - return False - - # If PR mentioned with CI passing, or PR ready indicators - if pr_mentioned and (ci_passing or not ci_mentioned): - return True - - # If neither PR nor CI mentioned, assume satisfied (fail-open) - if not pr_mentioned and not ci_mentioned: - return True - - # Default: if we have indicators but unclear state, be conservative - return ci_passing or not ci_mentioned - - def _check_ci_status(self, transcript: list[dict], session_id: str) -> bool: - """Check if CI passing/mergeable (preference-aware). - - This method delegates to the appropriate CI checker based on user preference: - - If user prefers no auto-merge: use _check_ci_status_no_auto_merge() - - Otherwise: use standard CI check logic (requires merge indicators) - - Heuristics (standard mode): - - Look for CI status checks (gh pr view, CI commands) - - Check for "passing", "success", "mergeable" (strict - requires "mergeable") - - Look for failure indicators - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if CI passing or not applicable, False if CI failing - """ - # Check user preference first (lazy detection) - if self._user_prefers_no_auto_merge(): - return self._check_ci_status_no_auto_merge(transcript) - - # Standard logic for users without preference (strict - requires "mergeable") - ci_mentioned = False - mergeable_mentioned = False - - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict): - # Check text content for CI mentions - if block.get("type") == "text": - text = str(block.get("text", "")) - text_lower = text.lower() - - if any( - keyword in text_lower - for keyword in [ - "ci", - "github actions", - "continuous integration", - ] - ): - ci_mentioned = True - - # Standard mode: only accept explicit "mergeable" or "passing" + "mergeable" - # Don't accept just "ready" or "passing" alone - if "mergeable" in text_lower: - mergeable_mentioned = True - - # Check for failure indicators - if any( - keyword in text_lower - for keyword in ["failing", "failed", "error"] - ): - return False - - # If CI not mentioned, consider satisfied (not applicable) - if not ci_mentioned: - return True - - # Standard mode requires explicit "mergeable" indicator - return mergeable_mentioned - - # ======================================================================== - # Phase 2: Additional Checkers (16 new methods) - # ======================================================================== - - def _generic_analyzer( - self, transcript: list[dict], session_id: str, consideration: dict[str, Any] - ) -> bool: - """Generic analyzer for considerations without specific checkers. - - Uses simple keyword matching on the consideration question. - Phase 2: Simple heuristics (future: LLM-based analysis) - - Args: - transcript: List of message dictionaries - session_id: Session identifier - consideration: Consideration dictionary with question - - Returns: - True if satisfied (fail-open default), False if potential issues detected - """ - # Extract keywords from question (simple tokenization) - question = consideration.get("question", "").lower() - keywords = [ - word - for word in re.findall(r"\b\w+\b", question) - if len(word) > 3 and word not in ["were", "does", "need", "that", "this", "with"] - ] - - if not keywords: - # No keywords to check, assume satisfied - return True - - # Build transcript text for searching - transcript_text = "" - for msg in transcript: - if msg.get("type") in ["user", "assistant"]: - content = msg.get("message", {}).get("content", "") - if isinstance(content, str): - transcript_text += content.lower() + " " - elif isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "text": - transcript_text += str(block.get("text", "")).lower() + " " - - # Check if keywords appear in transcript - keyword_found = any(keyword in transcript_text for keyword in keywords) - - # Default to satisfied (fail-open), only flag if suspicious patterns - # This is intentionally conservative to avoid false positives - self._log( - f"Generic analyzer for '{consideration['id']}': keywords={keywords}, found={keyword_found}", - "DEBUG", - ) - - return True # Phase 2: Always satisfied (fail-open) - - def _check_agent_unnecessary_questions(self, transcript: list[dict], session_id: str) -> bool: - """Check if agent asked unnecessary questions instead of proceeding autonomously. - - Detects use of AskUserQuestion tool, which is the concrete signal that the - agent stopped to ask the user something. Simple question marks in prose - (explanations, documentation, rhetorical questions) are NOT counted. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if no excessive questioning, False if agent over-asked - """ - # Count actual AskUserQuestion tool invocations (the concrete signal) - ask_user_count = 0 - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") == "AskUserQuestion": - ask_user_count += 1 - - # More than 3 explicit AskUserQuestion invocations suggests the agent - # was not working autonomously. This avoids false positives from - # question marks in prose, documentation, or code comments. - if ask_user_count > 3: - return False - - return True - - def _check_objective_completion(self, transcript: list[dict], session_id: str) -> bool: - """Check if original user objective was fully accomplished. - - Looks for completion indicators in later messages. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if objective appears complete, False otherwise - """ - # Get first user message (the objective) - first_user_msg = None - for msg in transcript: - if msg.get("type") == "user": - first_user_msg = msg - break - - if not first_user_msg: - return True # No objective to check - - # Look for completion indicators in assistant messages - completion_indicators = [ - "complete", - "finished", - "done", - "implemented", - "successfully", - "all tests pass", - "pr created", - "pr ready", - "pushed to", - "merged", - "no bug", - "no issue found", - "not a bug", - "as expected", - "by design", - "no changes needed", - ] - - for msg in reversed(transcript[-10:]): # Check last 10 messages - if msg.get("type") == "assistant": - content = msg.get("message", {}).get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "text": - text = str(block.get("text", "")).lower() - if any(indicator in text for indicator in completion_indicators): - return True - - # Also check for structural completion: PR creation or git push - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") == "Bash": - command = block.get("input", {}).get("command", "") - if "gh pr create" in command or "git push" in command: - return True - - return False # No completion indicators found - - # Paths that indicate user-facing/public code changes requiring doc updates - # Paths indicating user-facing/public code. __init__.py and __main__.py - # are only public when inside a public directory (commands, skills, etc.) - # so they are checked separately via _is_public_init. - PUBLIC_CODE_INDICATORS = [ - "/commands/", - "/skills/", - "/scenarios/", - "/cli/", - "/cli.py", - "__main__.py", - "setup.py", - "pyproject.toml", - ] - - def _check_documentation_updates(self, transcript: list[dict], session_id: str) -> bool: - """Check if relevant documentation files were updated. - - Only flags missing docs when PUBLIC-FACING code was changed (commands, - skills, CLIs, public APIs). Internal code changes (hooks, utilities, - tests, configs) do not require documentation updates. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if docs updated or not applicable, False if needed but missing - """ - public_code_modified = False - doc_files_modified = False - - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - tool_name = block.get("name", "") - if tool_name in ["Write", "Edit"]: - tool_input = block.get("input", {}) - file_path = tool_input.get("file_path", "").lower() - - # Only flag public-facing code changes - is_code = any( - file_path.endswith(ext) for ext in self.CODE_FILE_EXTENSIONS - ) - is_public = any( - indicator in file_path - for indicator in self.PUBLIC_CODE_INDICATORS - ) - # __init__.py is public only inside public dirs - if "__init__.py" in file_path and any( - d in file_path for d in ["/commands/", "/skills/", "/scenarios/"] - ): - is_public = True - if is_code and is_public: - public_code_modified = True - - # Check for doc files using class constant - if any(file_path.endswith(ext) if ext.startswith(".") else ext in file_path for ext in self.DOC_FILE_EXTENSIONS): - doc_files_modified = True - - # Only flag if public-facing code was changed without doc updates - if public_code_modified and not doc_files_modified: - return False - - return True - - def _check_tutorial_needed(self, transcript: list[dict], session_id: str) -> bool: - """Check if new feature needs tutorial/how-to. - - Detects new user-facing features that should have examples. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if tutorial exists or not needed, False if missing - """ - # Look for new feature indicators - feature_keywords = ["new feature", "add feature", "implement feature", "create feature"] - has_new_feature = False - - for msg in transcript: - if msg.get("type") == "user": - content = str(msg.get("message", {}).get("content", "")).lower() - if any(keyword in content for keyword in feature_keywords): - has_new_feature = True - break - - if not has_new_feature: - return True # No new feature, tutorial not needed - - # Check for example/tutorial files - tutorial_patterns = ["example", "tutorial", "how_to", "guide", "demo"] - has_tutorial = False - - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - tool_name = block.get("name", "") - if tool_name in ["Write", "Edit"]: - file_path = block.get("input", {}).get("file_path", "").lower() - if any(pattern in file_path for pattern in tutorial_patterns): - has_tutorial = True - break - - return has_tutorial - - def _check_presentation_needed(self, transcript: list[dict], session_id: str) -> bool: - """Check if work needs presentation deck. - - Detects high-impact work that should be presented to stakeholders. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if presentation exists or not needed, False if missing - """ - # This is a low-priority check, default to satisfied - # Could be enhanced to detect high-impact work patterns - return True - - def _check_feature_docs_discoverable(self, transcript: list[dict], session_id: str) -> bool: - """Check if feature documentation is discoverable from multiple paths. - - Verifies new features have documentation discoverable from README and docs/ directory. - This ensures users can find documentation through: - 1. README features/documentation section - 2. docs/ directory listing - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if docs are discoverable or not applicable, False if missing navigation - """ - try: - # Phase 1: Detect new features - # Look for new commands, agents, skills, scenarios in Write/Edit operations - new_features = [] - docs_file = None - - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - tool_name = block.get("name", "") - if tool_name in ["Write", "Edit"]: - file_path = block.get("input", {}).get("file_path", "") - - # Detect new feature by file location - if ".claude/commands/" in file_path and file_path.endswith( - ".md" - ): - new_features.append(("command", file_path)) - elif ".claude/agents/" in file_path and file_path.endswith( - ".md" - ): - new_features.append(("agent", file_path)) - elif ".claude/skills/" in file_path: - new_features.append(("skill", file_path)) - elif ".claude/scenarios/" in file_path: - new_features.append(("scenario", file_path)) - - # Track docs file creation in docs/ - if "docs/" in file_path and file_path.endswith(".md"): - docs_file = file_path - - # Edge case 1: No new features detected - if not new_features: - return True - - # Edge case 2: Docs-only session (no code files modified) - # But NOT if the "docs" are actually feature definitions (.md files - # in commands/agents/skills) — those ARE the feature, not just docs - if self._is_docs_only_session(transcript) and not new_features: - return True - - # Edge case 3: Internal changes (tools/, tests/, etc.) - # If all features are in internal paths, pass - internal_paths = [".claude/tools/", "tests/", ".claude/runtime/"] - all_internal = all( - any(internal in feature[1] for internal in internal_paths) - for feature in new_features - ) - if all_internal: - return True - - # Phase 2: Check for docs file in docs/ directory - if not docs_file: - return False # New feature but no docs file created - - # Phase 3: Verify 2+ navigation paths in README - readme_paths_count = 0 - - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - tool_name = block.get("name", "") - if tool_name in ["Write", "Edit"]: - file_path = block.get("input", {}).get("file_path", "") - - # Check if README was edited - if "readme.md" in file_path.lower(): - # Get the new content to check for documentation links - new_string = block.get("input", {}).get("new_string", "") - content_to_check = block.get("input", {}).get("content", "") - full_content = new_string or content_to_check - - # Count references to the docs file - if docs_file and full_content: - # Extract just the filename from the path - doc_filename = docs_file.split("/")[-1] - # Count occurrences of the doc filename in README content - readme_paths_count += full_content.count(doc_filename) - - # Need at least 2 navigation paths (e.g., Features section + Documentation section) - if readme_paths_count < 2: - return False - - # All checks passed - return True - - except Exception as e: - # Fail-open: Return True on errors to avoid blocking users - self._log(f"PR content validation error (fail-open): {e}", "WARNING") - return True - - def _is_docs_only_session(self, transcript: list[dict]) -> bool: - """Check if session only modified documentation files. - - Helper method to detect docs-only sessions where no code files were touched. - - Args: - transcript: List of message dictionaries - - Returns: - True if only .md files were modified, False if code files modified - """ - try: - code_modified = False - docs_modified = False - - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - tool_name = block.get("name", "") - if tool_name in ["Write", "Edit"]: - file_path = block.get("input", {}).get("file_path", "") - - # Check for code files using class constant - if any(file_path.endswith(ext) for ext in self.CODE_FILE_EXTENSIONS): - code_modified = True - - # Check for doc files using class constant - if any(file_path.endswith(ext) if ext.startswith(".") else ext in file_path for ext in self.DOC_FILE_EXTENSIONS): - docs_modified = True - - # Docs-only session if docs modified but no code files - return docs_modified and not code_modified - - except Exception as e: - # Fail-open: Return False on errors (assume code might be modified) - self._log(f"Docs-only session detection error (fail-open): {e}", "WARNING") - return False - - def _check_next_steps(self, transcript: list[dict], session_id: str) -> bool: - """Check that work is complete with NO remaining next steps (Issue #2196 - Enhanced). - - UPDATED LOGIC (Issue #2196): - - Uses regex patterns to detect STRUCTURED next steps (bulleted lists) - - Handles negation ("no next steps", "no remaining work") - - Ignores status observations ("CI pending", "waiting for") - - Prevents false positives on completion statements - - INVERTED LOGIC: If the agent mentions concrete next steps in structured format, - work is incomplete. Simple keywords without structure are ignored to prevent - false positives. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if NO next steps found (work is complete) - False if next steps ARE found (work is incomplete - should continue) - """ - # Structured next steps patterns: keywords followed by bulleted/numbered lists - # Pattern structure: (keyword) + colon + newline + bullet/number marker - # Examples: "Next steps:\n- Fix bugs", "TODO:\n1. Test", "Remaining:\n• Deploy" - concrete_next_steps_patterns = [ - r"(next steps?|remaining|todo|outstanding|still need to):\s*[\r\n]+\s*[-•*\d.]", - ] - - # Negation patterns indicate completion - negation_patterns = [ - r"no\s+(next\s+steps?|remaining|outstanding|todo)", - r"(next\s+steps?|remaining|outstanding|todo)\s+(?:are\s+)?(?:none|empty|complete)", - r"all\s+(?:done|complete|finished)", - r"nothing\s+(?:left|remaining|outstanding)", - ] - - # Check RECENT assistant messages (last 10) for structured next steps - recent_messages = [m for m in transcript[-20:] if m.get("type") == "assistant"][-10:] - - for msg in reversed(recent_messages): - content = msg.get("message", {}).get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "text": - text = str(block.get("text", "")) - - # First check for negation patterns (completion statements) - # These should PASS the check (return True) - negation_matched = False - for pattern in negation_patterns: - if re.search(pattern, text, re.IGNORECASE): - self._log( - "Completion statement found: negation pattern matched", - "INFO", - ) - negation_matched = True - break - - # Skip structured detection for this message if negation matched - if negation_matched: - continue - - # Check for STRUCTURED next steps (bulleted/numbered lists) - # These indicate CONCRETE remaining work - for pattern in concrete_next_steps_patterns: - if re.search(pattern, text, re.IGNORECASE): - # Before flagging, check if ALL bullet items are - # user-handoff or deferred-to-issue patterns - handoff_patterns = [ - r"wait\s+for\s+(ci|review|approval|merge)", - r"(user|you)\s+(should|can|may|need to)", - r"filed\s+(as|in)\s+#", - r"tracked\s+in\s+#", - r"when\s+ci\s+passes", - r"pr\s+ready\s+for\s+review", - r"ready\s+for\s+(review|merge|approval)", - r"waiting\s+for\s+(review|approval|ci|merge)", - ] - text_lower = text.lower() - is_handoff = any( - re.search(hp, text_lower) - for hp in handoff_patterns - ) - if is_handoff: - self._log( - "Structured list detected but contains handoff/deferred items - treating as complete", - "INFO", - ) - continue # Skip this match, not real remaining work - self._log( - f"Structured next steps found: pattern '{pattern}' - agent should continue", - "INFO", - ) - return False # Work is INCOMPLETE (concrete next steps exist) - - # No structured next steps found - work is complete - return True - - def _check_docs_organization(self, transcript: list[dict], session_id: str) -> bool: - """Check if investigation/session docs are organized properly. - - Verifies documentation is in correct directories. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if docs properly organized, False otherwise - """ - # Check for doc files created in wrong locations - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") == "Write": - file_path = block.get("input", {}).get("file_path", "") - - # Check for investigation/session docs in wrong places - if any( - pattern in file_path.lower() - for pattern in ["investigation", "session", "log"] - ): - # Should be in .claude/runtime or .claude/docs - if ".claude" not in file_path: - return False - - return True - - def _check_investigation_docs(self, transcript: list[dict], session_id: str) -> bool: - """Check if investigation findings were documented. - - Ensures exploration work is captured in persistent documentation. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if investigation documented, False if missing - """ - # Look for investigation indicators - investigation_keywords = [ - "investigate", - "investigation", - "explore", - "exploration", - "research", - "analyze", - "analyse", - "analysis", - "findings", - ] - - has_investigation = False - for msg in transcript: - if msg.get("type") == "user": - content = str(msg.get("message", {}).get("content", "")).lower() - if any(keyword in content for keyword in investigation_keywords): - has_investigation = True - break - - if not has_investigation: - return True # No investigation, docs not needed - - # Check for documentation of findings - doc_created = False - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") == "Write": - file_path = block.get("input", {}).get("file_path", "").lower() - if any( - pattern in file_path for pattern in [".md", "readme", "doc"] - ): - doc_created = True - break - - return doc_created - - def _check_shortcuts(self, transcript: list[dict], session_id: str) -> bool: - """Check if any quality shortcuts were taken. - - Identifies compromises like skipped error handling or incomplete validation. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if no shortcuts, False if compromises detected - """ - # Look for shortcut indicators in code - shortcut_patterns = [ - r"\bpass\b.*#.*\blater\b", - r"#.*\bhack\b", - r"#.*\bworkaround\b", - r"#.*\btemporary\b", - r"#.*\bfix\b.*\blater\b", - ] - - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - tool_name = block.get("name", "") - if tool_name in ["Write", "Edit"]: - tool_input = block.get("input", {}) - content_to_check = str(tool_input.get("content", "")) + str( - tool_input.get("new_string", "") - ) - - # Check for shortcut patterns - for pattern in shortcut_patterns: - if re.search(pattern, content_to_check, re.IGNORECASE): - return False - - return True - - def _check_interactive_testing(self, transcript: list[dict], session_id: str) -> bool: - """Check if agent tested interactively beyond automated tests. - - Looks for manual verification, edge case testing, UI validation. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if interactive testing done, False if only automated tests - """ - # Look for interactive testing indicators in assistant messages - interactive_keywords = [ - "manually tested", - "manually verified", - "tried it", - "verified the output", - "checked the result", - "confirmed it works", - "validated the behavior", - "tested end-to-end", - "ran the command", - "tested with real", - ] - - for msg in transcript: - if msg.get("type") == "assistant": - content = msg.get("message", {}).get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "text": - text = str(block.get("text", "")).lower() - if any(keyword in text for keyword in interactive_keywords): - return True - - # Also accept if automated tests show a substantial passing count. - # Use regex to find patterns like "N passed" or "N tests passed" - # instead of naively counting occurrences of "passed" and "ok". - for msg in transcript: - if msg.get("type") == "tool_result": - output = str(msg.get("message", {}).get("content", "")) - # Match pytest-style "N passed" or "N tests passed" - match = re.search(r"(\d+)\s+passed", output, re.IGNORECASE) - if match: - count = int(match.group(1)) - if count >= 10: - return True - - return False - - def _check_unrelated_changes(self, transcript: list[dict], session_id: str) -> bool: - """Check if there are unrelated changes in PR. - - Detects scope creep by checking if files span too many unrelated - top-level directories. A focused change should touch files in 1-3 - related directories. Touching 6+ distinct top-level directories - suggests scope creep. - - Previous heuristic (>20 files = scope creep) was replaced because - file count has no correlation with relatedness — a legitimate refactor - can touch 50 files in one module while a 5-file change can span - unrelated areas. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if changes appear focused, False if too scattered - """ - # Collect distinct top-level project directories of modified files - top_dirs = set() - project_root_str = str(self.project_root) - - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") in ["Write", "Edit"]: - file_path = block.get("input", {}).get("file_path", "") - if not file_path: - continue - # Convert to project-relative path - try: - rel = os.path.relpath(file_path, project_root_str) - except ValueError: - continue # Different drives on Windows - parts = rel.split(os.sep) - # Skip paths outside project (.. prefix) - if parts and parts[0] != ".." and len(parts) >= 2: - top_dirs.add(parts[0]) - - # 6+ distinct top-level project directories suggests scattered changes - if len(top_dirs) >= 6: - return False - - return True - - def _check_root_pollution(self, transcript: list[dict], session_id: str) -> bool: - """Check if PR polluted project root with new files. - - Flags new top-level files that should be in subdirectories. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if no root pollution, False if new top-level files added - """ - # Check for new files in project root - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") == "Write": - file_path = block.get("input", {}).get("file_path", "") - - # Check if file is in root (only one path component) - path_parts = file_path.strip("/").split("/") - if len(path_parts) == 1: - # New file in root - check if it's acceptable - filename = path_parts[0].lower() - acceptable_root_files = [ - "readme", - "license", - "makefile", - "dockerfile", - ".gitignore", - ".gitattributes", - ".dockerignore", - ".editorconfig", - ".env.example", - "setup.py", - "setup.cfg", - "pyproject.toml", - "requirements.txt", - "package.json", - "tsconfig.json", - "cargo.toml", - "go.mod", - "docker-compose", - "justfile", - "claude.md", - ".pre-commit", - "conftest.py", - "pytest.ini", - "manifest.in", - ] - - if not any( - acceptable in filename - for acceptable in acceptable_root_files - ): - return False - - return True - - def _check_pr_description(self, transcript: list[dict], session_id: str) -> bool: - """Check if PR description is clear and complete. - - Verifies PR has summary, test plan, and context. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if PR description adequate, False if missing or incomplete - """ - # Look for PR creation (gh pr create) - pr_created = False - pr_body = "" - - for msg in transcript: - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") == "Bash": - command = block.get("input", {}).get("command", "") - if "gh pr create" in command: - pr_created = True - pr_body = command.lower() - - if not pr_created: - return True # No PR, check not applicable - - # Check PR body for required sections - required_sections = ["summary", "test", "plan"] - has_all_sections = all(section in pr_body for section in required_sections) - - return has_all_sections - - def _check_review_responses(self, transcript: list[dict], session_id: str) -> bool: - """Check if PR review comments were addressed. - - Only triggers when there is concrete evidence of actual PR review activity - (gh pr review, gh api for PR comments, reviewer requested changes). Does NOT - trigger on the generic word 'review' in user messages, which caused widespread - false positives. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if reviews addressed or no PR reviews exist, False if unaddressed - """ - # Look for concrete PR review signals in tool calls, not generic keywords. - # These indicate actual GitHub PR review comments exist. - pr_review_command_patterns = [ - "gh pr review", - "requested changes", - "changes_requested", - "reviewer comment", - "review comment", - ] - has_pr_reviews = False - - for msg in transcript: - # Check Bash tool calls for PR review commands - if msg.get("type") == "assistant" and "message" in msg: - content = msg["message"].get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "tool_use": - if block.get("name") == "Bash": - command = block.get("input", {}).get("command", "").lower() - if any(p in command for p in pr_review_command_patterns): - has_pr_reviews = True - break - # Narrow gh api match: only review/comment endpoints - if "gh api repos/" in command and ( - "/reviews" in command or "/comments" in command - ): - has_pr_reviews = True - break - # Check tool results for review-related output - if msg.get("type") == "tool_result": - output = str(msg.get("message", {}).get("content", "")).lower() - if "requested changes" in output or "changes_requested" in output: - has_pr_reviews = True - - if not has_pr_reviews: - return True # No PR reviews to address - - # Look for response indicators showing reviews were handled - response_keywords = ["addressed", "fixed", "updated", "resolved", "pushed"] - for msg in transcript: - if msg.get("type") == "assistant": - content = msg.get("message", {}).get("content", []) - if isinstance(content, list): - for block in content: - if isinstance(block, dict) and block.get("type") == "text": - text = str(block.get("text", "")).lower() - if any(keyword in text for keyword in response_keywords): - return True - - return False - - def _check_branch_rebase(self, transcript: list[dict], session_id: str) -> bool: - """Check if branch needs rebase on main. - - Verifies branch is up to date with main. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if branch is current, False if needs rebase - """ - # Look for git status or branch checks - for msg in transcript: - if msg.get("type") == "tool_result": - output = str(msg.get("message", {}).get("content", "")).lower() - - # Check for "behind" indicators - if "behind" in output or "diverged" in output: - return False - - # Check for "up to date" indicators - if "up to date" in output or "up-to-date" in output: - return True - - # Default to satisfied if no information - return True - - def _check_ci_precommit_mismatch(self, transcript: list[dict], session_id: str) -> bool: - """Check for CI failures contradicting passing pre-commit. - - Identifies divergence between local pre-commit and CI checks. - - Args: - transcript: List of message dictionaries - session_id: Session identifier - - Returns: - True if no mismatch, False if divergence detected - """ - # Look for pre-commit passing - precommit_passed = False - ci_failed = False - - for msg in transcript: - if msg.get("type") in ["assistant", "tool_result"]: - content_str = str(msg.get("message", {})).lower() - - # Check for pre-commit success - if "pre-commit" in content_str or "precommit" in content_str: - if "passed" in content_str or "success" in content_str: - precommit_passed = True - - # Check for CI failure - if "ci" in content_str or "github actions" in content_str: - if "failed" in content_str or "failing" in content_str: - ci_failed = True - - # If both conditions met, there's a mismatch - if precommit_passed and ci_failed: - return False - - return True - - # ======================================================================== - # Progress Tracking - # ======================================================================== - - def _emit_progress( - self, - progress_callback: Callable | None, - event_type: str, - message: str, - details: dict | None = None, - ) -> None: - """Emit progress event to callback if provided. - - Fail-safe design: Never raises exceptions that would break checker. - - Args: - progress_callback: Optional callback function - event_type: Event type (start/category/consideration/complete) - message: Progress message - details: Optional event details - """ - if progress_callback is None: - return - - try: - progress_callback(event_type, message, details) - except Exception as e: - # Fail-safe: Log but never raise - self._log(f"Progress callback error: {e}", "WARNING") - - # ======================================================================== - # Output Generation - # ======================================================================== - - def _generate_continuation_prompt( - self, - analysis: ConsiderationAnalysis, - transcript: list[dict] | None = None, - turn_state: Optional["PowerSteeringTurnState"] = None, - addressed_concerns: dict[str, str] | None = None, - user_claims: list[str] | None = None, - ) -> str: - """Generate actionable continuation prompt with turn-awareness and evidence. - - Enhanced to show: - - Specific incomplete TODO items that need completion - - Specific "next steps" mentioned that indicate incomplete work - - User claims vs actual evidence gap - - Persistent failures across blocks - - Escalating severity on repeated blocks - - Args: - analysis: Analysis results with failed considerations - transcript: Optional transcript for extracting specific incomplete items - turn_state: Optional turn state for turn-aware prompting - addressed_concerns: Optional dict of concerns addressed in this turn - user_claims: Optional list of completion claims detected from user/agent - - Returns: - Formatted continuation prompt with evidence and turn information - """ - blocks = turn_state.consecutive_blocks if turn_state else 1 - threshold = PowerSteeringTurnState.MAX_CONSECUTIVE_BLOCKS if TURN_STATE_AVAILABLE else 10 - - # Extract specific incomplete items for detailed guidance - incomplete_todos = [] - next_steps_mentioned = [] - if transcript: - incomplete_todos = self._extract_incomplete_todos(transcript) - next_steps_mentioned = self._extract_next_steps_mentioned(transcript) - - # Escalating tone based on block count - if blocks == 1: - severity_header = "First check" - elif blocks <= threshold // 2: - severity_header = f"Block {blocks}/{threshold}" - else: - severity_header = ( - f"**CRITICAL: Block {blocks}/{threshold}** - Auto-approval approaching" - ) - - prompt_parts = [ - "", - "=" * 60, - f"POWER-STEERING Analysis - {severity_header}", - "=" * 60, - "", - ] - - # CRITICAL: Show specific incomplete items that MUST be completed - if incomplete_todos or next_steps_mentioned: - prompt_parts.append("**INCOMPLETE WORK DETECTED - YOU MUST CONTINUE:**") - prompt_parts.append("") - - if incomplete_todos: - prompt_parts.append("**Incomplete TODO Items** (you MUST complete these):") - for todo in incomplete_todos: - prompt_parts.append(f" • {todo}") - prompt_parts.append("") - - if next_steps_mentioned: - prompt_parts.append("**Next Steps You Mentioned** (you MUST complete these):") - for step in next_steps_mentioned: - prompt_parts.append(f" • {step}") - prompt_parts.append("") - - prompt_parts.append( - "**ACTION REQUIRED**: Continue working on the items above. " - "Do NOT stop until ALL todos are completed and NO next steps remain." - ) - prompt_parts.append("") - - # Show progress if addressing concerns - if addressed_concerns: - prompt_parts.append("**Progress Since Last Block** (recognized from your actions):") - for concern_id, how_addressed in addressed_concerns.items(): - prompt_parts.append(f" + {concern_id}: {how_addressed}") - prompt_parts.append("") - - # Show user claims vs evidence gap - if user_claims: - prompt_parts.append("**Completion Claims Detected:**") - prompt_parts.append("You or Claude claimed the following:") - for claim in user_claims[:3]: # Limit to 3 claims - prompt_parts.append(f" - {claim[:100]}...") # Truncate long claims - prompt_parts.append("") - prompt_parts.append( - "**However, the checks below still failed.** " - "Please provide specific evidence these checks pass, or complete the remaining work." - ) - prompt_parts.append("") - - # Show persistent failures if repeated blocks - if turn_state and blocks > 1: - persistent = turn_state.get_persistent_failures() - repeatedly_failed = {k: v for k, v in persistent.items() if v > 1} - - if repeatedly_failed: - prompt_parts.append("**Persistent Issues** (failed multiple times):") - for cid, count in sorted(repeatedly_failed.items(), key=lambda x: -x[1]): - prompt_parts.append(f" - {cid}: Failed {count} times") - prompt_parts.append("") - prompt_parts.append("These issues require immediate attention.") - prompt_parts.append("") - - # Show current failures grouped by category with evidence - prompt_parts.append("**Current Failures:**") - prompt_parts.append("") - - by_category = analysis.group_by_category() - - for category, failed in by_category.items(): - # Filter out addressed concerns - remaining_failures = [ - r - for r in failed - if not addressed_concerns or r.consideration_id not in addressed_concerns - ] - if remaining_failures: - prompt_parts.append(f"### {category}") - for result in remaining_failures: - prompt_parts.append(f" - **{result.consideration_id}**: {result.reason}") - - # Show evidence if available from turn state - if turn_state and turn_state.block_history: - current_block = turn_state.get_previous_block() - if current_block: - for ev in current_block.failed_evidence: - if ev.consideration_id == result.consideration_id: - if ev.evidence_quote: - prompt_parts.append(f" Evidence: {ev.evidence_quote}") - if ev.was_claimed_complete: - prompt_parts.append( - " **Note**: This was claimed complete but check still fails" - ) - prompt_parts.append("") - - # Call to action - prompt_parts.append("**Next Steps:**") - prompt_parts.append("1. Complete the failed checks listed above") - prompt_parts.append("2. Provide specific evidence that checks now pass") - remaining = threshold - blocks - prompt_parts.append(f"3. Or continue working ({remaining} more blocks until auto-approval)") - prompt_parts.append("") - - # Add acknowledgment hint if nearing auto-approve threshold - if blocks >= threshold // 2: - prompt_parts.append( - "**Tip**: If checks are genuinely complete, say 'I acknowledge these concerns' " - "or create SESSION_SUMMARY.md to indicate intentional completion." - ) - prompt_parts.append("") - - prompt_parts.extend( - [ - "To disable power-steering immediately:", - " mkdir -p .claude/runtime/power-steering && touch .claude/runtime/power-steering/.disabled", - ] - ) - - return "\n".join(prompt_parts) - - def _generate_summary( - self, transcript: list[dict], analysis: ConsiderationAnalysis, session_id: str - ) -> str: - """Generate session summary for successful completion. - - Args: - transcript: List of message dictionaries - analysis: Analysis results - session_id: Session identifier - - Returns: - Formatted summary - """ - summary_parts = [ - "# Power-Steering Session Summary", - "", - f"**Session ID**: {session_id}", - f"**Completed**: {datetime.now().isoformat()}", - "", - "## Status", - "All critical checks passed - session complete.", - "", - "## Considerations Verified", - ] - - # List all satisfied checks - for consideration in self.considerations: - result = analysis.results.get(consideration["id"]) - if result and result.satisfied: - summary_parts.append(f"- ✓ {consideration['question']}") - - summary_parts.append("") - summary_parts.append("---") - summary_parts.append("Generated by Power-Steering Mode (Phase 2)") - - return "\n".join(summary_parts) - - def _write_summary(self, session_id: str, summary: str) -> None: - """Write summary to file. - - Args: - session_id: Session identifier - summary: Summary content - """ - try: - summary_dir = self.runtime_dir / session_id - summary_path = summary_dir / "summary.md" - _write_with_retry(summary_path, summary, mode="w") - summary_path.chmod(0o644) # Owner read/write, others read - except OSError: - pass # Fail-open: Continue even if summary writing fails - - def _check_with_transcript_list( - self, transcript: list[dict], session_id: str - ) -> PowerSteeringResult: - """Testing interface: Check with transcript list instead of file path. - - Args: - transcript: Transcript as list of message dicts - session_id: Session identifier - - Returns: - PowerSteeringResult with compaction context and considerations - """ - # Initialize compaction context - compaction_context = CompactionContext() - - # Check if compaction handling is enabled - compaction_enabled = self._is_consideration_enabled("compaction_handling") - - # Run compaction validation - considerations = [] - if COMPACTION_AVAILABLE and compaction_enabled: - try: - validator = CompactionValidator(self.project_root) - validation_result = validator.validate(transcript, session_id) - compaction_context = validation_result.compaction_context - - # Create consideration result - compaction_check = CheckerResult( - consideration_id="compaction_handling", - satisfied=validation_result.passed, - reason="; ".join(validation_result.warnings) - if validation_result.warnings - else "No compaction issues detected", - severity="warning", - recovery_steps=validation_result.recovery_steps, - executed=True, - ) - - considerations.append(compaction_check) - except Exception as e: - # Fail-open: Log error but don't block - self._log(f"Compaction validation error: {e}", "WARNING") - compaction_check = CheckerResult( - consideration_id="compaction_handling", - satisfied=True, # Fail-open - reason="Compaction validation skipped due to error", - severity="warning", - executed=True, - ) - considerations.append(compaction_check) - elif not compaction_enabled: - # Add disabled marker - compaction_check = CheckerResult( - consideration_id="compaction_handling", - satisfied=True, - reason="Compaction handling disabled", - severity="warning", - executed=False, - ) - considerations.append(compaction_check) - - # Return result - return PowerSteeringResult( - decision="approve", - reasons=["test_mode"], - compaction_context=compaction_context, - considerations=considerations, - ) - - def _is_consideration_enabled(self, consideration_id: str) -> bool: - """Check if a consideration is enabled in considerations.yaml. - - Args: - consideration_id: ID of consideration to check - - Returns: - True if enabled or not found (default enabled), False if explicitly disabled - """ - try: - considerations_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / "considerations.yaml" - ) - if not considerations_path.exists(): - return True # Default enabled - - import yaml - - with open(considerations_path) as f: - considerations = yaml.safe_load(f) - - if not considerations: - return True - - for consideration in considerations: - if consideration.get("id") == consideration_id: - return consideration.get("enabled", True) - - return True # Not found = default enabled - except Exception as e: - self._log( - f"Could not check consideration enabled state, defaulting to enabled: {e}", "DEBUG" - ) - return True # Fail-open - - def _check_compaction_handling(self, transcript: list[dict], session_id: str) -> bool: - """Consideration checker for compaction validation. - - Called by consideration framework. Returns True if compaction - was handled appropriately or didn't occur. - - Args: - transcript: Full conversation transcript - session_id: Session identifier - - Returns: - True if no compaction or validation passed, False if failed - """ - if not COMPACTION_AVAILABLE: - return True # Fail-open if validator not available - - try: - validator = CompactionValidator(self.project_root) - result = validator.validate(transcript, session_id) - return result.passed - except Exception as e: - self._log(f"Compaction validation error: {e}", "WARNING") - return True # Fail-open on errors - - def _log(self, message: str, level: str = "INFO") -> None: - """Log message to power-steering log file. - - Args: - message: Message to log - level: Log level (INFO, WARNING, ERROR) - """ - try: - log_file = self.runtime_dir / "power_steering.log" - timestamp = datetime.now().isoformat() - - # Create with restrictive permissions if it doesn't exist - is_new = not log_file.exists() - - # Use retry-enabled write for cloud sync resilience - log_entry = f"[{timestamp}] {level}: {message}\n" - _write_with_retry(log_file, log_entry, mode="a") - - # Set permissions on new files - if is_new: - log_file.chmod(0o600) # Owner read/write only for security - except OSError: - pass # Fail silently on logging errors - - -# ============================================================================ -# Module Interface -# ============================================================================ - - -def check_session( - transcript_path: Path, session_id: str, project_root: Path | None = None -) -> PowerSteeringResult: - """Convenience function to check session completeness. - - Args: - transcript_path: Path to transcript JSONL file - session_id: Session identifier - project_root: Project root (auto-detected if None) - - Returns: - PowerSteeringResult with decision - """ - checker = PowerSteeringChecker(project_root) - return checker.check(transcript_path, session_id) - - -def is_disabled(project_root: Path | None = None) -> bool: - """Standalone function to check if power-steering is disabled. - - This function exists primarily for testing purposes, allowing tests - to check the disabled status without creating a full PowerSteeringChecker - instance. - - Args: - project_root: Project root directory (auto-detected if None) - - Returns: - True if power-steering is disabled, False if enabled - """ - try: - checker = PowerSteeringChecker(project_root) - return checker._is_disabled() - except Exception: - # Fail-open: If checker creation fails, assume not disabled - return False - - -if __name__ == "__main__": - # For testing: Allow running directly - if len(sys.argv) < 3: - print("Usage: power_steering_checker.py ") - sys.exit(1) - - transcript_path = Path(sys.argv[1]) - session_id = sys.argv[2] - - result = check_session(transcript_path, session_id) - print(json.dumps({"decision": result.decision, "reasons": result.reasons}, indent=2)) diff --git a/amplifier-bundle/tools/amplihack/hooks/power_steering_diagnostics.py b/amplifier-bundle/tools/amplihack/hooks/power_steering_diagnostics.py deleted file mode 100755 index 63366e50c..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/power_steering_diagnostics.py +++ /dev/null @@ -1,302 +0,0 @@ -#!/usr/bin/env python3 -""" -Diagnostic logging and infinite loop detection for power-steering. - -This module provides instrumentation for debugging power-steering state -management issues, including diagnostic logging in JSONL format and -infinite loop pattern detection. - -Philosophy: -- Ruthlessly Simple: Single-purpose diagnostic utilities -- Fail-Open: Logging failures never block the system -- Zero-BS: All functions work or don't exist -- Standard library only - -Public API (the "studs"): - DiagnosticLogger: JSONL logging for power-steering events - detect_infinite_loop: Detect stall, oscillation, high failure patterns -""" - -import json -import os -from collections.abc import Callable -from dataclasses import dataclass -from datetime import datetime -from pathlib import Path - -__all__ = ["DiagnosticLogger", "detect_infinite_loop", "InfiniteLoopDiagnostics"] - - -@dataclass -class InfiniteLoopDiagnostics: - """Result of infinite loop detection analysis. - - Attributes: - stall_detected: True if counter stuck at same value - stall_value: Value counter is stuck at (if stalled) - stall_count: Number of times same value repeated - oscillation_detected: True if A → B → A → B pattern found - oscillation_values: Values oscillating between (if oscillating) - high_failure_rate: True if write failure rate > 30% - write_failure_rate: Actual write failure rate (0.0 to 1.0) - health_status: Overall health ("healthy", "warning", "critical") - """ - - stall_detected: bool = False - stall_value: int | None = None - stall_count: int = 0 - oscillation_detected: bool = False - oscillation_values: list[int] = None - high_failure_rate: bool = False - write_failure_rate: float = 0.0 - health_status: str = "healthy" - - def __post_init__(self): - if self.oscillation_values is None: - self.oscillation_values = [] - - -class DiagnosticLogger: - """JSONL diagnostic logger for power-steering events. - - Logs events to .claude/runtime/power-steering/{session_id}/diagnostic.jsonl - in JSON Lines format for easy debugging and analysis. - - Philosophy: - - Fail-open: Logging failures never block operations - - Standard library only: No external dependencies - - JSONL format: One event per line, easy to parse - """ - - def __init__( - self, - project_root: Path, - session_id: str, - log_callback: Callable[[str], None] | None = None, - ): - """Initialize diagnostic logger. - - Args: - project_root: Project root directory - session_id: Current session identifier - log_callback: Optional callback for logging messages - """ - self.project_root = project_root - self.session_id = session_id - self.log_callback = log_callback or (lambda msg: None) - - def get_log_file_path(self) -> Path: - """Get path to diagnostic log file.""" - return ( - self.project_root - / ".claude" - / "runtime" - / "power-steering" - / self.session_id - / "diagnostic.jsonl" - ) - - def log_event( - self, - event_type: str, - details: dict | None = None, - ) -> None: - """Log diagnostic event in JSONL format. - - Fail-open: Errors during logging are caught and don't block. - - Args: - event_type: Type of event (state_write, state_read, etc.) - details: Additional event details - """ - try: - log_file = self.get_log_file_path() - log_file.parent.mkdir(parents=True, exist_ok=True) - - event = { - "timestamp": datetime.now().isoformat(), - "event": event_type, - "pid": os.getpid(), - "details": details or {}, - } - - # Append to JSONL file (one JSON object per line) - with open(log_file, "a") as f: - f.write(json.dumps(event) + "\n") - - except OSError as e: - # Fail-open: Log error but don't raise - self.log_callback(f"Failed to write diagnostic log: {e}") - - def log_state_write_attempt( - self, - turn_count: int, - attempt_number: int = 1, - ) -> None: - """Log state write attempt.""" - self.log_event( - "state_write_attempt", - { - "turn_count": turn_count, - "attempt": attempt_number, - }, - ) - - def log_state_write_success( - self, - turn_count: int, - attempt_number: int = 1, - ) -> None: - """Log successful state write.""" - self.log_event( - "state_write_success", - { - "turn_count": turn_count, - "attempt": attempt_number, - }, - ) - - def log_state_write_failure( - self, - turn_count: int, - attempt_number: int, - error: str, - ) -> None: - """Log failed state write.""" - self.log_event( - "state_write_failure", - { - "turn_count": turn_count, - "attempt": attempt_number, - "error": error, - }, - ) - - def log_state_read( - self, - turn_count: int, - ) -> None: - """Log state read.""" - self.log_event( - "state_read", - {"turn_count": turn_count}, - ) - - def log_verification_failed( - self, - expected_count: int, - actual_count: int, - ) -> None: - """Log verification failure.""" - self.log_event( - "verification_failed", - { - "expected_turn_count": expected_count, - "actual_turn_count": actual_count, - }, - ) - - def log_monotonicity_violation( - self, - old_count: int, - new_count: int, - ) -> None: - """Log monotonicity violation.""" - self.log_event( - "monotonicity_violation", - { - "previous_turn_count": old_count, - "new_turn_count": new_count, - }, - ) - - -def detect_infinite_loop( - log_file: Path, - stall_threshold: int = 10, - oscillation_window: int = 4, -) -> InfiniteLoopDiagnostics: - """Detect infinite loop patterns from diagnostic log. - - Analyzes diagnostic log to detect three patterns: - 1. Counter stall: Same value repeated N times - 2. Oscillation: A → B → A → B pattern - 3. High failure rate: >30% write failures - - Args: - log_file: Path to diagnostic.jsonl file - stall_threshold: Number of repeats to consider stall - oscillation_window: Window size for oscillation detection - - Returns: - InfiniteLoopDiagnostics with detection results - """ - diagnostics = InfiniteLoopDiagnostics() - - try: - if not log_file.exists(): - return diagnostics - - # Parse log entries - entries = [] - with open(log_file) as f: - for line in f: - try: - entries.append(json.loads(line)) - except json.JSONDecodeError: - continue # Skip malformed lines - - if not entries: - return diagnostics - - # Extract turn counts from write events - turn_counts = [] - write_attempts = 0 - write_failures = 0 - - for entry in entries: - event = entry.get("event", "") - details = entry.get("details", {}) - - if event == "state_write_attempt": - write_attempts += 1 - elif event == "state_write_success": - turn_count = details.get("turn_count") - if turn_count is not None: - turn_counts.append(turn_count) - elif event == "state_write_failure": - write_failures += 1 - - # Pattern 1: Counter stall detection - if len(turn_counts) >= stall_threshold: - last_n = turn_counts[-stall_threshold:] - if len(set(last_n)) == 1: - diagnostics.stall_detected = True - diagnostics.stall_value = last_n[0] - diagnostics.stall_count = stall_threshold - - # Pattern 2: Oscillation detection - if len(turn_counts) >= oscillation_window: - last_n = turn_counts[-oscillation_window:] - if len(set(last_n)) == 2 and last_n[0] == last_n[2] and last_n[1] == last_n[3]: - diagnostics.oscillation_detected = True - diagnostics.oscillation_values = list(set(last_n)) - - # Pattern 3: High failure rate - if write_attempts > 0: - diagnostics.write_failure_rate = write_failures / write_attempts - diagnostics.high_failure_rate = diagnostics.write_failure_rate > 0.30 - - # Determine overall health status - if diagnostics.stall_detected or diagnostics.oscillation_detected: - diagnostics.health_status = "critical" - elif diagnostics.high_failure_rate: - diagnostics.health_status = "warning" - else: - diagnostics.health_status = "healthy" - - except OSError: - # Fail-open: Return empty diagnostics on error - pass - - return diagnostics diff --git a/amplifier-bundle/tools/amplihack/hooks/power_steering_state.py b/amplifier-bundle/tools/amplihack/hooks/power_steering_state.py deleted file mode 100755 index 1ee4b53a8..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/power_steering_state.py +++ /dev/null @@ -1,960 +0,0 @@ -#!/usr/bin/env python3 -""" -Turn-aware state management for power-steering with delta analysis. - -Manages session state including turn counts, consecutive blocks, detailed -failure evidence, and delta-based transcript analysis for intelligent -turn-aware decisions. - -Philosophy: -- Ruthlessly Simple: Single-purpose module with clear contract -- Fail-Open: Never block users due to bugs - always allow stop on errors -- Zero-BS: No stubs, every function works or doesn't exist -- Modular: Self-contained brick with standard library only - -Public API (the "studs"): - FailureEvidence: Detailed evidence of why a consideration failed - BlockSnapshot: Full snapshot of a block event with evidence - PowerSteeringTurnState: Dataclass holding turn state - TurnStateManager: Manages loading/saving/incrementing turn state - DeltaAnalyzer: Analyzes delta transcript since last block -""" - -import json -import os -import tempfile -from collections.abc import Callable -from dataclasses import dataclass, field -from datetime import datetime -from pathlib import Path -from typing import ClassVar - -from .fallback_heuristics import AddressedChecker - -__all__ = [ - "FailureEvidence", - "BlockSnapshot", - "PowerSteeringTurnState", - "TurnStateManager", - "DeltaAnalyzer", - "DeltaAnalysisResult", -] - - -@dataclass -class FailureEvidence: - """Detailed evidence of why a consideration failed. - - Stores not just the ID but the specific reason and evidence quote, - enabling the agent to understand exactly what went wrong. - - Attributes: - consideration_id: ID of the failed consideration - reason: Human-readable reason for failure - evidence_quote: Specific quote from transcript showing failure (if any) - timestamp: When this failure was detected - was_claimed_complete: True if user/agent claimed this was done - """ - - consideration_id: str - reason: str - evidence_quote: str | None = None - timestamp: str | None = None - was_claimed_complete: bool = False - - def __post_init__(self): - if self.timestamp is None: - self.timestamp = datetime.now().isoformat() - - def to_dict(self) -> dict: - """Serialize to dict.""" - return { - "consideration_id": self.consideration_id, - "reason": self.reason, - "evidence_quote": self.evidence_quote, - "timestamp": self.timestamp, - "was_claimed_complete": self.was_claimed_complete, - } - - @classmethod - def from_dict(cls, data: dict) -> "FailureEvidence": - """Deserialize from dict.""" - return cls( - consideration_id=data["consideration_id"], - reason=data["reason"], - evidence_quote=data.get("evidence_quote"), - timestamp=data.get("timestamp"), - was_claimed_complete=data.get("was_claimed_complete", False), - ) - - -@dataclass -class BlockSnapshot: - """Snapshot of a single block event with full context. - - Tracks not just what failed, but WHERE in the transcript we were - and WHY things failed with specific evidence. - - Attributes: - block_number: Which block this is (1-indexed) - timestamp: When the block occurred - transcript_index: Last message index in transcript at time of block - transcript_length: Total transcript length at time of block - failed_evidence: List of FailureEvidence objects (detailed failures) - user_claims_detected: List of claims user/agent made about completion - """ - - block_number: int - timestamp: str - transcript_index: int - transcript_length: int - failed_evidence: list[FailureEvidence] = field(default_factory=list) - user_claims_detected: list[str] = field(default_factory=list) - - def to_dict(self) -> dict: - """Serialize to dict.""" - return { - "block_number": self.block_number, - "timestamp": self.timestamp, - "transcript_index": self.transcript_index, - "transcript_length": self.transcript_length, - "failed_evidence": [ev.to_dict() for ev in self.failed_evidence], - "user_claims_detected": self.user_claims_detected, - } - - @classmethod - def from_dict(cls, data: dict) -> "BlockSnapshot": - """Deserialize from dict.""" - return cls( - block_number=data["block_number"], - timestamp=data["timestamp"], - transcript_index=data["transcript_index"], - transcript_length=data["transcript_length"], - failed_evidence=[ - FailureEvidence.from_dict(ev) for ev in data.get("failed_evidence", []) - ], - user_claims_detected=data.get("user_claims_detected", []), - ) - - -@dataclass -class PowerSteeringTurnState: - """Enhanced state tracking for turn-aware power-steering. - - Tracks how many turns have occurred in the session, consecutive - blocks (failed stop attempts), and detailed history with evidence - for intelligent turn-aware decisions and delta analysis. - - Attributes: - session_id: Unique identifier for the session - turn_count: Number of turns in the session - consecutive_blocks: Number of consecutive power-steering blocks - first_block_timestamp: ISO timestamp of first block in current sequence - last_block_timestamp: ISO timestamp of most recent block - block_history: Full snapshots of each block with evidence - last_analyzed_transcript_index: Track where we left off for delta analysis - """ - - session_id: str - turn_count: int = 0 - consecutive_blocks: int = 0 - first_block_timestamp: str | None = None - last_block_timestamp: str | None = None - block_history: list[BlockSnapshot] = field(default_factory=list) - last_analyzed_transcript_index: int = 0 - - # Maximum consecutive blocks before auto-approve triggers (increased from 3) - MAX_CONSECUTIVE_BLOCKS: ClassVar[int] = 10 - - def to_dict(self) -> dict: - """Convert state to dictionary for JSON serialization.""" - return { - "session_id": self.session_id, - "turn_count": self.turn_count, - "consecutive_blocks": self.consecutive_blocks, - "first_block_timestamp": self.first_block_timestamp, - "last_block_timestamp": self.last_block_timestamp, - "block_history": [snap.to_dict() for snap in self.block_history], - "last_analyzed_transcript_index": self.last_analyzed_transcript_index, - } - - @classmethod - def from_dict(cls, data: dict, session_id: str) -> "PowerSteeringTurnState": - """Create state from dictionary. - - Args: - data: Dictionary from JSON - session_id: Session ID to use (may override stored value) - - Returns: - PowerSteeringTurnState instance - """ - return cls( - session_id=session_id, - turn_count=data.get("turn_count", 0), - consecutive_blocks=data.get("consecutive_blocks", 0), - first_block_timestamp=data.get("first_block_timestamp"), - last_block_timestamp=data.get("last_block_timestamp"), - block_history=[BlockSnapshot.from_dict(snap) for snap in data.get("block_history", [])], - last_analyzed_transcript_index=data.get("last_analyzed_transcript_index", 0), - ) - - def get_previous_block(self) -> BlockSnapshot | None: - """Get the most recent block snapshot (if any).""" - return self.block_history[-1] if self.block_history else None - - def get_persistent_failures(self) -> dict[str, int]: - """Get considerations that have failed multiple times. - - Returns: - Dict mapping consideration_id -> number of times it failed - """ - failure_counts: dict[str, int] = {} - for snapshot in self.block_history: - for evidence in snapshot.failed_evidence: - cid = evidence.consideration_id - failure_counts[cid] = failure_counts.get(cid, 0) + 1 - return failure_counts - - def get_all_previous_failure_ids(self) -> list[str]: - """Get all consideration IDs that failed in previous blocks. - - Returns: - List of unique consideration IDs from all previous blocks - """ - seen: set = set() - result: list[str] = [] - for snapshot in self.block_history: - for evidence in snapshot.failed_evidence: - if evidence.consideration_id not in seen: - seen.add(evidence.consideration_id) - result.append(evidence.consideration_id) - return result - - -@dataclass -class DeltaAnalysisResult: - """Result of analyzing delta transcript since last block.""" - - new_content_addresses_failures: dict[str, str] # consideration_id -> evidence - new_claims_detected: list[str] # Claims user/agent made - new_content_summary: str # Brief summary of what happened in delta - - -class DeltaAnalyzer: - """Analyzes new transcript content since last block. - - This is the key component for turn-aware analysis - instead of - looking at the ENTIRE transcript each time, we look ONLY at - the delta (new content) and see if it addresses previous failures. - - NOTE: This class provides FALLBACK analysis using simple heuristics. - The primary path uses LLM-based analysis via claude_power_steering.py: - - analyze_claims_sync() for completion claim detection - - analyze_if_addressed_sync() for failure address checking - - This fallback exists for when Claude SDK is unavailable. - - Philosophy: - - Standard library only (no external deps) - - Fail-open (errors don't block user) - - Single responsibility (delta analysis only) - - LLM-first, heuristics as fallback - """ - - def __init__(self, log: Callable[[str], None] | None = None): - """Initialize delta analyzer. - - Args: - log: Optional logging callback - """ - self.log = log or (lambda msg: None) - self._fallback_checker = AddressedChecker() - - def analyze_delta( - self, - delta_messages: list[dict], - previous_failures: list[FailureEvidence], - ) -> DeltaAnalysisResult: - """Analyze new transcript content against previous failures. - - Args: - delta_messages: New transcript messages since last block - previous_failures: List of failures from previous block - - Returns: - DeltaAnalysisResult with what the delta addresses - """ - addressed: dict[str, str] = {} - claims: list[str] = [] - - # Extract all text from delta - delta_text = self._extract_all_text(delta_messages) - - # Detect claims - claims = self._detect_claims(delta_text) - - # Check if delta addresses each previous failure - for failure in previous_failures: - evidence = self._check_if_addressed( - failure, - delta_text, - delta_messages, - ) - if evidence: - addressed[failure.consideration_id] = evidence - - # Generate summary - summary = self._summarize_delta(delta_messages, addressed, claims) - - return DeltaAnalysisResult( - new_content_addresses_failures=addressed, - new_claims_detected=claims, - new_content_summary=summary, - ) - - def _extract_all_text(self, messages: list[dict]) -> str: - """Extract all text content from messages.""" - texts = [] - for msg in messages: - content = self._extract_message_content(msg) - if content: - texts.append(content) - return "\n".join(texts) - - def _extract_message_content(self, msg: dict) -> str: - """Extract text from a single message.""" - content = msg.get("content", msg.get("message", "")) - - if isinstance(content, str): - return content - - if isinstance(content, dict): - inner = content.get("content", "") - if isinstance(inner, str): - return inner - if isinstance(inner, list): - return self._extract_from_blocks(inner) - - if isinstance(content, list): - return self._extract_from_blocks(content) - - return "" - - def _extract_from_blocks(self, blocks: list) -> str: - """Extract text from content blocks.""" - texts = [] - for block in blocks: - if isinstance(block, dict): - if block.get("type") == "text": - texts.append(str(block.get("text", ""))) - return " ".join(texts) - - def _detect_claims(self, text: str) -> list[str]: - """Detect completion claims in text (FALLBACK - simple keyword matching). - - NOTE: This is a fallback method. The primary path uses LLM-based - analysis via analyze_claims_sync() in claude_power_steering.py. - - Args: - text: Text to search - - Returns: - List of detected claim strings with context - """ - claims = [] - text_lower = text.lower() - - # Simple keyword-based fallback (not regex) - claim_keywords = [ - "completed", - "finished", - "all done", - "tests passing", - "ci green", - "pr ready", - "workflow complete", - ] - - for keyword in claim_keywords: - if keyword in text_lower: - # Find keyword position and extract context - idx = text_lower.find(keyword) - start = max(0, idx - 50) - end = min(len(text), idx + len(keyword) + 50) - context = text[start:end].strip() - claim_text = f"...{context}..." - if claim_text not in claims: - claims.append(claim_text) - - if claims: - self.log(f"[Fallback] Detected {len(claims)} completion claims in delta") - - return claims - - def _check_if_addressed( - self, - failure: FailureEvidence, - delta_text: str, - delta_messages: list[dict], - ) -> str | None: - """Check if the delta addresses a specific failure. - - Uses heuristics based on consideration type to determine if - the new content shows the concern was addressed. - - Args: - failure: Previous failure to check - delta_text: All text from delta - delta_messages: Delta messages (for structured analysis) - - Returns: - Evidence string if addressed, None otherwise - """ - return self._fallback_checker.check_if_addressed( - consideration_id=failure.consideration_id, delta_text=delta_text - ) - - def _summarize_delta( - self, - messages: list[dict], - addressed: dict[str, str], - claims: list[str], - ) -> str: - """Generate brief summary of delta content. - - Returns: - Human-readable summary string - """ - num_messages = len(messages) - num_addressed = len(addressed) - num_claims = len(claims) - - parts = [f"{num_messages} new messages"] - - if num_addressed > 0: - parts.append(f"{num_addressed} concerns addressed") - - if num_claims > 0: - parts.append(f"{num_claims} completion claims") - - return ", ".join(parts) - - -class TurnStateManager: - """Manages turn state persistence and operations with delta analysis support. - - Handles loading, saving, and incrementing turn state with - atomic writes, fail-open error handling, and enhanced evidence tracking. - - Attributes: - project_root: Project root directory - session_id: Current session identifier - log: Optional logging callback - _previous_turn_count: Track previous turn count for monotonicity validation - _diagnostic_logger: Diagnostic logger for instrumentation - """ - - def __init__( - self, - project_root: Path, - session_id: str, - log: Callable[[str], None] | None = None, - ): - """Initialize turn state manager. - - Args: - project_root: Project root directory - session_id: Current session identifier - log: Optional callback for logging messages - """ - self.project_root = project_root - self.session_id = session_id - self.log = log or (lambda msg: None) - self._previous_turn_count: int | None = None - - # Import DiagnosticLogger - try both relative and absolute imports - self._diagnostic_logger = None - try: - # Try relative import first (when running as module) - from .power_steering_diagnostics import DiagnosticLogger - - self._diagnostic_logger = DiagnosticLogger(project_root, session_id, log) - except (ImportError, ValueError): - try: - # Try absolute import (when running tests or standalone) - from power_steering_diagnostics import DiagnosticLogger - - self._diagnostic_logger = DiagnosticLogger(project_root, session_id, log) - except ImportError as e: - # Fail-open: Continue without diagnostic logging - self.log(f"Warning: Could not load diagnostic logger: {e}") - - def get_state_file_path(self) -> Path: - """Get path to the state file for this session. - - Returns: - Path to turn_state.json file - """ - return ( - self.project_root - / ".claude" - / "runtime" - / "power-steering" - / self.session_id - / "turn_state.json" - ) - - def load_state(self) -> PowerSteeringTurnState: - """Load state from disk with validation. - - Fail-open: Returns empty state on any error. - Validates state integrity and monotonicity. - - Returns: - PowerSteeringTurnState instance - """ - state_file = self.get_state_file_path() - - try: - if state_file.exists(): - data = json.loads(state_file.read_text()) - state = PowerSteeringTurnState.from_dict(data, self.session_id) - - # Validate state integrity - self._validate_state(state) - - # Track for monotonicity checking - self._previous_turn_count = state.turn_count - - # Diagnostic logging - if self._diagnostic_logger: - self._diagnostic_logger.log_state_read(state.turn_count) - - self.log(f"Loaded turn state from {state_file}") - return state - except (json.JSONDecodeError, OSError, KeyError) as e: - self.log(f"Failed to load state (fail-open): {e}") - - # Return empty state - return PowerSteeringTurnState(session_id=self.session_id) - - def _validate_state(self, state: PowerSteeringTurnState) -> None: - """Validate state integrity (Phase 2: Defensive Validation). - - Checks: - - Counter is non-negative - - Counter is within reasonable bounds (< 1000) - - Last message not empty (if blocks exist) - - Fail-open: Logs warnings but doesn't raise exceptions. - - Args: - state: State to validate - """ - try: - # Check counter bounds - if state.turn_count < 0: - self.log(f"WARNING: Invalid turn_count: {state.turn_count} (negative)") - - if state.turn_count >= 1000: - self.log(f"WARNING: Suspicious turn_count: {state.turn_count} (>= 1000)") - - # Check block history consistency - if state.consecutive_blocks > 0 and not state.block_history: - self.log("WARNING: consecutive_blocks > 0 but block_history is empty") - - except Exception as e: - # Fail-open: Don't raise, just log - self.log(f"State validation warning: {e}") - - def save_state( - self, - state: PowerSteeringTurnState, - previous_state: PowerSteeringTurnState | None = None, - ) -> None: - """Save state to disk using atomic write pattern with enhancements. - - Enhancements (Phase 2 & 3): - - Monotonicity check: Ensures counter never decreases - - fsync: Force write to disk - - Verification read: Read back temp file to verify - - Retry logic: 3 attempts with exponential backoff - - Diagnostic logging: Track all write attempts - - Fail-open: Logs error but does not raise on failure. - - Args: - state: State to save - previous_state: Previous state for monotonicity check (optional) - """ - # Phase 2: Monotonicity validation (WARN only, don't block - fail-open) - if previous_state is not None: - if state.turn_count < previous_state.turn_count: - error_msg = ( - f"Monotonicity violation: turn_count decreased from " - f"{previous_state.turn_count} to {state.turn_count}" - ) - self.log(f"WARNING: {error_msg} (continuing with fail-open)") - - # Log diagnostic event - if self._diagnostic_logger: - self._diagnostic_logger.log_monotonicity_violation( - previous_state.turn_count, - state.turn_count, - ) - - # CHANGED: Warn but don't raise (fail-open principle) - # Continue with save operation - - # Also check against tracked previous value - if self._previous_turn_count is not None: - if state.turn_count < self._previous_turn_count: - error_msg = ( - f"Monotonicity regression detected: counter went from " - f"{self._previous_turn_count} to {state.turn_count}" - ) - self.log(f"WARNING: {error_msg} (continuing with fail-open)") - - if self._diagnostic_logger: - self._diagnostic_logger.log_monotonicity_violation( - self._previous_turn_count, - state.turn_count, - ) - - # CHANGED: Warn but don't raise (fail-open principle) - # Continue with save operation - - # Phase 3: Atomic write with retry and verification - state_file = self.get_state_file_path() - max_retries = 3 - retry_delay = 0.1 # Start with 100ms - - for attempt in range(1, max_retries + 1): - try: - # Diagnostic logging: Write attempt - if self._diagnostic_logger: - self._diagnostic_logger.log_state_write_attempt( - state.turn_count, - attempt, - ) - - # Ensure parent directory exists - state_file.parent.mkdir(parents=True, exist_ok=True) - - # Atomic write: temp file + fsync + rename - fd, temp_path = tempfile.mkstemp( - dir=state_file.parent, - prefix="turn_state_", - suffix=".tmp", - ) - - try: - # Write to temp file - with os.fdopen(fd, "w") as f: - state_data = state.to_dict() - json.dump(state_data, f, indent=2) - - # Phase 3: fsync to ensure data is written to disk - f.flush() - os.fsync(f.fileno()) - - # Phase 3: Verification read from temp file - temp_path_obj = Path(temp_path) - if not temp_path_obj.exists(): - raise OSError("Temp file doesn't exist after write") - - # Verify content matches - verified_data = json.loads(temp_path_obj.read_text()) - if verified_data.get("turn_count") != state.turn_count: - # Diagnostic logging: Verification failed - if self._diagnostic_logger: - self._diagnostic_logger.log_verification_failed( - state.turn_count, - verified_data.get("turn_count", -1), - ) - raise OSError("Verification failed: turn_count mismatch") - - # Atomic rename - os.rename(temp_path, state_file) - - # Phase 3: Verify final path exists - if not state_file.exists(): - raise OSError("State file doesn't exist after rename") - - # Verify final file content - final_data = json.loads(state_file.read_text()) - if final_data.get("turn_count") != state.turn_count: - if self._diagnostic_logger: - self._diagnostic_logger.log_verification_failed( - state.turn_count, - final_data.get("turn_count", -1), - ) - raise OSError("Final verification failed: turn_count mismatch") - - # Success! Update tracked value - self._previous_turn_count = state.turn_count - - # Diagnostic logging: Success - if self._diagnostic_logger: - self._diagnostic_logger.log_state_write_success( - state.turn_count, - attempt, - ) - - self.log(f"Saved turn state to {state_file} (attempt {attempt})") - return # Success - exit retry loop - - except Exception as e: - # Clean up temp file on error - try: - if Path(temp_path).exists(): - os.unlink(temp_path) - except OSError: - pass - raise e - - except OSError as e: - error_msg = str(e) - self.log(f"Save attempt {attempt}/{max_retries} failed: {error_msg}") - - # Diagnostic logging: Write failure - if self._diagnostic_logger: - self._diagnostic_logger.log_state_write_failure( - state.turn_count, - attempt, - error_msg, - ) - - # If this was the last attempt, give up (fail-open) - if attempt >= max_retries: - self.log(f"Failed to save state after {max_retries} attempts (fail-open)") - return - - # Exponential backoff before retry - import time - - time.sleep(retry_delay) - retry_delay *= 2 - - def increment_turn(self, state: PowerSteeringTurnState) -> PowerSteeringTurnState: - """Increment turn count and return updated state. - - Args: - state: Current state - - Returns: - Updated state with incremented turn count - """ - state.turn_count += 1 - self.log(f"Turn count incremented to {state.turn_count}") - return state - - def record_block_with_evidence( - self, - state: PowerSteeringTurnState, - failed_evidence: list[FailureEvidence], - transcript_length: int, - user_claims: list[str] | None = None, - ) -> PowerSteeringTurnState: - """Record a power-steering block with full evidence. - - This is the enhanced block recording that stores detailed failure - information for turn-aware analysis. - - Args: - state: Current state - failed_evidence: List of FailureEvidence objects (not just IDs) - transcript_length: Current transcript length - user_claims: Claims detected from user/agent (e.g., "I've completed X") - - Returns: - Updated state with new block snapshot - """ - now = datetime.now().isoformat() - - # Increment consecutive blocks - state.consecutive_blocks += 1 - - # Record timestamps - if state.first_block_timestamp is None: - state.first_block_timestamp = now - state.last_block_timestamp = now - - # Create block snapshot - snapshot = BlockSnapshot( - block_number=state.consecutive_blocks, - timestamp=now, - transcript_index=state.last_analyzed_transcript_index, - transcript_length=transcript_length, - failed_evidence=failed_evidence, - user_claims_detected=user_claims or [], - ) - - # Update state - state.block_history.append(snapshot) - state.last_analyzed_transcript_index = transcript_length - - self.log( - f"Recorded block #{state.consecutive_blocks}: " - f"{len(failed_evidence)} failures with evidence, " - f"transcript at index {transcript_length}" - ) - - return state - - def record_approval(self, state: PowerSteeringTurnState) -> PowerSteeringTurnState: - """Record a power-steering approval (reset consecutive blocks). - - Args: - state: Current state - - Returns: - Updated state with blocks reset - """ - state.consecutive_blocks = 0 - state.first_block_timestamp = None - state.last_block_timestamp = None - state.block_history = [] - state.last_analyzed_transcript_index = 0 - - self.log("Recorded approval - reset block state") - return state - - def get_delta_transcript_range( - self, - state: PowerSteeringTurnState, - current_transcript_length: int, - ) -> tuple[int, int]: - """Get the range of transcript to analyze (delta since last block). - - Args: - state: Current state - current_transcript_length: Current transcript length - - Returns: - Tuple of (start_index, end_index) for delta analysis - """ - start_index = state.last_analyzed_transcript_index - end_index = current_transcript_length - - self.log( - f"Delta transcript range: [{start_index}:{end_index}] " - f"(analyzing {end_index - start_index} new messages)" - ) - - return start_index, end_index - - def should_auto_approve(self, state: PowerSteeringTurnState) -> tuple[bool, str, str | None]: - """Determine if auto-approval should trigger with escalating context. - - Auto-approval triggers purely on consecutive blocks count. - This is a fail-open design - after N blocks, we let the user go - regardless of whether concerns were detected as addressed. - - Args: - state: Current state - - Returns: - Tuple of (should_approve, reason, escalation_message) - escalation_message is non-None if we're getting close to threshold - """ - blocks = state.consecutive_blocks - threshold = PowerSteeringTurnState.MAX_CONSECUTIVE_BLOCKS - - # Not at threshold yet - if blocks < threshold: - # Generate escalation warning if we're past halfway - escalation_msg = None - if blocks >= threshold // 2: - remaining = threshold - blocks - escalation_msg = ( - f"Warning: {blocks}/{threshold} blocks used. " - f"Auto-approval in {remaining} more blocks if issues persist." - ) - - return ( - False, - f"{blocks}/{threshold} consecutive blocks", - escalation_msg, - ) - - # Threshold met - auto-approve unconditionally (fail-open design) - return ( - True, - f"Auto-approve: {blocks} blocks reached threshold ({threshold})", - None, - ) - - def get_diagnostics(self) -> dict: - """Get diagnostic information about current state (Phase 4: User Visibility). - - Analyzes diagnostic log to detect infinite loop patterns. - - Returns: - Dictionary with diagnostic information - """ - diagnostics = { - "stall_detected": False, - "stall_value": None, - "stall_count": 0, - "oscillation_detected": False, - "oscillation_values": [], - "write_failure_rate": 0.0, - "high_failure_rate_alert": False, - } - - try: - if self._diagnostic_logger: - log_file = self._diagnostic_logger.get_log_file_path() - - # Use detect_infinite_loop from diagnostics module - try: - from .power_steering_diagnostics import detect_infinite_loop - except (ImportError, ValueError): - from power_steering_diagnostics import detect_infinite_loop - - result = detect_infinite_loop(log_file) - - diagnostics.update( - { - "stall_detected": result.stall_detected, - "stall_value": result.stall_value, - "stall_count": result.stall_count, - "oscillation_detected": result.oscillation_detected, - "oscillation_values": result.oscillation_values, - "write_failure_rate": result.write_failure_rate, - "high_failure_rate_alert": result.high_failure_rate, - } - ) - - except Exception as e: - self.log(f"Failed to get diagnostics: {e}") - - return diagnostics - - def generate_power_steering_message(self, state: PowerSteeringTurnState) -> str: - """Generate power steering message customized based on state (Phase 4: REQ-2). - - Message includes: - - Turn count - - Consecutive blocks count - - Customization based on block history - - Args: - state: Current power steering state - - Returns: - Customized message string - """ - turn_count = state.turn_count - blocks = state.consecutive_blocks - - if blocks == 0: - return f"Turn {turn_count}: Power steering check" - if blocks == 1: - return f"Turn {turn_count}: First power steering block (block {blocks})" - return ( - f"Turn {turn_count}: Power steering block {blocks} - " - f"Issues persist from previous attempts" - ) diff --git a/amplifier-bundle/tools/amplihack/hooks/pre_compact.py b/amplifier-bundle/tools/amplihack/hooks/pre_compact.py deleted file mode 100755 index 7a715e23b..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/pre_compact.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python3 -""" -PreCompact Hook - amplihack Style -Automatically exports conversation transcript before context compaction. -Ensures no conversation history is lost when Claude Code compacts context. -""" - -import json -import sys -from datetime import datetime -from pathlib import Path -from typing import Any - -# Clean import setup -sys.path.insert(0, str(Path(__file__).parent)) -sys.path.insert(0, str(Path(__file__).parent.parent)) - -# Import dependencies with clean structure -from context_preservation import ContextPreserver -from hook_processor import HookProcessor - - -class PreCompactHook(HookProcessor): - """Hook processor for pre-compact events.""" - - def __init__(self): - super().__init__("pre_compact") - # Initialize session attributes - self.session_id = self.get_session_id() - self.session_dir = self.log_dir / self.session_id - self.session_dir.mkdir(parents=True, exist_ok=True) - - def process(self, input_data: dict[str, Any]) -> dict[str, Any]: - """Process pre-compact event and export conversation transcript. - - Args: - input_data: Input from Claude Code containing conversation data - - Returns: - Confirmation of export completion - """ - try: - # Get conversation data - conversation = input_data.get("conversation", []) - messages = input_data.get("messages", []) - - # Use either conversation or messages data - conversation_data = conversation if conversation else messages - - self.log( - f"Exporting conversation with {len(conversation_data)} entries before compaction" - ) - - # Create context preserver - preserver = ContextPreserver(self.session_id) - # Override the session_dir to use the hook's session directory - # This ensures all files are saved in the correct location - preserver.session_dir = self.session_dir - - # Extract original request if it exists in the conversation - original_request = None - for entry in conversation_data: - if entry.get("role") == "user" and len(entry.get("content", "")) > 50: - # Found substantial user input - try to extract as original request - try: - original_request = preserver.extract_original_request(entry["content"]) - self.log( - f"Original request extracted from conversation: {original_request.get('target', 'Unknown')}" - ) - break - except Exception as e: - self.log(f"Failed to extract original request: {e}") - - # Export the full conversation transcript - transcript_path = preserver.export_conversation_transcript(conversation_data) - self.log(f"Conversation transcript exported to: {transcript_path}") - - # Also create a copy in the transcripts subdirectory for easy access - transcripts_dir = self.session_dir / "transcripts" - transcripts_dir.mkdir(exist_ok=True) - transcript_copy = ( - transcripts_dir / f"conversation_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md" - ) - - try: - with open(transcript_path) as src, open(transcript_copy, "w") as dst: - dst.write(src.read()) - self.log(f"Transcript copy created in: {transcript_copy}") - except Exception as e: - self.log(f"Failed to create transcript copy: {e}", "WARNING") - - # Save compaction event metadata - compaction_info = { - "timestamp": datetime.now().isoformat(), - "session_id": self.session_id, - "messages_exported": len(conversation_data), - "transcript_path": transcript_path, - "original_request_preserved": original_request is not None, - "compaction_trigger": input_data.get("trigger", "unknown"), - } - - # Save metadata - metadata_file = self.session_dir / "compaction_events.json" - events = [] - if metadata_file.exists(): - try: - with open(metadata_file) as f: - events = json.load(f) - except Exception: - events = [] - - events.append(compaction_info) - - with open(metadata_file, "w") as f: - json.dump(events, f, indent=2) - - # Save metrics - self.save_metric("messages_exported", len(conversation_data)) - self.save_metric("compaction_events", len(events)) - self.save_metric("transcript_exported", True) - - return { - "status": "success", - "message": f"Conversation exported successfully - {len(conversation_data)} messages preserved", - "transcript_path": transcript_path, - "metadata": compaction_info, - } - - except Exception as e: - error_msg = f"Failed to export conversation before compaction: {e}" - self.log(error_msg) - self.save_metric("transcript_exported", False) - - return {"status": "error", "message": error_msg, "error": str(e)} - - def restore_conversation_from_latest(self) -> list[dict[str, Any]]: - """Restore conversation from the latest transcript. - - Returns: - List of conversation messages or empty list if not found - """ - try: - # Find latest session using the log directory - logs_dir = ( - self.log_dir - if hasattr(self, "log_dir") - else (self.project_root / ".claude" / "runtime" / "logs") - ) - - if not logs_dir.exists(): - self.log("No logs directory found") - return [] - - # Find session directories (format: YYYYMMDD_HHMMSS) - import re - - session_dirs = [ - d for d in logs_dir.iterdir() if d.is_dir() and re.match(r"\d{8}_\d{6}", d.name) - ] - - if not session_dirs: - self.log("No session logs found") - return [] - - # Get the latest session - latest_session = sorted(session_dirs)[-1].name - - transcript_file = logs_dir / latest_session / "CONVERSATION_TRANSCRIPT.md" - - if not transcript_file.exists(): - self.log(f"No transcript found for session: {latest_session}") - return [] - - self.log(f"Restored conversation from session: {latest_session}") - return [ - {"source": "transcript", "path": str(transcript_file), "session": latest_session} - ] - - except Exception as e: - self.log(f"Failed to restore conversation: {e}") - return [] - - -def main(): - """Entry point for the pre-compact hook.""" - hook = PreCompactHook() - hook.run() - - -if __name__ == "__main__": - main() diff --git a/amplifier-bundle/tools/amplihack/hooks/pre_tool_use.py b/amplifier-bundle/tools/amplihack/hooks/pre_tool_use.py deleted file mode 100755 index 7798e2374..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/pre_tool_use.py +++ /dev/null @@ -1,491 +0,0 @@ -#!/usr/bin/env python3 -""" -Claude Code hook for pre tool use events. -Prevents dangerous operations like git commit --no-verify -and deletion of the current working directory. -""" - -import os -import re -import shlex -import subprocess -import sys -from pathlib import Path -from typing import Any - -sys.path.insert(0, str(Path(__file__).parent)) -from hook_processor import HookProcessor - -CWD_DELETION_ERROR_MESSAGE = """ -🚫 OPERATION BLOCKED - Working Directory Deletion Prevented - -You attempted to delete a directory that contains your current working directory: - Target: {target} - CWD: {cwd} - -Deleting the CWD would break the current session. If you need to clean up -this directory, first change to a different working directory. - -🔒 This protection cannot be disabled programmatically. -""".strip() - -CWD_RENAME_ERROR_MESSAGE = """ -🚫 OPERATION BLOCKED - Working Directory Rename Prevented - -You attempted to move/rename a directory that contains your current working directory: - Source: {source} - CWD: {cwd} - -Moving or renaming the CWD would break the current session. To rename this directory: - 1. First change to a different working directory (e.g., cd ..) - 2. Then perform the rename operation - 3. Change back into the renamed directory if needed - -🔒 This protection cannot be disabled programmatically. -""".strip() - -# Pattern to detect recursive rm or rmdir commands. -# Catches: rm -rf, rm -r, rm -fr, rm -Rf, rm -r -f, rm --recursive, /bin/rm -rf -_RM_RECURSIVE_RE = re.compile( - r"\brm\s+" - r"(?:" - r"-[a-zA-Z]*[rR][a-zA-Z]*" # combined flags: -rf, -fr, -Rf, etc. - r"|(?:-[a-zA-Z]+\s+)*-[rR]" # separated flags: -f -r, -v -r, etc. - r"|--recursive" # long form - r")", -) -_RMDIR_RE = re.compile(r"\brmdir(?:\s|$)") - -# Pattern to detect mv commands (move/rename). -# Catches: mv, /bin/mv, /usr/bin/mv, optionally prefixed with env assignments, -# sudo (with optional flags), or command. Examples: -# mv src dst -# /bin/mv src dst -# VAR=1 mv src dst -# sudo mv src dst -# sudo -u root /usr/bin/mv src dst -_MV_RE = re.compile( - r"(?:^|[;&|])\s*" # start of command or after separator - r"(?:\w+=\S+\s+)*" # optional env assignments - r"(?:sudo\s+)?" # optional sudo - r"(?:-\w+(?:\s+\S+)?\s+)*" # optional sudo flags (e.g., -u root) - r"(?:command\s+)?" # optional 'command' builtin - r"(?:/(?:usr/)?bin/)?mv\s+" # mv or /bin/mv or /usr/bin/mv -) - -MAIN_BRANCH_ERROR_MESSAGE = """ -⛔ Direct commits to '{branch}' branch are not allowed. - -Please use the feature branch workflow: - 1. Create a feature branch: git checkout -b feature/your-feature-name - 2. Make your commits on the feature branch - 3. Create a Pull Request to merge into {branch} - -This protection cannot be bypassed with --no-verify. -""".strip() - - -class PreToolUseHook(HookProcessor): - """Hook processor for pre tool use events.""" - - def __init__(self): - super().__init__("pre_tool_use") - self.strategy = None - - def process(self, input_data: dict[str, Any]) -> dict[str, Any]: - """Process pre tool use event and block dangerous operations. - - Args: - input_data: Input from Claude Code containing tool use details - - Returns: - Dict with 'block' key set to True if operation should be blocked - """ - tool_use = input_data.get("toolUse", {}) - tool_name = tool_use.get("name", "") - tool_input = tool_use.get("input", {}) - - if tool_name != "Bash": - return {} - - # Detect launcher and select strategy - self.strategy = self._select_strategy() - if self.strategy: - self.log(f"Using strategy: {self.strategy.__class__.__name__}") - strategy_result = self.strategy.handle_pre_tool_use(input_data) - if strategy_result: - self.log("Strategy provided custom pre-tool handling") - return strategy_result - - command = tool_input.get("command", "") - - # Check for CWD deletion before any other checks - cwd_block = self._check_cwd_deletion(command) - if cwd_block: - return cwd_block - - # Check for CWD rename/move - cwd_rename_block = self._check_cwd_rename(command) - if cwd_rename_block: - return cwd_rename_block - - is_git_commit = "git commit" in command - is_git_push = "git push" in command - has_no_verify = "--no-verify" in command - is_git_command = is_git_commit or is_git_push - - if not is_git_command: - return {} - - if is_git_commit: - try: - result = subprocess.run( - ["git", "branch", "--show-current"], - cwd=self.project_root, - capture_output=True, - text=True, - timeout=5, - ) - - if result.returncode == 0: - current_branch = result.stdout.strip() - - if current_branch in ["main", "master"]: - self.log( - f"BLOCKED: Commit to {current_branch} branch detected", - "ERROR", - ) - - return { - "block": True, - "message": MAIN_BRANCH_ERROR_MESSAGE.format(branch=current_branch), - } - else: - self.log( - f"Git branch detection failed (exit {result.returncode}), allowing operation", - "WARNING", - ) - - except subprocess.TimeoutExpired: - self.log( - "Git branch detection timed out after 5s, allowing operation", - "WARNING", - ) - except FileNotFoundError: - self.log( - "Git not found in PATH, allowing operation", - "WARNING", - ) - except Exception as e: - self.log( - f"Git branch detection failed: {e}, allowing operation", - "WARNING", - ) - - if has_no_verify and is_git_command: - self.log("BLOCKED: Dangerous operation detected (--no-verify flag)", "ERROR") - - return { - "block": True, - "message": """ -🚫 OPERATION BLOCKED - -You attempted to use --no-verify which bypasses critical quality checks: -- Code formatting (ruff, prettier) -- Type checking (pyright) -- Secret detection -- Trailing whitespace fixes - -This defeats the purpose of our quality gates. - -✅ Instead, fix the underlying issues: -1. Run: pre-commit run --all-files -2. Fix the violations -3. Commit without --no-verify - -For true emergencies, ask a human to override this protection. - -🔒 This protection cannot be disabled programmatically. -""".strip(), - } - - # Allow all other operations - return {} - - def _check_cwd_deletion(self, command: str) -> dict[str, Any]: - """Check if a command would delete the current working directory. - - Detects rm -r/-rf/-fr and rmdir commands targeting the CWD or a parent. - Returns a block dict if dangerous, empty dict if safe. - """ - # Quick check: does the command contain a recursive rm or rmdir? - has_rm_recursive = _RM_RECURSIVE_RE.search(command) - has_rmdir = _RMDIR_RE.search(command) - - if not has_rm_recursive and not has_rmdir: - return {} - - try: - cwd = Path(os.getcwd()).resolve() - except OSError: - self.log("CWD inaccessible, cannot check deletion safety", "WARNING") - return {} - - # Extract path arguments from rm/rmdir commands in the full command. - # Split on command separators (;, &&, ||) but NOT single pipe | - segments = re.split(r";|&&|\|\|", command) - - for segment in segments: - segment = segment.strip() - if not segment: - continue - - # Check if this segment contains a dangerous rm or rmdir - if _RM_RECURSIVE_RE.search(segment) or _RMDIR_RE.search(segment): - # Extract the path arguments (everything after flags) - paths = self._extract_rm_paths(segment) - for p in paths: - try: - target = Path(p).resolve() - except (OSError, ValueError): - continue - - # Block if CWD is equal to or a child of the target - try: - cwd.relative_to(target) - self.log( - f"BLOCKED: Directory deletion would destroy CWD. " - f"Target={target}, CWD={cwd}", - "ERROR", - ) - return { - "block": True, - "message": CWD_DELETION_ERROR_MESSAGE.format(target=target, cwd=cwd), - } - except ValueError: - # CWD is not under target - safe - continue - - return {} - - def _check_cwd_rename(self, command: str) -> dict[str, Any]: - """Check if a command would rename/move the current working directory. - - Detects mv commands where the source is the CWD or a parent of CWD. - Returns a block dict if dangerous, empty dict if safe. - """ - # Quick check: does the command contain an mv command? - if not _MV_RE.search(command): - return {} - - try: - cwd = Path(os.getcwd()).resolve() - except OSError: - self.log("CWD inaccessible, cannot check rename safety", "WARNING") - return {} - - # Extract path arguments from mv commands in the full command. - # Split on command separators (;, &&, ||) but NOT single pipe | - segments = re.split(r";|&&|\|\|", command) - - for segment in segments: - segment = segment.strip() - if not segment: - continue - - # Check if this segment contains an mv command - if _MV_RE.search(segment): - # Extract all source paths (mv supports multiple sources) - source_paths = self._extract_mv_source_paths(segment) - if not source_paths: - continue - - for source_path in source_paths: - # Check for glob characters - if present, be conservative - if any(c in source_path for c in "*?["): - # Extract the non-glob prefix (e.g., /tmp/par* -> /tmp/par) - prefix = source_path.split("*")[0].split("?")[0].split("[")[0] - if prefix: - try: - # Get the directory containing the glob and the basename prefix - prefix_path = Path(prefix) - glob_dir = prefix_path.parent.resolve() - basename_prefix = prefix_path.name # e.g., "par" from "/tmp/par" - - # Check if CWD's path contains a component that: - # 1. Is in the same directory as the glob - # 2. Starts with the basename prefix - for parent in [cwd] + list(cwd.parents): - if parent.parent == glob_dir: - # This CWD component is in the glob directory - if parent.name.startswith(basename_prefix): - # The glob could match this path component - self.log( - f"BLOCKED: mv with glob pattern might affect CWD. " - f"Pattern={source_path}, CWD={cwd}", - "ERROR", - ) - return { - "block": True, - "message": CWD_RENAME_ERROR_MESSAGE.format( - source=source_path, cwd=cwd - ), - } - except (OSError, ValueError): - pass - continue - - try: - source = Path(source_path).resolve() - except (OSError, ValueError): - continue - - # Block if CWD is equal to or a child of the source - try: - cwd.relative_to(source) - self.log( - f"BLOCKED: Directory rename would invalidate CWD. " - f"Source={source}, CWD={cwd}", - "ERROR", - ) - return { - "block": True, - "message": CWD_RENAME_ERROR_MESSAGE.format(source=source, cwd=cwd), - } - except ValueError: - # CWD is not under source - safe for this source - continue - - return {} - - @staticmethod - def _extract_rm_paths(segment: str) -> list[str]: - """Extract path arguments from an rm or rmdir command segment. - - Uses shlex.split() to handle quoted paths properly. - Skips flags (tokens starting with -) and the command name itself. - """ - try: - tokens = shlex.split(segment) - except ValueError: - # Malformed shell syntax - fall back to simple split - tokens = segment.split() - - paths: list[str] = [] - skip_command = True - - for token in tokens: - # Skip tokens before the rm/rmdir command name - if skip_command: - if token in ("rm", "rmdir") or token.endswith("/rm") or token.endswith("/rmdir"): - skip_command = False - continue - - # Skip flags - if token.startswith("-"): - continue - - # Everything else is a path argument - paths.append(token) - - return paths - - @staticmethod - def _extract_mv_source_paths(segment: str) -> list[str]: - """Extract all source paths from an mv command segment. - - Uses shlex.split() to handle quoted paths properly. - Supports both standard and -t/--target-directory forms: - - mv src1 src2 dest/ - - mv -t dest/ src1 src2 - """ - try: - tokens = shlex.split(segment) - except ValueError: - # Malformed shell syntax - skip - return [] - - # Find the mv command position (skip env vars, sudo, command prefix) - mv_index = None - for i, token in enumerate(tokens): - if token == "mv" or token.endswith("/mv"): - mv_index = i - break - - if mv_index is None: - return [] - - args = tokens[mv_index + 1 :] - non_flag_args: list[str] = [] - target_dir_mode = False - i = 0 - - while i < len(args): - arg = args[i] - # End of options marker - if arg == "--": - non_flag_args.extend(args[i + 1 :]) - break - # Option handling - if arg.startswith("-") and arg != "-": - # Handle -t/--target-directory (takes next arg as target dir) - if arg in ("-t", "--target-directory"): - target_dir_mode = True - # Skip the directory argument - if i + 1 < len(args): - i += 2 - continue - return [] # Malformed - # Handle --target-directory=DIR - if arg.startswith("--target-directory="): - target_dir_mode = True - i += 1 - continue - # Other flags (skip) - i += 1 - continue - # Non-option argument - non_flag_args.append(arg) - i += 1 - - if not non_flag_args: - return [] - - # If target dir specified via -t, all remaining args are sources - if target_dir_mode: - return non_flag_args - - # Standard form: mv src1 src2 ... dest - all but last are sources - if len(non_flag_args) >= 2: - return non_flag_args[:-1] - - # Single arg - treat conservatively as potential source - return non_flag_args - - def _select_strategy(self): - """Detect launcher and select appropriate strategy.""" - try: - sys.path.insert(0, str(self.project_root / "src" / "amplihack")) - from amplihack.context.adaptive.detector import LauncherDetector - from amplihack.context.adaptive.strategies import ClaudeStrategy, CopilotStrategy - - detector = LauncherDetector(self.project_root) - launcher_type = detector.detect() - - if launcher_type == "copilot": - return CopilotStrategy(self.project_root, self.log) - return ClaudeStrategy(self.project_root, self.log) - - except ImportError as e: - self.log(f"Adaptive strategy not available: {e}", "DEBUG") - return None - - -def main(): - """Entry point for the pre tool use hook.""" - hook = PreToolUseHook() - hook.run() - - -if __name__ == "__main__": - main() diff --git a/amplifier-bundle/tools/amplihack/hooks/precommit_installer.py b/amplifier-bundle/tools/amplihack/hooks/precommit_installer.py deleted file mode 100755 index 2cdb70a79..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/precommit_installer.py +++ /dev/null @@ -1,349 +0,0 @@ -#!/usr/bin/env python3 -""" -Claude Code startup hook for pre-commit installation. - -Philosophy: -- Automatically install pre-commit hooks when config exists -- Fail gracefully - never break session start -- Simple and focused - one responsibility -- Respect user preferences via environment variables -""" - -import os -import subprocess -import sys -from pathlib import Path -from typing import Any - -# Clean import structure -sys.path.insert(0, str(Path(__file__).parent)) -from hook_processor import HookProcessor - - -class PrecommitInstallerHook(HookProcessor): - """Hook processor for installing pre-commit hooks at session start. - - This hook automatically installs pre-commit hooks when: - - A git repository is detected - - .pre-commit-config.yaml exists - - pre-commit is available - - Hooks are not already installed - - Not disabled via environment variable - - Environment Variables: - AMPLIHACK_AUTO_PRECOMMIT: Set to "0", "false", "no", or "off" to disable - """ - - def __init__(self): - super().__init__("precommit_installer") - - def process(self, input_data: dict[str, Any]) -> dict[str, Any]: - """Process session start event and install pre-commit if needed. - - Args: - input_data: Input from Claude Code - - Returns: - Empty dict (no context to add) - """ - try: - # Check if disabled via environment variable - if self._is_env_disabled(): - self.log("Pre-commit auto-install disabled via environment variable") - self.save_metric("precommit_env_disabled", True) - return {} - - # Check if we're in a git repo - if not self._is_git_repo(): - self.log("Not a git repository - skipping pre-commit check") - self.save_metric("precommit_not_git_repo", True) - return {} - - # Check if pre-commit config exists - config_file = self.project_root / ".pre-commit-config.yaml" - if not config_file.exists(): - self.log("No .pre-commit-config.yaml found - skipping") - self.save_metric("precommit_no_config", True) - return {} - - self.log("Found pre-commit config, checking installation...") - - # Check if pre-commit is available - precommit_info = self._is_precommit_available() - if not precommit_info["available"]: - self.log("⚠️ pre-commit not installed - skipping hook installation", "WARNING") - print( - "\n⚠️ pre-commit is not installed but .pre-commit-config.yaml exists", - file=sys.stderr, - ) - print(" Install with: pip install pre-commit\n", file=sys.stderr) - self.save_metric("precommit_available", False) - return {} - - # Log pre-commit version - version = precommit_info.get("version", "unknown") - self.log(f"pre-commit available: {version}") - self.save_metric("precommit_available", True) - self.save_metric("precommit_version", version) - - # Check if hooks are installed - hooks_status = self._are_hooks_installed() - if hooks_status["installed"]: - self.log("✅ pre-commit hooks already installed") - self.save_metric("precommit_already_installed", True) - return {} - - if hooks_status.get("corrupted"): - self.log("⚠️ Existing hook file appears corrupted, will reinstall", "WARNING") - self.save_metric("precommit_corrupted", True) - - # Install the hooks - self.log("Installing pre-commit hooks...") - install_result = self._install_hooks() - - if install_result["success"]: - self.log("✅ Successfully installed pre-commit hooks") - print("\n✅ Installed pre-commit hooks\n", file=sys.stderr) - self.save_metric("precommit_installed", True) - else: - error_msg = install_result.get("error", "Unknown error") - self.log(f"⚠️ Failed to install pre-commit hooks: {error_msg}", "WARNING") - print( - f"\n⚠️ Failed to install pre-commit hooks: {error_msg}", - file=sys.stderr, - ) - print( - " You may need to run 'pre-commit install' manually\n", - file=sys.stderr, - ) - self.save_metric("precommit_installed", False) - self.save_metric("precommit_install_error", error_msg) - - except Exception as e: - # Fail gracefully - don't break session start - self.log(f"Pre-commit check failed: {e}", "WARNING") - self.save_metric("precommit_check_error", str(e)) - - return {} - - def _is_env_disabled(self) -> bool: - """Check if pre-commit auto-install is disabled via environment variable. - - Returns: - True if disabled via AMPLIHACK_AUTO_PRECOMMIT environment variable - - Environment variable values that disable: - - "0" - - "false" - - "no" - - "off" - """ - env_value = os.environ.get("AMPLIHACK_AUTO_PRECOMMIT", "").lower() - return env_value in ("0", "false", "no", "off") - - def _is_git_repo(self) -> bool: - """Check if current directory is a git repository. - - Returns: - True if .git directory exists and is a directory - """ - git_dir = self.project_root / ".git" - return git_dir.exists() and git_dir.is_dir() - - def _is_precommit_available(self) -> dict[str, Any]: - """Check if pre-commit command is available and get version info. - - Returns: - Dictionary with: - - available (bool): Whether pre-commit is available - - version (str): Version string if available - - error (str): Error message if not available - """ - try: - result = subprocess.run( - ["pre-commit", "--version"], - capture_output=True, - text=True, - timeout=5, - cwd=self.project_root, - ) - - if result.returncode == 0: - # Extract version from output like "pre-commit 3.5.0" - version = result.stdout.strip() - return { - "available": True, - "version": version, - } - return { - "available": False, - "error": f"pre-commit --version returned {result.returncode}", - } - - except FileNotFoundError: - return { - "available": False, - "error": "pre-commit command not found in PATH", - } - except subprocess.TimeoutExpired: - return { - "available": False, - "error": "pre-commit --version timed out after 5 seconds", - } - except OSError as e: - return { - "available": False, - "error": f"OS error checking pre-commit: {e}", - } - except Exception as e: - return { - "available": False, - "error": f"Unexpected error checking pre-commit: {e}", - } - - def _are_hooks_installed(self) -> dict[str, Any]: - """Check if pre-commit hooks are already installed in .git/hooks. - - Returns: - Dictionary with: - - installed (bool): Whether hooks are installed - - corrupted (bool): Whether existing hook appears corrupted - - error (str): Error message if check failed - """ - hook_file = self.project_root / ".git" / "hooks" / "pre-commit" - - if not hook_file.exists(): - return {"installed": False} - - # Check if it's a pre-commit managed hook - try: - content = hook_file.read_text() - - # Real pre-commit hooks contain specific markers: - # 1. "#!/usr/bin/env python" or similar python shebang - # 2. "import pre_commit" or "from pre_commit" (or with hyphen in comments) - # 3. Or "INSTALL_PYTHON" (pre-commit marker) - content_lower = content.lower() - - # Check for pre-commit import patterns (both _ and - versions) - has_precommit_import = ( - "import pre_commit" in content_lower - or "from pre_commit" in content_lower - or "import pre-commit" in content_lower - or "from pre-commit" in content_lower - or "install_python" in content_lower - ) - - is_precommit_hook = has_precommit_import and "#!/usr/bin/env" in content - - if not is_precommit_hook: - # Hook file exists but doesn't look like pre-commit - return { - "installed": False, - "corrupted": True, - "error": "Hook file exists but doesn't appear to be pre-commit managed", - } - - # Check for minimal expected content (avoid false positives) - if len(content.strip()) < 50: - return { - "installed": False, - "corrupted": True, - "error": "Hook file too small, may be corrupted", - } - - return {"installed": True} - - except PermissionError: - return { - "installed": False, - "error": "Permission denied reading hook file", - } - except UnicodeDecodeError: - return { - "installed": False, - "corrupted": True, - "error": "Hook file contains invalid text encoding", - } - except Exception as e: - return { - "installed": False, - "error": f"Error reading hook file: {e}", - } - - def _install_hooks(self) -> dict[str, Any]: - """Install pre-commit hooks with comprehensive error handling. - - Returns: - Dictionary with: - - success (bool): Whether installation succeeded - - error (str): Error message if failed - - stderr (str): stderr output for diagnostics - """ - try: - result = subprocess.run( - ["pre-commit", "install"], - capture_output=True, - text=True, - timeout=30, - cwd=self.project_root, - ) - - if result.returncode == 0: - return {"success": True} - # Diagnose common failure modes - stderr = result.stderr.lower() - - if "permission denied" in stderr: - error = "Permission denied - check .git/hooks directory permissions" - elif "network" in stderr or "connection" in stderr: - error = "Network error - check internet connection for hook downloads" - elif "not a git repository" in stderr: - error = "Not a git repository or .git directory corrupted" - elif "yaml" in stderr or "config" in stderr: - error = "Invalid .pre-commit-config.yaml file" - else: - error = f"pre-commit install failed (exit {result.returncode})" - - return { - "success": False, - "error": error, - "stderr": result.stderr, - } - - except subprocess.TimeoutExpired: - return { - "success": False, - "error": "Installation timed out after 30 seconds", - } - except FileNotFoundError: - return { - "success": False, - "error": "pre-commit command not found", - } - except PermissionError as e: - return { - "success": False, - "error": f"Permission error: {e}", - } - except OSError as e: - return { - "success": False, - "error": f"OS error: {e}", - } - except Exception as e: - self.log(f"Unexpected error installing hooks: {e}", "ERROR") - return { - "success": False, - "error": f"Unexpected error: {e}", - } - - -def main(): - """Entry point for the pre-commit installer hook.""" - hook = PrecommitInstallerHook() - hook.run() - - -if __name__ == "__main__": - main() diff --git a/amplifier-bundle/tools/amplihack/hooks/session_start.py b/amplifier-bundle/tools/amplihack/hooks/session_start.py deleted file mode 100755 index 996e986f9..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/session_start.py +++ /dev/null @@ -1,564 +0,0 @@ -#!/usr/bin/env python3 -""" -Claude Code hook for session start. -Uses unified HookProcessor for common functionality. -""" - -# Import the base processor -import sys -from pathlib import Path -from typing import Any - -# Clean import structure -sys.path.insert(0, str(Path(__file__).parent)) -from hook_processor import HookProcessor - -# Clean imports through package structure -sys.path.insert(0, str(Path(__file__).parent.parent)) -try: - from context_preservation import ContextPreserver - from paths import get_project_root - from settings_migrator import migrate_global_hooks - - from amplihack.context.adaptive.detector import LauncherDetector - from amplihack.context.adaptive.strategies import ClaudeStrategy, CopilotStrategy - from amplihack.utils.paths import FrameworkPathResolver -except ImportError: - # Fallback imports for standalone execution - get_project_root = None - ContextPreserver = None - FrameworkPathResolver = None - migrate_global_hooks = None - LauncherDetector = None - ClaudeStrategy = None - CopilotStrategy = None - - -class SessionStartHook(HookProcessor): - """Hook processor for session start events.""" - - def __init__(self): - super().__init__("session_start") - - def process(self, input_data: dict[str, Any]) -> dict[str, Any]: - """Process session start event. - - Checks performed: - 1. Version mismatch detection and auto-update - 2. Global hook migration (prevents duplicate hook execution) - 3. Original request capture for context preservation - - Args: - input_data: Input from Claude Code - - Returns: - Additional context to add to the session - """ - # Check for version mismatch FIRST (before any heavy operations) - self._check_version_mismatch() - - # NEW: Check for global hook duplication and migrate - self._migrate_global_hooks() - - # Detect launcher and select strategy - strategy = self._select_strategy() - self.log(f"Using strategy: {strategy.__class__.__name__}") - - # Extract prompt - prompt = input_data.get("prompt", "") - self.log(f"Prompt length: {len(prompt)}") - - # Save metric - self.save_metric("prompt_length", len(prompt)) - - # Capture original request for substantial prompts - original_request_context = "" - original_request_captured = False - - # Simple check for substantial requests - substantial_keywords = [ - "implement", - "create", - "build", - "add", - "fix", - "update", - "all", - "every", - "each", - "complete", - "comprehensive", - ] - is_substantial = len(prompt) > 20 or any( - word in prompt.lower() for word in substantial_keywords - ) - - if ContextPreserver and is_substantial: - try: - # Create context preserver with current session ID - session_id = self.get_session_id() - preserver = ContextPreserver(session_id) - - # Extract and save original request - original_request = preserver.extract_original_request(prompt) - - # Simple verification and context formatting - session_dir = self.project_root / ".claude" / "runtime" / "logs" / session_id - original_request_captured = (session_dir / "ORIGINAL_REQUEST.md").exists() - - if original_request_captured: - self.log( - f"✅ Original request captured: {original_request.get('target', 'Unknown')}" - ) - original_request_context = preserver.format_agent_context(original_request) - else: - self.log("⚠️ Original request extraction failed", "WARNING") - - self.save_metric("original_request_captured", original_request_captured) - - except Exception as e: - self.log(f"Failed to capture original request: {e}", "ERROR") - self.save_metric("original_request_captured", False) - - # UVX staging if available - try: - from amplihack.utils.uvx_staging import is_uvx_deployment, stage_uvx_framework - - if is_uvx_deployment(): - staged = stage_uvx_framework() - self.save_metric("uvx_staging_success", staged) - except ImportError: - pass - - # Settings.json initialization/merge with UVX template - # Ensures statusLine and other critical configurations are present - try: - from amplihack.utils.uvx_settings_manager import UVXSettingsManager - - settings_path = self.project_root / ".claude" / "settings.json" - manager = UVXSettingsManager() - - # Check if settings need updating (empty, missing statusLine, etc.) - if manager.should_use_uvx_template(settings_path): - success = manager.create_uvx_settings(settings_path, preserve_existing=True) - if success: - self.log("✅ Settings.json updated with UVX template (includes statusLine)") - self.save_metric("settings_updated", True) - else: - self.log("⚠️ Failed to update settings.json with template", "WARNING") - self.save_metric("settings_updated", False) - else: - self.log("Settings.json already complete") - self.save_metric("settings_updated", False) - except ImportError as e: - self.log(f"UVXSettingsManager not available: {e}", "WARNING") - self.save_metric("settings_updated", False) - except Exception as e: - # Fail gracefully - don't break session start - self.log(f"Settings merge failed (non-critical): {e}", "WARNING") - self.save_metric("settings_update_error", True) - - # Neo4j has been removed from this project (Week 7 cleanup) - # Memory functionality now uses Kuzu backend exclusively - self.log("Using Kuzu memory backend (Neo4j removed)") - self.save_metric("kuzu_backend", True) - - # Show auto-routing status to the user (visible in terminal via stderr) - self._show_auto_routing_status() - - # Build context if needed - context_parts = [] - - # Add project context (from PROJECT.md if available, otherwise generic) - context_parts.append("## Project Context") - project_md = self.project_root / ".claude" / "context" / "PROJECT.md" - if project_md.exists(): - try: - project_content = project_md.read_text().strip() - # Extract the first non-empty, non-header line as a summary - for line in project_content.splitlines(): - stripped = line.strip() - if stripped and not stripped.startswith("#") and not stripped.startswith("---"): - context_parts.append(stripped) - break - else: - context_parts.append("Project context loaded from PROJECT.md.") - except Exception: - context_parts.append("Project context available in .claude/context/PROJECT.md.") - else: - context_parts.append("Project context available in CLAUDE.md or .claude/context/PROJECT.md.") - - # Check for recent discoveries from memory - context_parts.append("\n## Recent Learnings") - try: - from amplihack.memory.discoveries import get_recent_discoveries - - recent_discoveries = get_recent_discoveries(days=30, limit=5) - if recent_discoveries: - context_parts.append( - f"Found {len(recent_discoveries)} recent discoveries in memory:" - ) - for disc in recent_discoveries: - summary = disc.get("summary", "No summary") - category = disc.get("category", "uncategorized") - context_parts.append(f"- [{category}] {summary}") - else: - context_parts.append("Check .claude/context/DISCOVERIES.md for recent insights.") - except ImportError: - # Fallback if memory module not available - context_parts.append("Check .claude/context/DISCOVERIES.md for recent insights.") - - # Simplified preference file resolution - preferences_file = ( - FrameworkPathResolver.resolve_preferences_file() - if FrameworkPathResolver - else self.project_root / ".claude" / "context" / "USER_PREFERENCES.md" - ) - - if preferences_file and preferences_file.exists(): - try: - with open(preferences_file) as f: - full_prefs_content = f.read() - self.log(f"Successfully read preferences from: {preferences_file}") - - # Use strategy to inject preferences (launcher-specific format) - if strategy: - prefs_context = strategy.inject_context(full_prefs_content) - context_parts.append(prefs_context) - self.log(f"Injected preferences using {strategy.__class__.__name__}") - else: - # Fallback to default injection - context_parts.append("\n## 🎯 USER PREFERENCES (MANDATORY - MUST FOLLOW)") - context_parts.append( - "\nApply these preferences to all responses. These preferences are READ-ONLY except when using /amplihack:customize command.\n" - ) - context_parts.append( - "\n💡 **Preference Management**: Use /amplihack:customize to view or modify preferences.\n" - ) - context_parts.append(full_prefs_content) - self.log("Injected full USER_PREFERENCES.md content into session (fallback)") - - except Exception as e: - self.log(f"Could not read preferences: {e}", "WARNING") - # Fail silently - don't break session start - - # Add workflow information at startup with UVX support - context_parts.append("\n## 📝 Default Workflow") - context_parts.append("The multi-step workflow is automatically followed by `/ultrathink`") - - # Use FrameworkPathResolver for workflow path - workflow_file = None - if FrameworkPathResolver: - workflow_file = FrameworkPathResolver.resolve_workflow_file() - - if workflow_file: - context_parts.append(f"• To view the workflow: Read {workflow_file}") - context_parts.append("• To customize: Edit the workflow file directly") - else: - context_parts.append( - "• To view the workflow: Use FrameworkPathResolver.resolve_workflow_file() (UVX-compatible)" - ) - context_parts.append("• To customize: Edit the workflow file directly") - context_parts.append( - "• Steps include: Requirements → Issue → Branch → Design → Implement → Review → Merge" - ) - - # Add verbosity instructions - context_parts.append("\n## 🎤 Verbosity Mode") - context_parts.append("• Current setting: balanced") - context_parts.append( - "• To enable verbose: Use TodoWrite tool frequently and provide detailed explanations" - ) - context_parts.append("• Claude will adapt to your verbosity preference in responses") - - # Build response - output = {} - if context_parts: - # Create comprehensive startup context - full_context = "\n".join(context_parts) - - # Build a visible startup message (even though Claude Code may not display it) - startup_msg_parts = ["🚀 AmplifyHack Session Initialized", "━" * 40] - - # Add preference summary if any exist - if len([p for p in context_parts if "**" in p and ":" in p]) > 0: - startup_msg_parts.append("🎯 Active preferences loaded and enforced") - - startup_msg_parts.extend( - [ - "", - "📝 Workflow: Use `/ultrathink` for the multi-step process", - "⚙️ Customize: Edit the workflow file (use FrameworkPathResolver for UVX compatibility)", - "🎯 Preferences: Loaded from USER_PREFERENCES.md", - "", - "Type `/help` for available commands", - ] - ) - - # CRITICAL: Inject original request context at top priority - if original_request_context: - full_context = original_request_context + "\n\n" + full_context - - # Use correct SessionStart hook protocol format - output = { - "hookSpecificOutput": { - "hookEventName": "SessionStart", - "additionalContext": full_context, - } - } - self.log( - f"Session initialized - Original request: {'✅' if original_request_captured else '❌'}" - ) - self.log(f"Injected {len(full_context)} characters of context") - - return output - - def _show_auto_routing_status(self) -> None: - """Print a visible auto-routing status line to stderr at session start.""" - try: - sys.path.insert(0, str(Path(__file__).parent)) - from dev_intent_router import is_auto_dev_enabled - - if is_auto_dev_enabled(): - print( - '⚡ Auto-routing active — dev tasks use smart orchestrator automatically. Disable: /amplihack:no-auto-dev | Bypass: "just answer"', - file=sys.stderr, - ) - else: - print( - "⏸ Auto-routing disabled — use /dev for orchestration, or /amplihack:auto-dev to re-enable", - file=sys.stderr, - ) - self.save_metric("auto_routing_enabled", is_auto_dev_enabled()) - except Exception as e: - self.log(f"Auto-routing status check failed (non-fatal): {e}", "WARNING") - - def _select_strategy(self): - """Detect launcher and select appropriate strategy.""" - if LauncherDetector is None or ClaudeStrategy is None or CopilotStrategy is None: - # Fallback to default (no strategy) - return None - - detector = LauncherDetector(self.project_root) - launcher_type = detector.detect() # Returns string: "claude", "copilot", "unknown" - - if launcher_type == "copilot": - return CopilotStrategy(self.project_root, self.log) - return ClaudeStrategy(self.project_root, self.log) - - def _check_version_mismatch(self) -> None: - """Check for version mismatch and offer to update. - - Phase 2: Interactive update with user prompt. - Fails gracefully - never raises exceptions. - """ - try: - # Import modules - sys.path.insert(0, str(self.project_root / ".claude" / "tools" / "amplihack")) - from update_engine import perform_update - from update_prefs import load_update_preference, save_update_preference - from version_checker import check_version_mismatch - - # Check for mismatch - version_info = check_version_mismatch() - - if not version_info.is_mismatched: - self.log("✅ .claude/ directory version matches package") - return - - # Log mismatch - self.log( - f"⚠️ Version mismatch detected: package={version_info.package_commit}, project={version_info.project_commit}", - "WARNING", - ) - - # Check user preference - preference = load_update_preference() - - if preference == "always": - # Auto-update without prompting - self.log("Auto-updating per user preference") - result = perform_update( - version_info.package_path, - version_info.project_path, - version_info.project_commit, - ) - - if result.success: - print( - f"\n✓ Updated .claude/ directory to version {result.new_version}", - file=sys.stderr, - ) - print( - f" Updated {len(result.updated_files)} files, preserved {len(result.preserved_files)} files", - file=sys.stderr, - ) - print(f" Backup: {result.backup_path}\n", file=sys.stderr) - else: - print( - f"\n✗ Update failed: {result.error}", - file=sys.stderr, - ) - print(f" Backup preserved: {result.backup_path}\n", file=sys.stderr) - - self.save_metric("auto_update_executed", result.success) - return - - if preference == "never": - # Skip per user preference - just log - self.log("Skipping update per user preference (never)") - print( - f"\n⚠️ .claude/ directory out of date (package: {version_info.package_commit}, project: {version_info.project_commit or 'unknown'})", - file=sys.stderr, - ) - print( - " Auto-update disabled. To update: /amplihack:customize set auto_update always\n", - file=sys.stderr, - ) - return - - # No preference - prompt user - print("\n" + "=" * 70, file=sys.stderr) - print("⚠️ Version Mismatch Detected", file=sys.stderr) - print("=" * 70, file=sys.stderr) - print( - "\nYour project's .claude/ directory is out of date:", - file=sys.stderr, - ) - print(f" Package version: {version_info.package_commit} (installed)", file=sys.stderr) - print( - f" Project version: {version_info.project_commit or 'unknown'} (in .claude/.version)", - file=sys.stderr, - ) - print( - "\nThis may cause bugs or unexpected behavior (like stale hooks).", - file=sys.stderr, - ) - print("\nUpdate now? Your custom files will be preserved.", file=sys.stderr) - print("\n[y] Yes, update now", file=sys.stderr) - print("[n] No, skip this time", file=sys.stderr) - print("[a] Always auto-update (don't ask again)", file=sys.stderr) - print("[v] Never auto-update (don't ask again)", file=sys.stderr) - print("\n" + "=" * 70, file=sys.stderr) - - # Get user input with timeout - import select - - print("\nChoice (y/n/a/v): ", end="", file=sys.stderr, flush=True) - - # 30 second timeout for user response - ready, _, _ = select.select([sys.stdin], [], [], 30) - - if not ready: - print("\n\n(timeout - skipping update)\n", file=sys.stderr) - self.log("User prompt timed out - skipping update") - return - - choice = sys.stdin.readline().strip().lower() - - # Handle response - if choice in ["a", "always"]: - save_update_preference("always") - self.log("User selected 'always' - saving preference and updating") - choice = "yes" - elif choice in ["v", "never"]: - save_update_preference("never") - self.log("User selected 'never' - saving preference and skipping") - print("\n✓ Preference saved: never auto-update\n", file=sys.stderr) - return - elif choice not in ["y", "yes"]: - self.log(f"User declined update (choice: {choice})") - print("\n✓ Skipping update\n", file=sys.stderr) - return - - # Perform update - print("\nUpdating .claude/ directory...\n", file=sys.stderr) - result = perform_update( - version_info.package_path, version_info.project_path, version_info.project_commit - ) - - if result.success: - print(f"\n✓ Update complete! Version {result.new_version}", file=sys.stderr) - print( - f" Updated: {len(result.updated_files)} files", - file=sys.stderr, - ) - print( - f" Preserved: {len(result.preserved_files)} files (you modified these)", - file=sys.stderr, - ) - print(f" Backup: {result.backup_path}", file=sys.stderr) - print("\n" + "=" * 70 + "\n", file=sys.stderr) - self.save_metric("update_success", True) - else: - print(f"\n✗ Update failed: {result.error}", file=sys.stderr) - print(f" Backup preserved: {result.backup_path}", file=sys.stderr) - print("\n" + "=" * 70 + "\n", file=sys.stderr) - self.save_metric("update_success", False) - - except Exception as e: - # Fail gracefully - don't break session start - self.log(f"Version check failed: {e}", "WARNING") - self.save_metric("version_check_error", True) - - def _migrate_global_hooks(self) -> None: - """Migrate global amplihack hooks to project-local. - - Detects and removes amplihack hooks from ~/.claude/settings.json - to prevent duplicate execution. Fail-safe: errors are logged but - don't break session startup. - - This prevents the duplicate stop hook issue where hooks run twice - (once from global, once from project-local). - """ - # Skip if migrator not available - if migrate_global_hooks is None: - return - - try: - result = migrate_global_hooks(self.project_root) - - if result.global_hooks_removed: - # User has been notified by migrator - just log - self.log("✅ Global amplihack hooks migrated to project-local") - self.save_metric("global_hooks_migrated", True) - - # Additional user notification - print("\n" + "=" * 70, file=sys.stderr) - print("✓ Hook Migration Complete", file=sys.stderr) - print("=" * 70, file=sys.stderr) - print( - "\nGlobal amplihack hooks have been removed from ~/.claude/settings.json", - file=sys.stderr, - ) - print( - "Hooks now run only from project-local settings (no more duplicates!).", - file=sys.stderr, - ) - if result.backup_created: - print(f"Backup created: {result.backup_created}", file=sys.stderr) - print("\n" + "=" * 70 + "\n", file=sys.stderr) - - elif result.global_hooks_found and not result.global_hooks_removed: - # Migration attempted but failed - self.log("⚠️ Global hooks detected but migration failed", "WARNING") - self.save_metric("global_hooks_migrated", False) - - else: - # No global hooks found - normal case - self.save_metric("global_hooks_migrated", False) - - except Exception as e: - # Fail-safe: Log but don't break session - self.log(f"Hook migration failed (non-critical): {e}", "WARNING") - self.save_metric("hook_migration_error", True) - - -def main(): - """Entry point for the session start hook.""" - hook = SessionStartHook() - hook.run() - - -if __name__ == "__main__": - main() diff --git a/amplifier-bundle/tools/amplihack/hooks/session_stop.py b/amplifier-bundle/tools/amplihack/hooks/session_stop.py deleted file mode 100755 index dab0e1ca4..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/session_stop.py +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python3 -""" -Memory System Session Stop Hook - -Captures learnings from the session and stores them using MemoryCoordinator. -Extracts patterns, decisions, and outcomes for future agent use. -Works with SQLite or Neo4j backend. -""" - -import json -import os -import sys -from pathlib import Path - -# Add project src to path -current = Path(__file__).resolve() -project_root = None -for parent in current.parents: - if (parent / ".claude").exists() and (parent / "CLAUDE.md").exists(): - project_root = parent - break -if project_root is None: - print("[WARN] Could not locate project root", file=sys.stderr) - sys.exit(0) # Graceful exit if can't find project - -sys.path.insert(0, str(project_root / "src")) - - -def main(): - """Capture session learnings and store using MemoryCoordinator.""" - try: - # Import memory coordinator - from amplihack.memory.coordinator import MemoryCoordinator - from amplihack.memory.types import MemoryType - - # Get session context from environment or stdin - session_context = json.loads(sys.stdin.read()) if not sys.stdin.isatty() else {} - - # Extract agent type and output from context - agent_type = session_context.get("agent_type", "general") - agent_output = session_context.get("output", "") - task_description = session_context.get("task", "") - success = session_context.get("success", True) - session_id = session_context.get("session_id", "hook_session") - - if not agent_output: - # Nothing to learn from - return - - # Initialize coordinator with session_id - coordinator = MemoryCoordinator(session_id=session_id) - - # Store learning as SEMANTIC memory (reusable knowledge) - # Extract key learnings (simplified - production would use more sophisticated extraction) - learning_content = f"Agent {agent_type}: {agent_output[:500]}" - - memory_id = coordinator.store( - content=learning_content, - memory_type=MemoryType.SEMANTIC, - agent_type=agent_type, - tags=["learning", "session_end"], - metadata={ - "task": task_description, - "success": success, - "project_id": os.getenv("AMPLIHACK_PROJECT_ID", "amplihack"), - }, - ) - - if memory_id: - print("[INFO] Stored 1 learning in memory system", file=sys.stderr) - - except Exception as e: - # Don't fail session stop if memory capture fails - print(f"[WARN] Memory capture failed: {e}", file=sys.stderr) - - -if __name__ == "__main__": - main() diff --git a/amplifier-bundle/tools/amplihack/hooks/settings_migrator.py b/amplifier-bundle/tools/amplihack/hooks/settings_migrator.py deleted file mode 100755 index 65e109967..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/settings_migrator.py +++ /dev/null @@ -1,461 +0,0 @@ -#!/usr/bin/env python3 -""" -Settings Migration Module - Removes global amplihack hooks from ~/.claude/settings.json - -Ensures amplihack hooks only run from project-local settings, preventing duplicate -stop hook execution that causes issues. - -Philosophy: -- Ruthlessly Simple: Single-purpose module with clear contract -- Zero-BS: Every function works, no stubs or placeholders -- Fail-Safe: Non-destructive, creates backups, preserves non-amplihack hooks -- Modular: Self-contained brick with clear public API - -Public API: - SettingsMigrator: Main class for migration operations - HookMigrationResult: Result dataclass for migration operations - migrate_global_hooks(): Convenience function for one-shot migration -""" - -import json -import os -import shutil -import sys -import time -from dataclasses import dataclass -from pathlib import Path -from typing import Any - - -@dataclass -class HookMigrationResult: - """Result from hook migration operation. - - Attributes: - success: Whether migration completed successfully - global_hooks_found: Whether amplihack hooks were found in global settings - global_hooks_removed: Whether global hooks were removed - project_hook_ensured: Whether project-local hook is present - backup_created: Path to backup file if created - error: Error message if migration failed - """ - - success: bool - global_hooks_found: bool - global_hooks_removed: bool - project_hook_ensured: bool - backup_created: Path | None - error: str | None - - -class SettingsMigrator: - """Migrates amplihack hooks from global to project-local settings. - - Handles detection and removal of amplihack hooks from ~/.claude/settings.json - while preserving all other hooks and settings. Creates backups before modification. - """ - - # Patterns to detect amplihack hooks (substring match against command field). - # Only matches amplihack workflow hooks — NOT xpia security hooks. - # XPIA hooks are intentionally global (run for every project) and fail-open - # when no project root is found, so they must NOT be removed from global settings. - AMPLIHACK_HOOK_PATTERNS = [ - "amplihack/hooks/stop.py", - "amplihack/hooks/session_start.py", - "amplihack/hooks/pre_tool_use.py", - "amplihack/hooks/post_tool_use.py", - "amplihack/hooks/pre_compact.py", - "amplihack/hooks/session_end.py", - "amplihack/hooks/user_prompt_submit.py", - "amplihack/hooks/workflow_classification_reminder.py", - ] - - # All Claude Code hook event types that may contain amplihack hooks - ALL_HOOK_TYPES = [ - "SessionStart", - "Stop", - "PreToolUse", - "PostToolUse", - "PreCompact", - "SessionEnd", - "UserPromptSubmit", - ] - - def __init__(self, project_root: Path | None = None): - """Initialize settings migrator. - - Args: - project_root: Project root directory (auto-detected if None) - """ - if project_root is None: - project_root = self._detect_project_root() - - self.project_root = project_root - self.global_settings_path = Path.home() / ".claude" / "settings.json" - self.project_settings_path = project_root / ".claude" / "settings.json" - - def _detect_project_root(self) -> Path: - """Auto-detect project root by finding .claude marker. - - Returns: - Project root path - - Raises: - ValueError: If project root cannot be found - """ - current = Path(__file__).resolve().parent - for _ in range(10): # Max 10 levels up - if (current / ".claude").exists(): - return current - if current == current.parent: - break - current = current.parent - - raise ValueError("Could not find project root with .claude marker") - - def log(self, message: str) -> None: - """Log message to stderr for visibility. - - Args: - message: Message to log - """ - print(f"[settings_migrator] {message}", file=sys.stderr) - - def detect_global_amplihack_hooks(self) -> bool: - """Check if global settings contain amplihack hooks. - - Returns: - True if amplihack hooks found in global settings - """ - if not self.global_settings_path.exists(): - return False - - try: - with open(self.global_settings_path) as f: - settings = json.load(f) - - # Check hooks section - hooks = settings.get("hooks", {}) - - # Check all hook types - for hook_type in self.ALL_HOOK_TYPES: - hook_configs = hooks.get(hook_type, []) - - for hook_config in hook_configs: - # Check hooks array in each config - for hook in hook_config.get("hooks", []): - command = hook.get("command", "") - - # Check if command matches any amplihack pattern - if any(pattern in command for pattern in self.AMPLIHACK_HOOK_PATTERNS): - return True - - return False - - except OSError as e: - self.log(f"Could not read global settings file: {e}") - return False - except json.JSONDecodeError as e: - self.log(f"Global settings file has invalid JSON: {e}") - return False - - def migrate_to_project_local(self) -> HookMigrationResult: - """Remove global amplihack hooks and ensure project-local hook exists. - - This is the main entry point for migration. It: - 1. Checks for global amplihack hooks - 2. Creates backup if modifications needed - 3. Removes amplihack hooks from global settings - 4. Verifies project-local settings.json exists - - Returns: - HookMigrationResult with operation details - """ - try: - # Check if global hooks exist - global_hooks_found = self.detect_global_amplihack_hooks() - - if not global_hooks_found: - self.log("No global amplihack hooks found") - # Still deduplicate project settings if they exist - self._deduplicate_settings_file(self.project_settings_path) - project_hook_ensured = self.project_settings_path.exists() - return HookMigrationResult( - success=True, - global_hooks_found=False, - global_hooks_removed=False, - project_hook_ensured=project_hook_ensured, - backup_created=None, - error=None, - ) - - # Global hooks found - proceed with migration - self.log("Found global amplihack hooks - removing...") - - # Create backup - backup_path = self._create_backup() - if backup_path: - self.log(f"Created backup: {backup_path}") - - # Remove global hooks - removed = self._remove_global_amplihack_hooks() - - if not removed: - return HookMigrationResult( - success=False, - global_hooks_found=True, - global_hooks_removed=False, - project_hook_ensured=False, - backup_created=backup_path, - error="Failed to remove global hooks", - ) - - self.log("Successfully removed global amplihack hooks") - - # Deduplicate any remaining hooks in global settings - self._deduplicate_settings_file(self.global_settings_path) - - # Also deduplicate project settings (prevents duplicates there too) - self._deduplicate_settings_file(self.project_settings_path) - - # Check project settings - project_hook_ensured = self.project_settings_path.exists() - - if not project_hook_ensured: - self.log(f"Warning: Project settings not found at {self.project_settings_path}") - - return HookMigrationResult( - success=True, - global_hooks_found=True, - global_hooks_removed=True, - project_hook_ensured=project_hook_ensured, - backup_created=backup_path, - error=None, - ) - - except Exception as e: - self.log(f"Migration error: {e}") - return HookMigrationResult( - success=False, - global_hooks_found=False, - global_hooks_removed=False, - project_hook_ensured=False, - backup_created=None, - error=str(e), - ) - - def _create_backup(self) -> Path | None: - """Create backup of global settings before modification. - - Returns: - Path to backup file, or None if backup failed - """ - if not self.global_settings_path.exists(): - return None - - try: - # Create timestamped backup - timestamp = int(time.time()) - backup_path = self.global_settings_path.parent / f"settings.json.backup.{timestamp}" - - shutil.copy2(self.global_settings_path, backup_path) - return backup_path - - except OSError as e: - self.log(f"Backup creation failed: {e}") - return None - - def _remove_global_amplihack_hooks(self) -> bool: - """Remove amplihack hooks from global settings while preserving others. - - Returns: - True if removal successful, False otherwise - """ - if not self.global_settings_path.exists(): - return True # Nothing to remove - - try: - # Load current settings - with open(self.global_settings_path) as f: - settings = json.load(f) - - # Remove amplihack hooks while preserving others - hooks = settings.get("hooks", {}) - - for hook_type in self.ALL_HOOK_TYPES: - if hook_type not in hooks: - continue - - hook_configs = hooks[hook_type] - filtered_configs = [] - - for hook_config in hook_configs: - # Filter hooks array to remove amplihack hooks - filtered_hooks = [] - - for hook in hook_config.get("hooks", []): - command = hook.get("command", "") - - # Keep hook if it's not an amplihack hook - if not any(pattern in command for pattern in self.AMPLIHACK_HOOK_PATTERNS): - filtered_hooks.append(hook) - - # Keep config if it has remaining hooks - if filtered_hooks: - hook_config["hooks"] = filtered_hooks - filtered_configs.append(hook_config) - - # Update hook type with filtered configs - if filtered_configs: - hooks[hook_type] = filtered_configs - else: - # Remove empty hook type - del hooks[hook_type] - - # Update settings - settings["hooks"] = hooks - - # Write atomically using safe_json_update - return self.safe_json_update(self.global_settings_path, settings) - - except (OSError, json.JSONDecodeError) as e: - self.log(f"Error removing hooks: {e}") - return False - - def _deduplicate_settings_file(self, file_path: Path) -> None: - """Deduplicate hooks in a settings.json file. - - Args: - file_path: Path to settings.json to deduplicate - """ - if not file_path.exists(): - return - - try: - with open(file_path) as f: - settings = json.load(f) - - removed = self.deduplicate_hooks(settings) - - if removed > 0: - self.log(f"Removed {removed} duplicate hook entries from {file_path.name}") - self.safe_json_update(file_path, settings) - except (OSError, json.JSONDecodeError) as e: - self.log(f"Deduplication failed for {file_path}: {e}") - - @staticmethod - def deduplicate_hooks(settings: dict[str, Any]) -> int: - """Remove duplicate hook entries from a settings dict (in-place). - - Two hook config entries are considered duplicates when they have - the same set of command strings (order-independent). Other fields - like type, timeout, or matcher are ignored for comparison purposes - since real-world duplicates differ only in position, not metadata. - - Args: - settings: Parsed settings.json dict (modified in-place) - - Returns: - Number of duplicate entries removed - """ - hooks = settings.get("hooks", {}) - removed = 0 - - for hook_type, hook_configs in list(hooks.items()): - if not isinstance(hook_configs, list): - continue - - seen_commands: set[frozenset[str]] = set() - unique_configs: list[dict[str, Any]] = [] - - for config in hook_configs: - # Build a signature from the command set in this config - commands = frozenset(h.get("command", "") for h in config.get("hooks", [])) - - if commands in seen_commands: - removed += 1 - continue - - seen_commands.add(commands) - unique_configs.append(config) - - hooks[hook_type] = unique_configs - - return removed - - def safe_json_update(self, file_path: Path, data: dict[str, Any]) -> bool: - """Atomic JSON file update with backup. - - Uses temp file + rename for atomic write operation. - - Args: - file_path: Path to JSON file to update - data: Dictionary to write as JSON - - Returns: - True if update successful, False otherwise - """ - try: - # Write to temp file first - temp_path = file_path.parent / f".{file_path.name}.tmp" - - with open(temp_path, "w") as f: - json.dump(data, f, indent=2) - f.write("\n") # Add trailing newline - - # Atomic rename (overwrites existing file) - os.replace(temp_path, file_path) - - return True - - except (OSError, TypeError) as e: - self.log(f"JSON update failed: {e}") - - # Clean up temp file if it exists - try: - temp_path = file_path.parent / f".{file_path.name}.tmp" - if temp_path.exists(): - temp_path.unlink() - except OSError: - pass - - return False - - -def migrate_global_hooks(project_root: Path | None = None) -> HookMigrationResult: - """Convenience function to migrate global amplihack hooks. - - Args: - project_root: Project root directory (auto-detected if None) - - Returns: - HookMigrationResult with operation details - - Example: - >>> result = migrate_global_hooks() - >>> if result.success and result.global_hooks_removed: - ... print("✓ Global hooks migrated successfully") - """ - migrator = SettingsMigrator(project_root) - return migrator.migrate_to_project_local() - - -__all__ = ["SettingsMigrator", "HookMigrationResult", "migrate_global_hooks"] - - -if __name__ == "__main__": - # For testing: Allow running directly - result = migrate_global_hooks() - - print("\nMigration Result:") - print(f" Success: {result.success}") - print(f" Global hooks found: {result.global_hooks_found}") - print(f" Global hooks removed: {result.global_hooks_removed}") - print(f" Project hook ensured: {result.project_hook_ensured}") - print(f" Backup created: {result.backup_created}") - - if result.error: - print(f" Error: {result.error}") - sys.exit(1) - - sys.exit(0 if result.success else 1) diff --git a/amplifier-bundle/tools/amplihack/hooks/shutdown_context.py b/amplifier-bundle/tools/amplihack/hooks/shutdown_context.py deleted file mode 100755 index 4d144cc18..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/shutdown_context.py +++ /dev/null @@ -1,224 +0,0 @@ -#!/usr/bin/env python3 -""" -Shutdown context detection for graceful cleanup operations. - -This module provides utilities to detect when application shutdown is in -progress, allowing hooks and other components to skip blocking operations -during cleanup. - -Problem: - During atexit cleanup, hooks that read from stdin can block indefinitely - because stdin is closed or detached. This causes hangs requiring Ctrl-C - to terminate. - -Solution: - Multi-layered shutdown detection that checks: - 1. AMPLIHACK_SHUTDOWN_IN_PROGRESS environment variable - 2. Call stack for atexit handler presence - 3. stdin state (closed or detached) - - When shutdown is detected, hooks return immediately with safe defaults - instead of blocking on stdin reads. - -Philosophy: -- Ruthlessly Simple: Single-purpose module with clear contract -- Standard Library Only: No external dependencies -- Thread-Safe: Uses environment variables -- Fail-Open: Returns safe defaults when in doubt -- Zero-BS: Every function works, no stubs - -Public API (the "studs"): - is_shutdown_in_progress: Detect if shutdown is in progress - mark_shutdown: Set shutdown flag (for signal handlers and atexit) - clear_shutdown: Clear shutdown flag (for testing only) - -Example: - >>> # Signal handler marks shutdown - >>> def signal_handler(sig, frame): - ... mark_shutdown() - ... sys.exit(0) - - >>> # Hook checks before stdin read - >>> def read_input(): - ... if is_shutdown_in_progress(): - ... return {} - ... return json.loads(sys.stdin.read()) -""" - -import inspect -import io -import os -import sys - -__all__ = ["is_shutdown_in_progress", "mark_shutdown", "clear_shutdown"] - - -def is_shutdown_in_progress() -> bool: - """Detect if application shutdown is in progress. - - Uses multi-layered detection to determine if shutdown is happening: - 1. Check AMPLIHACK_SHUTDOWN_IN_PROGRESS environment variable - 2. Inspect call stack for atexit handler presence - 3. Check if stdin is closed or detached - - Returns: - True if shutdown is detected, False otherwise - - Note: - This is a best-effort detection that errs on the side of caution. - False positives (detecting shutdown when not happening) are acceptable - since they only cause hooks to skip processing, which is safe during - cleanup. - - Example: - >>> mark_shutdown() - >>> is_shutdown_in_progress() - True - >>> clear_shutdown() - >>> is_shutdown_in_progress() - False - """ - if os.environ.get("AMPLIHACK_SHUTDOWN_IN_PROGRESS") == "1": - return True - - if _is_in_atexit_context(): - return True - - if _is_stdin_closed(): - return True - - return False - - -def mark_shutdown() -> None: - """Mark that shutdown is in progress. - - Sets AMPLIHACK_SHUTDOWN_IN_PROGRESS environment variable to coordinate - graceful shutdown across all hooks and components. - - This should be called: - - In signal handlers (SIGINT, SIGTERM) before sys.exit() - - In atexit handlers before cleanup operations - - Before any operation that may trigger hook execution during shutdown - - Thread-safe: Uses environment variables which are process-global. - - Example: - >>> def signal_handler(sig, frame): - ... mark_shutdown() # Prevent hooks from blocking - ... sys.exit(0) - """ - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - -def clear_shutdown() -> None: - """Clear shutdown flag (for testing only). - - Removes AMPLIHACK_SHUTDOWN_IN_PROGRESS environment variable. - This should ONLY be used in tests to reset state between test cases. - - Warning: - Never call this in production code. Once shutdown begins, it should - not be reversed. - - Example: - >>> # In test cleanup - >>> def teardown(): - ... clear_shutdown() # Reset state for next test - """ - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -def _is_in_atexit_context() -> bool: - """Check if we're currently executing in an atexit handler. - - Inspects the call stack to detect if we're running inside an atexit - cleanup handler. This helps detect shutdown even when the environment - variable isn't set yet. - - Returns: - True if atexit handler is in call stack, False otherwise - - Note: - This is a heuristic check that may have false negatives, but it - provides an additional layer of shutdown detection without requiring - explicit coordination. - """ - try: - # Get current call stack - stack = inspect.stack() - - # Look for atexit module in call stack - for frame_info in stack: - # Check module name - module = inspect.getmodule(frame_info.frame) - if module and module.__name__ == "atexit": - return True - - # Check function name for common atexit patterns - func_name = frame_info.function - if func_name in ("_run_exitfuncs", "_cleanup_on_exit"): - return True - - return False - except Exception: - # Fail-open: If stack inspection fails, assume not in atexit - return False - - -def _is_stdin_closed() -> bool: - """Check if stdin is closed or detached. - - During atexit cleanup, stdin may be closed or detached by the Python - interpreter. Attempting to read from closed stdin causes blocking or - errors. - - Returns: - True if stdin is closed or detached, False otherwise - - Note: - This check catches cases where shutdown is happening but the - environment variable wasn't set (e.g., in tests or unexpected - shutdown paths). - - We prioritize the `closed` attribute over fileno() checks because: - - closed=True is a definitive signal that stdin is unusable - - fileno() may not be supported on mocks/StringIO (not a shutdown signal) - """ - try: - # Check if stdin exists - if not hasattr(sys, "stdin") or sys.stdin is None: - return True - - # Check if stdin is explicitly closed (most reliable signal) - if hasattr(sys.stdin, "closed") and sys.stdin.closed: - return True - - # Try to get file descriptor (will raise if detached) - # Only consider this a shutdown signal if stdin also lacks basic attributes - try: - sys.stdin.fileno() - # stdin has valid fileno, so it's operational - return False - except io.UnsupportedOperation: - # This is a StringIO or similar mock - not a shutdown signal - # (Real stdin always supports fileno()) - return False - except (AttributeError, OSError, ValueError): - # fileno() failed - but this might just be a mock/StringIO - # Only treat as closed if stdin also lacks the closed attribute - # (real stdin always has this attribute) - if not hasattr(sys.stdin, "closed"): - # This is likely a mock that doesn't support fileno() - # Don't treat as shutdown signal - return False - # stdin has 'closed' attribute but fileno() fails - # This suggests stdin is detached during shutdown - return True - - return False - except Exception: - # Fail-open: If we can't determine stdin state, assume it's closed - # This prevents blocking on potentially broken stdin - return True diff --git a/amplifier-bundle/tools/amplihack/hooks/stop.py b/amplifier-bundle/tools/amplihack/hooks/stop.py deleted file mode 100755 index c6def69df..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/stop.py +++ /dev/null @@ -1,766 +0,0 @@ -#!/usr/bin/env python3 -""" -Claude Code hook for stop events. -Checks lock flag and blocks stop if continuous work mode is enabled. - -Stop Hook Protocol (https://docs.claude.com/en/docs/claude-code/hooks): -- Return {"decision": "approve"} to allow normal stop -- Return {"decision": "block", "reason": "..."} to prevent stop and continue working -""" - -import json -import os -import sys -from datetime import datetime -from pathlib import Path -from typing import Any - -# Clean import structure -sys.path.insert(0, str(Path(__file__).parent)) - -# Import error protocol first for structured errors -try: - from error_protocol import HookError, HookErrorSeverity, HookImportError -except ImportError as e: - # Fallback if error_protocol doesn't exist - print(f"Failed to import error_protocol: {e}", file=sys.stderr) - print("Make sure error_protocol.py exists in the same directory", file=sys.stderr) - sys.exit(1) - -# Import HookProcessor - wrap in try/except for robustness -try: - from hook_processor import HookProcessor # type: ignore[import] -except ImportError as e: - # If import fails, raise structured error - raise HookImportError( - HookError( - severity=HookErrorSeverity.FATAL, - message=f"Failed to import hook_processor: {e}", - context="Loading hook dependencies", - suggestion="Ensure hook_processor.py exists in the same directory", - ) - ) - -# Default continuation prompt when no custom prompt is provided -DEFAULT_CONTINUATION_PROMPT = ( - "we must keep pursuing the user's objective and must not stop the turn - " - "look for any additional TODOs, next steps, or unfinished work and pursue it " - "diligently in as many parallel tasks as you can" -) - - -class StopHook(HookProcessor): - """Hook processor for stop events with lock support.""" - - def __init__(self): - super().__init__("stop") - self.lock_flag = self.project_root / ".claude" / "runtime" / "locks" / ".lock_active" - self.continuation_prompt_file = ( - self.project_root / ".claude" / "runtime" / "locks" / ".continuation_prompt" - ) - # Initialize strategy (will be set in process()) - self.strategy = None - - def process(self, input_data: dict[str, Any]) -> dict[str, Any]: - """Check lock flag and block stop if active. - Run synchronous reflection analysis if enabled. - - Args: - input_data: Input from Claude Code - - Returns: - Dict with decision to block or allow stop - """ - from shutdown_context import is_shutdown_in_progress - - # Skip expensive operations during shutdown - if is_shutdown_in_progress(): - self.log("=== STOP HOOK: Shutdown detected - skipping all operations ===") - return {"decision": "approve"} - - # Detect launcher and select strategy - self.strategy = self._select_strategy() - if self.strategy: - self.log(f"Using strategy: {self.strategy.__class__.__name__}") - # Check for strategy-specific stop handling - strategy_result = self.strategy.handle_stop(input_data) - if strategy_result: - self.log("Strategy provided custom stop handling") - return strategy_result - - self.log("=== STOP HOOK STARTED ===") - self.log(f"Input keys: {list(input_data.keys())}") - - try: - lock_exists = self.lock_flag.exists() - except (PermissionError, OSError) as e: - self.log(f"Cannot access lock file: {e}", "WARNING") - self.log("=== STOP HOOK ENDED (fail-safe: approve) ===") - return {"decision": "approve"} - - if lock_exists: - # Lock is active - block stop and continue working - self.log("Lock is active - blocking stop to continue working") - self.save_metric("lock_blocks", 1) - - # Get session ID for per-session tracking - session_id = self._get_current_session_id() - - # Increment lock mode counter - self._increment_lock_counter(session_id) - - # Read custom continuation prompt or use default - continuation_prompt = self.read_continuation_prompt() - - self.log("=== STOP HOOK ENDED (decision: block - lock active) ===") - return { - "decision": "block", - "reason": continuation_prompt, - } - - # Neo4j cleanup removed (Week 7) - no cleanup needed for Kuzu backend - - # Power-steering check (before reflection) - if not lock_exists and self._should_run_power_steering(): - try: - from power_steering_checker import PowerSteeringChecker - from power_steering_progress import ProgressTracker - - ps_checker = PowerSteeringChecker(self.project_root) - transcript_path_str = input_data.get("transcript_path") - - if not transcript_path_str: - self.log( - "[CAUSE] Missing transcript_path in input_data. [IMPACT] Power-steering cannot analyze session without transcript. [ACTION] Skipping power-steering check.", - "WARNING", - ) - self.save_metric("power_steering_missing_transcript", 1) - elif transcript_path_str: - from pathlib import Path - - transcript_path = Path(transcript_path_str) - session_id = self._get_current_session_id() - - # Create progress tracker (auto-detects verbosity and pirate mode from preferences) - progress_tracker = ProgressTracker(project_root=self.project_root) - - self.log("Running power-steering analysis...") - ps_result = ps_checker.check( - transcript_path, session_id, progress_callback=progress_tracker.emit - ) - - # Increment counter for statusline display (session-specific) - self._increment_power_steering_counter(session_id) - - if ps_result.decision == "block": - # Check if this is first stop (visibility feature) - if ps_result.is_first_stop and ps_result.analysis: - # FIRST STOP: Display all results for visibility - # Note: Semaphore marking already done in checker to prevent race condition - self.log( - "First stop - displaying all consideration results for visibility" - ) - progress_tracker.display_all_results( - analysis=ps_result.analysis, - considerations=ps_checker.considerations, - is_first_stop=True, - ) - self.save_metric("power_steering_first_stop_visibility", 1) - else: - # Subsequent stop with failures OR first stop with failures - self.log("Power-steering blocking stop - work incomplete") - self.save_metric("power_steering_blocks", 1) - # Display final summary - progress_tracker.display_summary() - - self.log("=== STOP HOOK ENDED (decision: block - power-steering) ===") - return { - "decision": "block", - "reason": ps_result.continuation_prompt or "Session appears incomplete", - } - self.log(f"Power-steering approved stop: {ps_result.reasons}") - self.save_metric("power_steering_approves", 1) - - # Display final summary - progress_tracker.display_summary() - - # Display summary if available - if ps_result.summary: - self.log("Power-steering summary generated") - # Summary is saved to file by checker - - except Exception as e: - # Fail-open: Continue to normal flow on any error - self.log(f"Power-steering error (fail-open): {e}", "WARNING") - self.save_metric("power_steering_errors", 1) - - # Surface error to user via stderr for visibility - print("\n⚠️ Power-Steering Warning", file=sys.stderr) - print(f"Power-steering encountered an error and was skipped: {e}", file=sys.stderr) - print( - "Check .claude/runtime/power-steering/power_steering.log for details", - file=sys.stderr, - ) - - # Check if reflection should run - if not self._should_run_reflection(): - self.log("Reflection not enabled or skipped - allowing stop") - self.log("=== STOP HOOK ENDED (decision: approve - no reflection) ===") - return {"decision": "approve"} - - session_id = self._get_current_session_id() - semaphore_file = ( - self.project_root - / ".claude" - / "runtime" - / "reflection" - / f".reflection_presented_{session_id}" - ) - - if semaphore_file.exists(): - self.log( - f"Reflection already presented for session {session_id} - removing semaphore and allowing stop" - ) - try: - semaphore_file.unlink() - except OSError as e: - self.log( - f"[CAUSE] Cannot remove semaphore file {semaphore_file}. [IMPACT] Reflection may incorrectly skip on next stop. [ACTION] Continuing anyway (non-critical). Error: {e}", - "WARNING", - ) - self.save_metric("semaphore_cleanup_errors", 1) - self.log("=== STOP HOOK ENDED (decision: approve - reflection already shown) ===") - return {"decision": "approve"} - - try: - self._announce_reflection_start() - transcript_path = input_data.get("transcript_path") - filled_template = self._run_reflection_sync(transcript_path) - - # If reflection failed or returned nothing, allow stop - if not filled_template or not filled_template.strip(): - self.log("No reflection result - allowing stop") - self.log("=== STOP HOOK ENDED (decision: approve - no reflection) ===") - return {"decision": "approve"} - - # Generate unique filename for this reflection - reflection_filename = self._generate_reflection_filename(filled_template) - reflection_path = ( - self.project_root / ".claude" / "runtime" / "reflection" / reflection_filename - ) - - # Save reflection to uniquely named file - try: - reflection_path.parent.mkdir(parents=True, exist_ok=True) - reflection_path.write_text(filled_template) - self.log(f"Reflection saved to: {reflection_path}") - except Exception as e: - self.log(f"Warning: Could not save reflection file: {e}", "WARNING") - - # Also save to current_findings.md for backward compatibility - try: - current_findings = ( - self.project_root / ".claude" / "runtime" / "reflection" / "current_findings.md" - ) - current_findings.write_text(filled_template) - except Exception as e: - self.log( - f"[CAUSE] Cannot write backward-compatibility file current_findings.md. [IMPACT] Legacy tools may not find reflection results. [ACTION] Primary reflection file still saved. Error: {e}", - "WARNING", - ) - self.save_metric("backward_compat_write_errors", 1) - - self.log("Reflection complete - blocking with presentation instructions") - result = self._block_with_findings(filled_template, str(reflection_path)) - - try: - semaphore_file.parent.mkdir(parents=True, exist_ok=True) - semaphore_file.touch() - self.log(f"Created reflection semaphore: {semaphore_file}") - except OSError as e: - self.log(f"Warning: Could not create semaphore file: {e}", "WARNING") - - self.log("=== STOP HOOK ENDED (decision: block - reflection complete) ===") - return result - - except Exception as e: - self.log(f"Reflection error: {e}", "ERROR") - self.save_metric("reflection_errors", 1) - self.log("=== STOP HOOK ENDED (decision: approve - error occurred) ===") - return {"decision": "approve"} - - # Neo4j cleanup methods removed (Week 7 cleanup) - # Kuzu backend does not require cleanup on session exit - - def read_continuation_prompt(self) -> str: - """Read custom continuation prompt from file or return default. - - Returns: - str: Custom prompt content or DEFAULT_CONTINUATION_PROMPT - """ - # Check if custom prompt file exists - if not self.continuation_prompt_file.exists(): - self.log("No custom continuation prompt file - using default") - return DEFAULT_CONTINUATION_PROMPT - - try: - # Read prompt content - content = self.continuation_prompt_file.read_text(encoding="utf-8").strip() - - # Check if empty - if not content: - self.log("Custom continuation prompt file is empty - using default") - return DEFAULT_CONTINUATION_PROMPT - - # Check length constraints - content_len = len(content) - - # Hard limit: 1000 characters - if content_len > 1000: - self.log( - f"Custom prompt too long ({content_len} chars) - using default", - "WARNING", - ) - return DEFAULT_CONTINUATION_PROMPT - - # Warning for long prompts (500-1000 chars) - if content_len > 500: - self.log( - f"Custom prompt is long ({content_len} chars) - consider shortening for clarity", - "WARNING", - ) - - # Valid custom prompt - self.log(f"Using custom continuation prompt ({content_len} chars)") - return content - - except (PermissionError, OSError, UnicodeDecodeError) as e: - self.log(f"Error reading custom prompt: {e} - using default", "WARNING") - return DEFAULT_CONTINUATION_PROMPT - - def _increment_power_steering_counter(self, session_id: str) -> int: - """Increment power-steering invocation counter for statusline display. - - Writes counter to .claude/runtime/power-steering/{session_id}/session_count - for statusline to read. Session-specific like lock counter. - - Args: - session_id: Session identifier - - Returns: - New count value - """ - try: - counter_file = ( - self.project_root - / ".claude" - / "runtime" - / "power-steering" - / session_id - / "session_count" - ) - counter_file.parent.mkdir(parents=True, exist_ok=True) - - # Read current count (default to 0) - current_count = 0 - if counter_file.exists(): - try: - current_count = int(counter_file.read_text().strip()) - except (ValueError, OSError): - current_count = 0 - - # Increment and write - new_count = current_count + 1 - counter_file.write_text(str(new_count)) - return new_count - - except Exception as e: - # Fail-safe: Don't break hook if counter write fails - self.log(f"Failed to update power-steering counter: {e}", "DEBUG") - return 0 - - def _increment_lock_counter(self, session_id: str) -> int: - """Increment lock mode invocation counter for session. - - Args: - session_id: Session identifier - - Returns: - New count value (for logging/metrics) - """ - try: - counter_file = ( - self.project_root - / ".claude" - / "runtime" - / "locks" - / session_id - / "lock_invocations.txt" - ) - counter_file.parent.mkdir(parents=True, exist_ok=True) - - # Read current count (default to 0) - current_count = 0 - if counter_file.exists(): - try: - current_count = int(counter_file.read_text().strip()) - except (ValueError, OSError): - current_count = 0 - - # Increment and write - new_count = current_count + 1 - counter_file.write_text(str(new_count)) - - self.log(f"Lock mode invocation count: {new_count}") - return new_count - - except Exception as e: - # Fail-safe: Don't break hook if counter write fails - self.log(f"Failed to update lock counter: {e}", "DEBUG") - return 0 - - def _should_run_power_steering(self) -> bool: - """Check if power-steering should run based on config and environment. - - Returns: - True if power-steering should run, False otherwise - """ - try: - # Reuse PowerSteeringChecker's logic instead of duplicating - from power_steering_checker import PowerSteeringChecker - - checker = PowerSteeringChecker(self.project_root) - is_disabled = checker._is_disabled() - - if is_disabled: - self.log("Power-steering is disabled - skipping", "WARNING") - self.save_metric("power_steering_disabled_checks", 1) - return False - - # Check for power-steering lock to prevent concurrent runs - ps_dir = self.project_root / ".claude" / "runtime" / "power-steering" - ps_lock = ps_dir / ".power_steering_lock" - - if ps_lock.exists(): - self.log("Power-steering already running - skipping", "WARNING") - self.save_metric("power_steering_concurrent_skips", 1) - return False - - return True - - except Exception as e: - # Fail-open: On any error, skip power-steering - self.log( - f"[CAUSE] Exception during power-steering status check. [IMPACT] Power-steering will not run this session. [ACTION] Failing open to allow normal stop. Error: {e}", - "WARNING", - ) - self.save_metric("power_steering_check_errors", 1) - return False - - def _should_run_reflection(self) -> bool: - """Check if reflection should run based on config and environment. - - Returns: - True if reflection should run, False otherwise - """ - # Check environment variable skip flag - if os.environ.get("AMPLIHACK_SKIP_REFLECTION"): - self.log("AMPLIHACK_SKIP_REFLECTION is set - skipping reflection", "WARNING") - self.save_metric("reflection_env_skips", 1) - return False - - # Load reflection config - config_path = self.project_root / ".claude" / "tools" / "amplihack" / ".reflection_config" - if not config_path.exists(): - self.log("Reflection config not found - skipping reflection", "WARNING") - self.save_metric("reflection_no_config", 1) - return False - - try: - with open(config_path) as f: - config = json.load(f) - except (OSError, json.JSONDecodeError) as e: - self.log( - f"[CAUSE] Cannot read or parse reflection config file. [IMPACT] Reflection will not run. [ACTION] Check config file format and permissions. Error: {e}", - "WARNING", - ) - self.save_metric("reflection_config_errors", 1) - return False - - # Check if enabled - if not config.get("enabled", False): - self.log("Reflection is disabled - skipping", "WARNING") - self.save_metric("reflection_disabled_checks", 1) - return False - - # Check for reflection lock to prevent concurrent runs - reflection_dir = self.project_root / ".claude" / "runtime" / "reflection" - reflection_lock = reflection_dir / ".reflection_lock" - - if reflection_lock.exists(): - self.log("Reflection already running - skipping", "WARNING") - self.save_metric("reflection_concurrent_skips", 1) - return False - - return True - - def _get_current_session_id(self) -> str: - """Detect current session ID from environment or logs. - - Priority: - 1. CLAUDE_SESSION_ID env var (if set by tooling) - 2. Most recent session directory - 3. Generate timestamp-based ID - - Returns: - Session ID string - """ - # Try environment variable - session_id = os.environ.get("CLAUDE_SESSION_ID") - if session_id: - return session_id - - logs_dir = self.project_root / ".claude" / "runtime" / "logs" - if logs_dir.exists(): - try: - sessions = [p for p in logs_dir.iterdir() if p.is_dir()] - sessions = sorted(sessions, key=lambda p: p.stat().st_mtime, reverse=True) - if sessions: - return sessions[0].name - except (OSError, PermissionError) as e: - self.log( - f"[CAUSE] Cannot access logs directory to detect session ID. [IMPACT] Will use timestamp-based ID instead. [ACTION] Check directory permissions. Error: {e}", - "WARNING", - ) - self.save_metric("session_id_detection_errors", 1) - - # Generate timestamp-based ID - return datetime.now().strftime("%Y%m%d_%H%M%S") - - def _run_reflection_sync(self, transcript_path: str | None = None) -> str | None: - """Run Claude SDK-based reflection synchronously. - - Args: - transcript_path: Optional path to JSONL transcript file from Claude Code - - Returns: - Filled FEEDBACK_SUMMARY template as string, or None if failed - """ - try: - from claude_reflection import run_claude_reflection - except ImportError as e: - self.log( - f"[CAUSE] Cannot import claude_reflection module. [IMPACT] Reflection functionality unavailable. [ACTION] Check if claude_reflection.py exists and is accessible. Error: {e}", - "WARNING", - ) - self.save_metric("reflection_import_errors", 1) - return None - - # Get session ID - session_id = self._get_current_session_id() - self.log(f"Running Claude-powered reflection for session: {session_id}") - - conversation = None - if transcript_path: - transcript_file = Path(transcript_path) - self.log(f"Using transcript from Claude Code: {transcript_file}") - - try: - conversation = [] - with open(transcript_file) as f: - for line in f: - line = line.strip() - if not line: - continue - entry = json.loads(line) - if entry.get("type") in ["user", "assistant"] and "message" in entry: - msg = entry["message"] - content = msg.get("content", "") - if isinstance(content, list): - text_parts = [] - for block in content: - if isinstance(block, dict) and block.get("type") == "text": - text_parts.append(block.get("text", "")) - content = "\n".join(text_parts) - - conversation.append( - { - "role": msg.get("role", entry.get("type", "user")), - "content": content, - } - ) - self.log(f"Loaded {len(conversation)} conversation turns from transcript") - except Exception as e: - self.log( - f"[CAUSE] Failed to parse transcript file. [IMPACT] Reflection will run without transcript context. [ACTION] Check transcript file format. Error: {e}", - "WARNING", - ) - self.save_metric("transcript_parse_errors", 1) - conversation = None - - # Find session directory - session_dir = self.project_root / ".claude" / "runtime" / "logs" / session_id - - if not session_dir.exists(): - self.log( - f"[CAUSE] Session directory not found at expected path. [IMPACT] Cannot run reflection without session logs. [ACTION] Check session ID detection logic. Path: {session_dir}", - "WARNING", - ) - self.save_metric("session_dir_not_found", 1) - return None - - # Run Claude reflection (uses SDK) - try: - filled_template = run_claude_reflection(session_dir, self.project_root, conversation) - - if not filled_template: - self.log( - "[CAUSE] Claude reflection returned empty or None result. [IMPACT] No reflection findings to present. [ACTION] Check reflection implementation and Claude API connectivity.", - "WARNING", - ) - self.save_metric("reflection_empty_results", 1) - return None - - # Save the filled template - output_path = session_dir / "FEEDBACK_SUMMARY.md" - output_path.write_text(filled_template) - self.log(f"Feedback summary saved to: {output_path}") - - # Also save to current_findings for backward compatibility - findings_path = ( - self.project_root / ".claude" / "runtime" / "reflection" / "current_findings.md" - ) - findings_path.parent.mkdir(parents=True, exist_ok=True) - findings_path.write_text(filled_template) - - # Save metrics - self.save_metric("reflection_success", 1) - - return filled_template - - except Exception as e: - self.log( - f"[CAUSE] Claude reflection execution failed with exception. [IMPACT] No reflection analysis available this session. [ACTION] Check Claude SDK configuration and API status. Error: {e}", - "ERROR", - ) - self.save_metric("reflection_execution_errors", 1) - return None - - def _announce_reflection_start(self) -> None: - """Announce that reflection is starting.""" - print(f"\n{'=' * 70}", file=sys.stderr) - print("🔍 BEGINNING SELF-REFLECTION ON SESSION", file=sys.stderr) - print(f"{'=' * 70}\n", file=sys.stderr) - print("Analyzing the conversation using Claude SDK...", file=sys.stderr) - print("This will take 10-60 seconds.", file=sys.stderr) - print("\nWhat reflection analyzes:", file=sys.stderr) - print(" • Task complexity and workflow adherence", file=sys.stderr) - print(" • User interactions and satisfaction", file=sys.stderr) - print(" • Subagent usage and efficiency", file=sys.stderr) - print(" • Learning opportunities and improvements", file=sys.stderr) - print(f"\n{'=' * 70}\n", file=sys.stderr) - - def _generate_reflection_filename(self, filled_template: str) -> str: - """Generate descriptive filename for this session's reflection. - - Args: - filled_template: The reflection content (used to extract task summary) - - Returns: - Filename like: reflection-system-investigation-20251104_165432.md - """ - # Extract task summary from template if possible - task_slug = "session" - try: - if "## Task Summary" in filled_template: - summary_section = filled_template.split("## Task Summary")[1].split("\n\n")[1] - first_sentence = summary_section.split(".")[0][:100] - import re - - task_slug = re.sub(r"[^a-z0-9]+", "-", first_sentence.lower()).strip("-") - task_slug = task_slug[:50] - except Exception: - task_slug = "session" - - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - - return f"reflection-{task_slug}-{timestamp}.md" - - def _block_with_findings(self, filled_template: str, reflection_file_path: str) -> dict: - """Block stop with instructions to read and present reflection. - - Args: - filled_template: Filled FEEDBACK_SUMMARY template from Claude - reflection_file_path: Path where reflection was saved - - Returns: - Block decision dict with presentation instructions - """ - reason = f"""📋 SESSION REFLECTION COMPLETE - -The reflection system has analyzed this session and saved the findings to: - -**{reflection_file_path}** - -**YOUR TASK:** - -1. Read the reflection file using the Read tool -2. Parse the findings and present them to the user following this structure: - - a) **Executive Summary** (2-3 sentences) - - What was accomplished - - Key insight from reflection - - b) **Key Findings** (Be verbose!) - - What Worked Well: Highlight 2-3 top successes with specific examples - - Areas for Improvement: Highlight 2-3 main issues with context - - c) **Top Recommendations** (Be verbose!) - - Present 3-5 recommendations in priority order - - For each: Problem → Solution → Impact → Why it matters - - d) **Action Options** - Give the user these choices: - • Create GitHub Issues (work on NOW or save for LATER) - • Start Auto Mode (if concrete improvements can be implemented) - • Discuss Specific Improvements (explore recommendations in detail) - • Just Stop (next stop will succeed - semaphore prevents re-run) - -After presenting the findings and getting the user's decision, you may proceed accordingly.""" - - self.save_metric("reflection_blocked", 1) - - return {"decision": "block", "reason": reason} - - def _select_strategy(self): - """Detect launcher and select appropriate strategy.""" - try: - # Import adaptive components - sys.path.insert(0, str(self.project_root / "src" / "amplihack")) - from amplihack.context.adaptive.detector import LauncherDetector - from amplihack.context.adaptive.strategies import ClaudeStrategy, CopilotStrategy - - detector = LauncherDetector(self.project_root) - launcher_type = detector.detect() - - if launcher_type == "copilot": - return CopilotStrategy(self.project_root, self.log) - return ClaudeStrategy(self.project_root, self.log) - - except ImportError as e: - self.log(f"Adaptive strategy not available: {e}", "DEBUG") - return None - - -def stop(): - """Entry point for the stop hook (called by Claude Code).""" - hook = StopHook() - hook.run() - - -def main(): - """Legacy entry point for the stop hook.""" - stop() - - -if __name__ == "__main__": - main() diff --git a/amplifier-bundle/tools/amplihack/hooks/templates/power_steering_prompt.txt b/amplifier-bundle/tools/amplihack/hooks/templates/power_steering_prompt.txt deleted file mode 100644 index 07fecffd4..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/templates/power_steering_prompt.txt +++ /dev/null @@ -1,74 +0,0 @@ -# Power-Steering Consideration Analysis Prompt Template -# -# This prompt is used by claude_power_steering.py to analyze individual considerations. -# -# Available variables for substitution: -# {{consideration_id}} - Unique identifier for consideration -# {{consideration_question}} - The question to evaluate -# {{consideration_description}} - Detailed description -# {{consideration_category}} - Category (e.g., "Workflow & Philosophy") -# {{message_count}} - Number of messages in session -# {{conversation_summary}} - Formatted conversation excerpt -# -# Use {{VARIABLE_NAME}} syntax for substitution. -# - -You are analyzing a Claude Code session to evaluate a specific completion consideration. - -## Consideration to Evaluate - -**ID**: {consideration_id} -**Question**: {consideration_question} -**Description**: {consideration_description} -**Category**: {consideration_category} - -## Session Context - -The session had {message_count} messages. Here are key excerpts: - -{conversation_summary} - -## Your Task - -Analyze the conversation to determine if this consideration is satisfied. - -### Decision Criteria - -1. **Evidence-Based**: Look for concrete evidence in the conversation - - What tools were used (Read, Write, Edit, Bash, etc.)? - - What actions were taken by Claude? - - What did the user request and verify? - -2. **Relevance Check**: Is this consideration applicable to this session? - - If the work doesn't relate to this consideration, respond SATISFIED - - Example: If no code was written, code quality checks are not applicable - -3. **Completion Focus**: Does the evidence show this aspect is complete? - - Partial completion = NOT SATISFIED - - Clear completion = SATISFIED - - Ambiguous or missing = Use context to decide - -### Response Format - -Respond with exactly ONE of the following: - -**SATISFIED: [brief reason]** -- Use this when the consideration is clearly met -- Provide a 1-sentence reason citing evidence -- Example: "SATISFIED: All TODO items marked completed in last TodoWrite call" - -**NOT SATISFIED: [brief reason]** -- Use this when the consideration is clearly NOT met -- Provide a 1-sentence reason citing what's missing -- Example: "NOT SATISFIED: Tests were not run locally before session end" - -### Guidelines - -- Be direct and specific -- Reference actual events from the conversation -- Focus on observable actions and tool usage -- If consideration is not applicable, respond SATISFIED -- If evidence is ambiguous, err toward SATISFIED (fail-open) -- Keep your reason to 1-2 sentences maximum - -Please provide your assessment now. diff --git a/amplifier-bundle/tools/amplihack/hooks/templates/reflection_prompt.txt b/amplifier-bundle/tools/amplihack/hooks/templates/reflection_prompt.txt deleted file mode 100644 index a0390c1dd..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/templates/reflection_prompt.txt +++ /dev/null @@ -1,74 +0,0 @@ -# Reflection Prompt Template -# -# This prompt is used by claude_reflection.py to analyze Claude Code sessions. -# -# Available variables for substitution: -# {{user_preferences_context}} - User preferences and mandatory behavior -# {{repository_context}} - Current repository detection results -# {{amplihack_repo_uri}} - Amplihack framework repository URL -# {{message_count}} - Number of messages in session -# {{conversation_summary}} - Formatted conversation excerpt -# {{redirects_context}} - Power-steering redirect history (if any) -# {{template}} - FEEDBACK_SUMMARY template to fill out -# -# Use {{VARIABLE_NAME}} syntax for substitution. -# - -You are analyzing a completed Claude Code session to provide feedback and identify learning opportunities. - -{user_preferences_context} -{repository_context} - -## Critical: Distinguish Problem Sources - -When analyzing this session, you MUST clearly distinguish between TWO categories of issues: - -### 1. Amplihack Framework Issues -Problems with the coding tools, agents, workflow, or process itself: -- Agent behavior, effectiveness, or orchestration -- Workflow step execution or adherence -- Tool functionality (hooks, commands, utilities, reflection system) -- Framework architecture or design decisions -- UltraThink coordination and delegation -- Command execution (/amplihack:* commands) -- Session management and logging - -**These issues should be filed against**: {amplihack_repo_uri} - -### 2. Project Code Issues -Problems with the actual application code being developed: -- Application logic bugs or errors -- Feature implementation quality -- Test failures in project-specific tests -- Project-specific design decisions -- User-facing functionality -- Business logic correctness - -**These issues should be filed against**: The current project repository (see Repository Context above) - -**IMPORTANT**: In your feedback, clearly label each issue as either "[AMPLIHACK]" or "[PROJECT]" so it's obvious which repository should handle it. - -## Session Conversation - -The session had {message_count} messages. Here are key excerpts: - -{conversation_summary} - -{redirects_context} - -## Your Task - -Please analyze this session and fill out the following feedback template: - -{template} - -## Guidelines - -1. **Be specific and actionable** - Reference actual events from the session -2. **Identify patterns** - What worked well? What could improve? -3. **Track workflow adherence** - Did Claude follow the DEFAULT_WORKFLOW.md steps? -4. **Note subagent usage** - Which specialized agents were used (architect, builder, reviewer, etc.)? -5. **Categorize improvements** - Clearly mark each issue as [AMPLIHACK] or [PROJECT] -6. **Suggest improvements** - What would make future similar sessions better? - -Please provide the filled-out template now. diff --git a/amplifier-bundle/tools/amplihack/hooks/test_user_prompt_submit.py b/amplifier-bundle/tools/amplihack/hooks/test_user_prompt_submit.py deleted file mode 100755 index e41b437c9..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/test_user_prompt_submit.py +++ /dev/null @@ -1,291 +0,0 @@ -#!/usr/bin/env python3 -""" -Test suite for user_prompt_submit hook. -Verifies hook behavior, performance, and error handling. -""" - -import json -import subprocess -import sys -import tempfile -import time -from pathlib import Path - -# Add hook directory to path -sys.path.insert(0, str(Path(__file__).parent)) -from user_prompt_submit import UserPromptSubmitHook - - -def test_hook_basic_functionality(): - """Test that hook processes input and returns valid output.""" - print("Testing basic functionality...") - - hook = UserPromptSubmitHook() - - # Mock input data - input_data = { - "session_id": "test_session", - "transcript_path": "/tmp/test", - "cwd": str(Path.cwd()), - "hook_event_name": "UserPromptSubmit", - "prompt": "test prompt", - } - - # Process - output = hook.process(input_data) - - # Verify output structure - assert "additionalContext" in output, "Output missing additionalContext" - assert isinstance(output["additionalContext"], str), "additionalContext must be string" - - print("✓ Basic functionality works") - - -def test_preference_extraction(): - """Test that preferences are correctly extracted.""" - print("Testing preference extraction...") - - hook = UserPromptSubmitHook() - - # Find preferences file - pref_file = hook.find_user_preferences() - assert pref_file is not None, "Could not find USER_PREFERENCES.md" - assert pref_file.exists(), f"Preferences file does not exist: {pref_file}" - - # Read and parse - content = pref_file.read_text(encoding="utf-8") - preferences = hook.extract_preferences(content) - - # Verify we got some preferences - assert len(preferences) > 0, "No preferences extracted" - - # Verify expected preferences exist - expected_prefs = ["Communication Style", "Verbosity", "Collaboration Style"] - for pref in expected_prefs: - if pref in preferences: - print(f" Found {pref}: {preferences[pref]}") - - print(f"✓ Extracted {len(preferences)} preferences") - - -def test_context_building(): - """Test that preference context is built correctly.""" - print("Testing context building...") - - hook = UserPromptSubmitHook() - - # Test with sample preferences - preferences = { - "Communication Style": "pirate", - "Verbosity": "balanced", - "Collaboration Style": "interactive", - } - - context = hook.build_preference_context(preferences) - - # Verify context structure - assert "🎯 ACTIVE USER PREFERENCES (MANDATORY):" in context - assert "Communication Style: pirate" in context - assert "Use this style in your response" in context - assert "These preferences MUST be applied" in context - - print(f"✓ Context built ({len(context)} chars)") - - -def test_empty_preferences(): - """Test handling of empty preferences.""" - print("Testing empty preferences...") - - hook = UserPromptSubmitHook() - - # Empty preferences - context = hook.build_preference_context({}) - assert context == "", "Empty preferences should return empty context" - - print("✓ Empty preferences handled correctly") - - -def test_caching(): - """Test that preference caching works.""" - print("Testing caching...") - - hook = UserPromptSubmitHook() - - pref_file = hook.find_user_preferences() - if not pref_file: - print("⚠ No preferences file found, skipping cache test") - return - - # First call - should read file - start = time.time() - prefs1 = hook.get_cached_preferences(pref_file) - time1 = time.time() - start - - # Second call - should use cache - start = time.time() - prefs2 = hook.get_cached_preferences(pref_file) - time2 = time.time() - start - - # Verify results are the same - assert prefs1 == prefs2, "Cached preferences don't match" - - # Verify caching improved performance - assert time2 <= time1, f"Cache should be faster (time1={time1:.4f}s, time2={time2:.4f}s)" - - print(f"✓ Caching works (1st: {time1 * 1000:.1f}ms, 2nd: {time2 * 1000:.1f}ms)") - - -def test_json_output(): - """Test that hook outputs valid JSON via subprocess.""" - print("Testing JSON output via subprocess...") - - hook_script = Path(__file__).parent / "user_prompt_submit.py" - - test_input = { - "session_id": "test_session", - "transcript_path": "/tmp/test", - "cwd": str(Path.cwd()), - "hook_event_name": "UserPromptSubmit", - "prompt": "test prompt", - } - - # Run hook as subprocess - result = subprocess.run( - [sys.executable, str(hook_script)], - input=json.dumps(test_input), - capture_output=True, - text=True, - timeout=5, - ) - - # Verify successful execution - assert result.returncode == 0, f"Hook failed with exit code {result.returncode}" - - # Parse output - try: - output = json.loads(result.stdout) - except json.JSONDecodeError as e: - print(f"Invalid JSON output: {result.stdout}") - raise e - - # Verify structure - assert "additionalContext" in output, "Output missing additionalContext" - - print("✓ JSON output is valid") - - -def test_performance(): - """Test that hook executes within performance target.""" - print("Testing performance (target: < 200ms including Python startup)...") - - hook_script = Path(__file__).parent / "user_prompt_submit.py" - - test_input = { - "session_id": "test_session", - "transcript_path": "/tmp/test", - "cwd": str(Path.cwd()), - "hook_event_name": "UserPromptSubmit", - "prompt": "test prompt", - } - - # Run multiple times and measure - times = [] - for _ in range(5): - start = time.time() - result = subprocess.run( - [sys.executable, str(hook_script)], - input=json.dumps(test_input), - capture_output=True, - text=True, - timeout=5, - ) - elapsed = (time.time() - start) * 1000 # Convert to ms - - assert result.returncode == 0, "Hook execution failed" - times.append(elapsed) - - avg_time = sum(times) / len(times) - min_time = min(times) - max_time = max(times) - - print(f" Average: {avg_time:.1f}ms") - print(f" Min: {min_time:.1f}ms") - print(f" Max: {max_time:.1f}ms") - - # Relaxed performance target (Python startup is slow) - assert avg_time < 200, ( - f"Hook too slow (avg {avg_time:.1f}ms > 200ms target including Python startup)" - ) - - print("✓ Performance acceptable") - - -def test_error_handling(): - """Test error handling for missing files and invalid input.""" - print("Testing error handling...") - - # Test with invalid working directory (no preferences) - with tempfile.TemporaryDirectory() as tmpdir: - hook = UserPromptSubmitHook() - - # This should handle gracefully - hook uses project root, not cwd - input_data = { - "session_id": "test_session", - "transcript_path": "/tmp/test", - "cwd": tmpdir, - "hook_event_name": "UserPromptSubmit", - "prompt": "test prompt", - } - - # Should not raise exception - try: - output = hook.process(input_data) - assert "additionalContext" in output - print("✓ Error handling works") - except Exception as e: - print(f"✗ Unexpected exception: {e}") - raise - - -def run_all_tests(): - """Run all test cases.""" - tests = [ - test_hook_basic_functionality, - test_preference_extraction, - test_context_building, - test_empty_preferences, - test_caching, - test_json_output, - test_performance, - test_error_handling, - ] - - print("=" * 60) - print("Running UserPromptSubmit Hook Tests") - print("=" * 60) - - passed = 0 - failed = 0 - - for test in tests: - try: - test() - passed += 1 - except AssertionError as e: - print(f"✗ {test.__name__} failed: {e}") - failed += 1 - except Exception as e: - print(f"✗ {test.__name__} error: {e}") - failed += 1 - print() - - print("=" * 60) - print(f"Results: {passed} passed, {failed} failed") - print("=" * 60) - - return failed == 0 - - -if __name__ == "__main__": - success = run_all_tests() - sys.exit(0 if success else 1) diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/README.md b/amplifier-bundle/tools/amplihack/hooks/tests/README.md deleted file mode 100644 index 6bf971a0b..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/README.md +++ /dev/null @@ -1,152 +0,0 @@ -# Tests for amplihack Hooks - -This directory contains comprehensive test suites for amplihack hook modules. - -## Test Files - -### test_settings_migrator.py - -Comprehensive test suite for `settings_migrator.py` following TDD testing pyramid (60% unit, 30% integration, 10% E2E). - -**Stats:** - -- 44 tests (100% passing) -- 84% code coverage -- <0.2 second execution time - -**Test Categories:** - -- Unit Tests (27): Fast, isolated, heavily mocked -- Integration Tests (13): Real filesystem, multiple components -- E2E Tests (4): Complete user scenarios - -See [TEST_SUMMARY.md](./TEST_SUMMARY.md) for detailed analysis. - -## Running Tests - -### Run All Tests - -```bash -# From project root -python -m pytest .claude/tools/amplihack/hooks/tests/ - -# Verbose output -python -m pytest .claude/tools/amplihack/hooks/tests/ -v -``` - -### Run Specific Test File - -```bash -python -m pytest .claude/tools/amplihack/hooks/tests/test_settings_migrator.py -v -``` - -### Run Specific Test Class - -```bash -python -m pytest .claude/tools/amplihack/hooks/tests/test_settings_migrator.py::TestMigrationWorkflow -v -``` - -### Run Specific Test - -```bash -python -m pytest .claude/tools/amplihack/hooks/tests/test_settings_migrator.py::TestMigrationWorkflow::test_migration_idempotency -v -``` - -### Coverage Report - -```bash -# From hooks directory -cd .claude/tools/amplihack/hooks -pytest tests/test_settings_migrator.py --cov=. --cov-report=term-missing --cov-report=html - -# View HTML report -# Opens htmlcov/index.html in browser -``` - -## Test Philosophy - -All tests follow these principles: - -1. **Zero-BS Implementation**: Every test works, no stubs or placeholders -2. **Fast Execution**: Unit tests complete in milliseconds -3. **Clear Assertions**: Single responsibility per test -4. **Realistic Fixtures**: Real-world scenarios -5. **TDD Pyramid**: 60% unit, 30% integration, 10% E2E - -## Test Structure - -``` -tests/ -├── README.md # This file -├── TEST_SUMMARY.md # Detailed test analysis -├── test_settings_migrator.py # Settings migration tests -└── (future test files...) -``` - -## Adding New Tests - -When adding tests for new modules: - -1. Follow the TDD pyramid (60/30/10) -2. Use descriptive test names -3. Create appropriate fixtures -4. Keep tests fast (<0.5s per file) -5. Document in TEST_SUMMARY.md - -## Test Fixtures - -Common fixtures used across tests: - -- **tmp_project_root**: Temporary project with .claude marker -- **tmp_home**: Temporary home directory -- **global*settings_with*\***: Various global settings scenarios -- **project_settings_exists**: Project-local settings - -See individual test files for fixture definitions. - -## CI Integration - -Tests run automatically in CI: - -- All tests must pass before merge -- Coverage must remain >80% -- Execution time must be <1 second per file - -## Troubleshooting - -### Tests Fail Locally - -```bash -# Ensure you're in the correct directory -cd /path/to/amplihack4 - -# Clear pytest cache -python -m pytest --cache-clear - -# Re-run tests -python -m pytest .claude/tools/amplihack/hooks/tests/ -v -``` - -### Coverage Not Working - -```bash -# Ensure pytest-cov is installed -pip install pytest-cov - -# Run from hooks directory -cd .claude/tools/amplihack/hooks -pytest tests/ --cov=. -``` - -## Future Test Modules - -Planned test files: - -- `test_precommit_installer.py` - Pre-commit hook installer tests -- `test_hook_processor.py` - Hook processor tests -- `test_session_start.py` - Session start hook tests -- `test_session_stop.py` - Session stop hook tests - ---- - -**Philosophy**: Every module should have comprehensive tests. No module ships without >80% coverage and full TDD pyramid implementation. diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/SESSION_CLASSIFICATION_TEST_SPEC.md b/amplifier-bundle/tools/amplihack/hooks/tests/SESSION_CLASSIFICATION_TEST_SPEC.md deleted file mode 100644 index c56cfa152..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/SESSION_CLASSIFICATION_TEST_SPEC.md +++ /dev/null @@ -1,425 +0,0 @@ -# Session Classification Test Specification - -## Test Philosophy - -### Test-Driven Development Approach - -All tests are written to **FAIL FIRST**, defining expected behavior before implementation. This ensures: - -- Clear requirements definition -- No over-engineering -- Implementation guided by tests -- Confidence that tests actually verify behavior - -### Testing Principles - -1. **Behavior over Implementation** - Tests verify what the system does, not how it does it -2. **Clear Test Names** - Test names describe the scenario being tested -3. **Single Assertion Focus** - Each test validates one specific behavior -4. **Fail-Open Philosophy** - Edge cases default to safe behavior (INFORMATIONAL) -5. **No Test Stubs** - All tests fully implemented and executable - -## Test Structure - -### Test Classes - -#### 1. TestSessionClassification - -Tests core session type detection logic. - -**Test Categories**: - -- Basic session type detection (4 types × 3 scenarios = 12 tests) -- Selective consideration application (4 types × 2 scenarios = 8 tests) -- Edge cases and boundaries (5 tests) -- Environment overrides (2 tests) -- Backward compatibility (2 tests) -- Heuristic validation (4 tests) - -#### 2. TestConsiderationMapping - -Tests consideration-to-session-type mapping logic. - -**Test Categories**: - -- Consideration filtering per session type (4 tests) -- Specific consideration inclusion/exclusion (integrated) - -## Test Data Strategy - -### Transcript Fixtures - -Tests use minimal transcript structures that represent realistic scenarios: - -```python -# DEVELOPMENT session -[ - {"type": "user", "message": {"content": "Add feature X"}}, - {"type": "assistant", "message": {"content": [ - {"type": "tool_use", "name": "Write", "input": {"file_path": "src/new.py"}} - ]}}, - {"type": "assistant", "message": {"content": [ - {"type": "tool_use", "name": "Bash", "input": {"command": "pytest tests/"}} - ]}}, -] - -# INFORMATIONAL session -[ - {"type": "user", "message": {"content": "What skills are available?"}}, - {"type": "assistant", "message": {"content": [ - {"type": "text", "text": "I have the following skills..."} - ]}}, -] -``` - -### Test Data Principles - -- **Minimal** - Only include necessary messages -- **Realistic** - Match actual transcript format -- **Focused** - Each test has specific focus -- **Reusable** - Common patterns extracted (but kept inline for clarity) - -## Session Type Detection Tests - -### DEVELOPMENT Session Tests - -#### test_detect_development_session_with_pr_and_ci - -**Scenario**: Complete development workflow with PR and CI -**Expected**: Session type = "DEVELOPMENT" -**Key Indicators**: - -- Write tool on code file (src/auth.py) -- Test execution (pytest) -- PR creation (gh pr create) - -#### test_detect_development_session_without_pr - -**Scenario**: Code changes and tests but no PR yet -**Expected**: Session type = "DEVELOPMENT" -**Key Indicators**: - -- Edit tool on code file -- Test execution -- No PR operations - -#### test_mixed_session_prioritizes_development - -**Scenario**: Session starts as Q&A, transitions to development -**Expected**: Session type = "DEVELOPMENT" -**Rationale**: Development indicators override informational - -### INFORMATIONAL Session Tests - -#### test_detect_informational_session_qa_only - -**Scenario**: Pure Q&A with no tool usage -**Expected**: Session type = "INFORMATIONAL" -**Key Indicators**: - -- Question content -- Text-only responses -- No tool operations - -#### test_detect_informational_session_with_read_tools - -**Scenario**: Q&A with Read tools but no modifications -**Expected**: Session type = "INFORMATIONAL" -**Key Indicators**: - -- Read tool usage -- No Write/Edit operations -- Explanatory content - -#### test_single_read_tool_is_informational - -**Scenario**: Single file read with no follow-up -**Expected**: Session type = "INFORMATIONAL" -**Rationale**: Single read is likely informational query - -### MAINTENANCE Session Tests - -#### test_detect_maintenance_session_docs_and_config - -**Scenario**: Documentation and configuration updates only -**Expected**: Session type = "MAINTENANCE" -**Key Indicators**: - -- Write/Edit on .md files -- Write/Edit on .yml files -- No code file changes - -#### test_git_commit_cleanup_is_maintenance - -**Scenario**: Git commits without code changes -**Expected**: Session type = "MAINTENANCE" -**Key Indicators**: - -- Git operations -- No Write/Edit on code files -- Cleanup keywords - -### INVESTIGATION Session Tests - -#### test_detect_investigation_session_read_only - -**Scenario**: Multiple read/search operations with analysis -**Expected**: Session type = "INVESTIGATION" -**Key Indicators**: - -- Multiple Read operations -- Grep/search tools -- No Write/Edit operations -- Analysis keywords - -#### test_multiple_reads_with_analysis_is_investigation - -**Scenario**: Pattern searching across multiple files -**Expected**: Session type = "INVESTIGATION" -**Key Indicators**: - -- Grep operations -- Multiple Read operations -- No modifications - -## Selective Consideration Application Tests - -### Design Goal - -Different session types should have different considerations applied to prevent false positives. - -### Test Pattern - -```python -def test__session_skips__checks(self): - # 1. Create transcript for session type - # 2. Run power steering check - # 3. Verify decision = "approve" - # 4. Verify specific considerations not blocking -``` - -### Key Tests - -#### test_informational_session_skips_pr_checks - -**Validates**: PR checks (unrelated_changes, pr_description, review_responses) not applied -**Why**: INFORMATIONAL sessions don't have PRs - -#### test_informational_session_skips_ci_checks - -**Validates**: CI checks (ci_status, branch_rebase) not applied -**Why**: INFORMATIONAL sessions don't push code - -#### test_informational_session_skips_testing_checks - -**Validates**: Testing checks (local_testing, interactive_testing) not applied -**Why**: INFORMATIONAL sessions don't modify code - -#### test_development_session_applies_all_checks - -**Validates**: All considerations active for DEVELOPMENT -**Why**: Development sessions need full workflow validation - -## Edge Cases and Boundary Tests - -### Empty Transcript - -**Test**: `test_empty_transcript_defaults_to_informational` -**Behavior**: Fail-open to INFORMATIONAL -**Rationale**: Safe default prevents blocking empty sessions - -### Single Tool Operation - -**Test**: `test_single_read_tool_is_informational` -**Behavior**: Classify as INFORMATIONAL -**Rationale**: Single read likely informational query - -### Mixed Sessions - -**Test**: `test_mixed_session_prioritizes_development` -**Behavior**: Development indicators take precedence -**Rationale**: Conservative - apply full checks if any development - -## Environment Override Tests - -### Valid Override - -**Test**: `test_environment_override_session_type` -**Setup**: Set AMPLIHACK_SESSION_TYPE=INFORMATIONAL -**Expected**: Detection overridden by environment variable -**Use Case**: User explicitly declares session type - -### Invalid Override - -**Test**: `test_invalid_environment_override_ignored` -**Setup**: Set AMPLIHACK_SESSION_TYPE=INVALID_TYPE -**Expected**: Fall back to automatic detection -**Rationale**: Fail-safe behavior - -## Backward Compatibility Tests - -### Missing Method Handling - -**Test**: `test_backward_compatibility_no_session_type_method` -**Scenario**: Code without detect_session_type method -**Expected**: No crash, existing behavior maintained -**Rationale**: Phase 1 systems should still work - -### Existing Q&A Detection - -**Test**: `test_existing_qa_detection_still_works` -**Scenario**: \_is_qa_session method still functions -**Expected**: Returns True for Q&A sessions -**Rationale**: Don't break existing detection - -## Heuristics Validation Tests - -### Code File Extensions - -**Test**: `test_development_indicators_code_file_extensions` -**Validates**: .py, .js, .ts files trigger DEVELOPMENT -**Coverage**: All common code extensions - -### Documentation Files - -**Test**: `test_maintenance_indicators_doc_files_only` -**Validates**: .md, .txt files trigger MAINTENANCE -**Coverage**: Common documentation formats - -### Search Patterns - -**Test**: `test_investigation_indicators_grep_patterns` -**Validates**: Multiple Grep operations trigger INVESTIGATION -**Threshold**: 2+ search operations - -### Question Density - -**Test**: `test_informational_indicators_question_marks` -**Validates**: High question density (>50%) triggers INFORMATIONAL -**Coverage**: Various question patterns - -## Consideration Mapping Tests - -### Mapping Validation - -Tests verify that `get_applicable_considerations()` returns correct subset for each session type. - -#### test_get_applicable_considerations_for_development - -**Expected**: All considerations returned (full workflow) - -#### test_get_applicable_considerations_for_informational - -**Expected**: Minimal set (objective_completion, agent_unnecessary_questions) -**Excluded**: PR, CI, testing, workflow checks - -#### test_get_applicable_considerations_for_maintenance - -**Expected**: Documentation and organization checks -**Excluded**: Testing, CI checks - -#### test_get_applicable_considerations_for_investigation - -**Expected**: Investigation docs check included -**Excluded**: Workflow, CI, testing checks - -## Test Execution Strategy - -### Phase 1: Verify Tests Fail - -```bash -python3 test_session_classification.py -# Expected: All tests fail with AttributeError (methods don't exist) -``` - -### Phase 2: Implement Core Detection - -Implement `detect_session_type()` method - -```bash -python3 test_session_classification.py -# Expected: Detection tests pass, mapping tests still fail -``` - -### Phase 3: Implement Consideration Mapping - -Implement `get_applicable_considerations()` method - -```bash -python3 test_session_classification.py -# Expected: All tests pass -``` - -### Phase 4: Integration Testing - -Run alongside existing power steering tests - -```bash -python3 test_power_steering_checker.py -python3 test_session_classification.py -# Expected: All tests pass, no regressions -``` - -## Success Criteria - -### Test Metrics - -- **31 tests total** (2 test classes) -- **100% pass rate** after implementation -- **0 regressions** in existing tests -- **<1 second** total execution time - -### Coverage Metrics - -- **4 session types** fully covered -- **21 considerations** mapped correctly -- **5 edge cases** handled -- **2 override mechanisms** validated - -### Quality Metrics - -- **Clear test names** describing scenarios -- **Single responsibility** per test -- **No test dependencies** (can run in any order) -- **Deterministic results** (no random behavior) - -## Known Limitations - -### Out of Scope - -1. **LLM-based classification** - Future enhancement -2. **Session type transitions** - Mid-session type changes -3. **Confidence scores** - Classification certainty metrics -4. **Performance testing** - Large transcript handling -5. **User learning** - Adaptive classification - -### Acceptable Trade-offs - -1. **Heuristic-based** - Simple rules over ML -2. **Conservative** - Prefer DEVELOPMENT over INFORMATIONAL when ambiguous -3. **Static mapping** - Fixed consideration sets per type -4. **No feedback loop** - No learning from user corrections - -## Test Maintenance - -### When to Update Tests - -- New session types added -- New considerations created -- Classification logic changed -- Edge cases discovered - -### How to Add Tests - -1. Identify scenario -2. Write failing test -3. Verify test fails -4. Implement feature -5. Verify test passes -6. Update this document - -## Related Documentation - -- `/Users/ryan/src/MicrosoftHackathon2025-AgenticCoding/worktrees/feat/issue-1492-power-steering-session-classification/.claude/tools/amplihack/hooks/tests/TEST_COVERAGE_ANALYSIS.md` - Coverage details -- Issue #1492 - Original problem statement -- `power_steering_checker.py` - Implementation target diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/TEST_COVERAGE_ANALYSIS.md b/amplifier-bundle/tools/amplihack/hooks/tests/TEST_COVERAGE_ANALYSIS.md deleted file mode 100644 index 0ee2f9484..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/TEST_COVERAGE_ANALYSIS.md +++ /dev/null @@ -1,306 +0,0 @@ -# Test Coverage Analysis: Session Classification (Issue #1492) - -## Test Coverage Summary - -### Total Tests: 31 - -- **Session Type Detection**: 12 tests -- **Selective Consideration Application**: 6 tests -- **Edge Cases**: 5 tests -- **Environment Overrides**: 2 tests -- **Backward Compatibility**: 2 tests -- **Heuristics**: 4 tests - -## Coverage by Session Type - -### 1. DEVELOPMENT Sessions (5 tests) - -✓ With PR and CI -✓ Without PR -✓ Mixed session (Q&A + development) -✓ Code file extensions -✓ Full workflow checks applied - -**Critical Paths Covered**: - -- PR creation detected -- Code changes detected (Write/Edit on .py, .js, etc.) -- Test execution detected -- CI checks applied -- All considerations active - -**Edge Cases**: - -- Development without PR yet -- Development with incomplete TODOs (should block) - -### 2. INFORMATIONAL Sessions (6 tests) - -✓ Q&A only (no tools) -✓ Q&A with Read tools -✓ Single Read tool -✓ High question density -✓ Skips PR checks -✓ Skips CI checks -✓ Skips testing checks - -**Critical Paths Covered**: - -- Pure Q&A detection -- Read-only exploration -- No workflow checks applied -- Approval without development requirements - -**Edge Cases**: - -- Empty transcript defaults to INFORMATIONAL -- Single tool use is INFORMATIONAL - -### 3. MAINTENANCE Sessions (4 tests) - -✓ Documentation updates only -✓ Configuration file changes -✓ Git commit cleanup -✓ Minimal checks applied - -**Critical Paths Covered**: - -- .md, .txt, .yml file modifications -- Git operations without code changes -- Documentation and organization checks only - -**Edge Cases**: - -- Git commits for cleanup - -### 4. INVESTIGATION Sessions (3 tests) - -✓ Read-only exploration -✓ Multiple Grep/search operations -✓ Documentation checks applied - -**Critical Paths Covered**: - -- Multiple Read/Grep tools -- Analysis without modification -- Investigation docs required - -**Edge Cases**: - -- Multiple reads triggers INVESTIGATION - -## Coverage by Feature - -### Session Type Detection - -- ✓ Development indicators (code files, tests, PR) -- ✓ Informational indicators (questions, no tools) -- ✓ Maintenance indicators (docs, config only) -- ✓ Investigation indicators (read-only, search) -- ✓ Mixed session prioritization -- ✓ Empty transcript handling - -### Selective Consideration Application - -- ✓ PR checks skipped for INFORMATIONAL -- ✓ CI checks skipped for INFORMATIONAL -- ✓ Testing checks skipped for INFORMATIONAL -- ✓ All checks applied for DEVELOPMENT -- ✓ Minimal checks for MAINTENANCE -- ✓ Investigation docs for INVESTIGATION - -### Environment Overrides - -- ✓ AMPLIHACK_SESSION_TYPE env var -- ✓ Invalid override handling - -### Backward Compatibility - -- ✓ Existing \_is_qa_session still works -- ✓ Missing detect_session_type doesn't crash - -### Consideration Mapping - -- ✓ DEVELOPMENT gets all considerations -- ✓ INFORMATIONAL gets minimal set -- ✓ MAINTENANCE gets docs + organization -- ✓ INVESTIGATION gets investigation docs - -## Coverage Gaps Identified - -### Missing Tests (To Add Later) - -1. **Session Type Transitions** - - Session that starts as INFORMATIONAL and becomes DEVELOPMENT - - How to handle mid-session type changes - -2. **Multiple PR Workflow** - - Session with multiple PR operations - - PR review cycle detection - -3. **Complex Tool Patterns** - - Bash commands that are neither tests nor git - - Write operations that aren't code (data files) - -4. **Consideration Enabling/Disabling** - - Session type with disabled considerations - - Custom consideration mapping - -5. **Performance** - - Large transcripts (1000+ messages) - - Timeout handling for classification - -### Not Covered (Out of Scope) - -- LLM-based classification (future enhancement) -- User feedback learning -- Session type statistics -- Classification confidence scores - -## Test Quality Metrics - -### Boundary Coverage - -- ✓ Empty transcripts -- ✓ Single message transcripts -- ✓ Mixed session types -- ✓ Invalid data handling - -### Error Handling - -- ✓ Missing methods (backward compatibility) -- ✓ Invalid environment variables -- ✓ Malformed transcripts (handled by existing loader) - -### State Coverage - -- ✓ Fresh session (no prior state) -- ✓ Session with redirects (handled by existing tests) - -## Implementation Guidance - -### Required Methods (From Tests) - -1. `detect_session_type(transcript: List[Dict]) -> str` - - Returns: "DEVELOPMENT", "INFORMATIONAL", "MAINTENANCE", "INVESTIGATION" - - Must check AMPLIHACK_SESSION_TYPE env var first - - Falls back to heuristic detection - -2. `get_applicable_considerations(session_type: str) -> List[Dict]` - - Returns filtered list of considerations for session type - - Maps considerations to applicable session types - -### Required Logic - -1. **Session Type Detection Heuristics**: - - ```python - # DEVELOPMENT indicators - - Write/Edit to code files (.py, .js, .ts, etc.) - - Test execution (pytest, npm test, etc.) - - PR operations (gh pr create, etc.) - - # INFORMATIONAL indicators - - No tool usage OR only Read tools - - High question density (>50% messages with ?) - - Short sessions (<5 messages) - - # MAINTENANCE indicators - - Only doc/config file changes (.md, .txt, .yml) - - Git operations without code changes - - # INVESTIGATION indicators - - Multiple Read/Grep operations - - No Write/Edit operations - - Analysis keywords in messages - ``` - -2. **Consideration Filtering**: - - ```python - CONSIDERATION_MAPPING = { - "DEVELOPMENT": ["*"], # All considerations - "INFORMATIONAL": [ - "objective_completion", - "agent_unnecessary_questions", - ], - "MAINTENANCE": [ - "objective_completion", - "documentation_updates", - "docs_organization", - "philosophy_compliance", - ], - "INVESTIGATION": [ - "objective_completion", - "investigation_docs", - "documentation_updates", - ], - } - ``` - -3. **Integration Points**: - - Modify `_analyze_considerations()` to call `get_applicable_considerations()` - - Add `detect_session_type()` call at start of `check()` - - Store session type in analysis for logging - -## Expected Test Results (TDD) - -### Current State (Before Implementation) - -All tests should FAIL with: - -- `AttributeError: 'PowerSteeringChecker' has no attribute 'detect_session_type'` -- `AttributeError: 'PowerSteeringChecker' has no attribute 'get_applicable_considerations'` - -### After Implementation - -All 31 tests should PASS, validating: - -- Session type detection works correctly -- Considerations are selectively applied -- False positives eliminated for INFORMATIONAL sessions -- Backward compatibility maintained - -## Test Execution - -### Run All Tests - -```bash -python3 -m pytest .claude/tools/amplihack/hooks/tests/test_session_classification.py -v -``` - -### Run Specific Test Class - -```bash -python3 -m pytest .claude/tools/amplihack/hooks/tests/test_session_classification.py::TestSessionClassification -v -``` - -### Run Single Test - -```bash -python3 -m pytest .claude/tools/amplihack/hooks/tests/test_session_classification.py::TestSessionClassification::test_detect_informational_session_qa_only -v -``` - -## Success Criteria - -### All Tests Pass - -- Session type detection: 12/12 -- Consideration application: 6/6 -- Edge cases: 5/5 -- Environment: 2/2 -- Backward compatibility: 2/2 -- Heuristics: 4/4 - -### No Regressions - -- Existing power_steering_checker tests still pass -- Q&A detection still works -- Fail-open behavior maintained - -### Issue #1492 Resolved - -- INFORMATIONAL sessions no longer blocked -- PR checks skipped when no PR exists -- CI checks skipped when no code changes -- Testing checks skipped when no tests needed diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/TEST_SUMMARY.md b/amplifier-bundle/tools/amplihack/hooks/tests/TEST_SUMMARY.md deleted file mode 100644 index 064898bc4..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/TEST_SUMMARY.md +++ /dev/null @@ -1,226 +0,0 @@ -# Test Summary: settings_migrator.py - -## Overview - -Comprehensive test suite for `settings_migrator.py` following TDD testing pyramid principles (60% unit, 30% integration, 10% E2E). - -## Test Statistics - -- **Total Tests**: 44 -- **Pass Rate**: 100% (44/44 passing) -- **Execution Time**: <0.3 seconds -- **Code Coverage**: 84% of settings_migrator.py -- **Test File**: `test_settings_migrator.py` (387 lines, 99% coverage) - -## Testing Pyramid Breakdown - -### Unit Tests (60% - 27 tests) - -Fast, isolated tests with heavy mocking: - -**Initialization Tests (2 tests)** - -- `test_init_with_explicit_project_root` - Verify explicit project root initialization -- `test_init_auto_detect_project_root` - Verify auto-detection initialization - -**Detection Tests (9 tests)** - -- `test_detect_stop_hook_absolute_path` - Detect hooks with absolute paths -- `test_detect_stop_hook_relative_path` - Detect hooks with relative paths -- `test_detect_no_amplihack_hooks` - Detect no amplihack hooks present -- `test_detect_multiple_amplihack_hooks` - Detect multiple hook types -- `test_detect_preserves_non_amplihack_hooks` - Ensure non-amplihack hooks not detected -- `test_detect_handles_missing_global_settings` - Handle missing settings file -- `test_detect_handles_missing_hooks_key` - Handle missing 'hooks' key -- `test_detect_handles_empty_hooks_array` - Handle empty hooks array -- `test_detect_handles_malformed_json` - Handle JSON parsing errors - -**JSON Safety Tests (4 tests)** - -- `test_safe_json_update_creates_temp_file` - Verify temp file creation -- `test_safe_json_update_atomic_write` - Verify atomic write using os.replace -- `test_safe_json_update_handles_write_failure` - Handle write failures gracefully -- `test_safe_json_update_cleans_up_temp_on_failure` - Cleanup temp files on error - -**Backup Tests (3 tests)** - -- `test_create_backup_with_timestamp` - Create timestamped backups -- `test_create_backup_handles_missing_file` - Handle missing source file -- `test_create_backup_handles_copy_failure` - Handle backup copy failures - -**Pattern Detection Tests (10 tests)** - -- Parametrized tests for all 10 amplihack hook patterns: - - `amplihack/hooks/stop.py` - - `~/.amplihack/.claude/tools/amplihack/hooks/stop.py` - - `amplihack/hooks/session_start.py` - - `~/.amplihack/.claude/tools/amplihack/hooks/session_start.py` - - `amplihack/hooks/pre_tool_use.py` - - `~/.amplihack/.claude/tools/amplihack/hooks/pre_tool_use.py` - - `amplihack/hooks/post_tool_use.py` - - `~/.amplihack/.claude/tools/amplihack/hooks/post_tool_use.py` - - `amplihack/hooks/pre_compact.py` - - `~/.amplihack/.claude/tools/amplihack/hooks/pre_compact.py` - -### Integration Tests (30% - 13 tests) - -Real filesystem operations, multiple components: - -**Migration Workflow Tests (4 tests)** - -- `test_migrate_removes_global_adds_local_verification` - Full migration workflow -- `test_migration_idempotency` - Verify migration is idempotent (safe to run twice) -- `test_migration_preserves_other_hooks` - Ensure non-amplihack hooks preserved -- `test_migration_multiple_hook_types` - Handle multiple hook types correctly - -**Backup & Recovery Tests (2 tests)** - -- `test_backup_created_before_modification` - Verify backup timing -- `test_no_backup_if_no_global_settings` - No backup for missing files - -**Project Root Detection Tests (2 tests)** - -- `test_detect_project_root_from_hooks_directory` - Detect from nested directory -- `test_detect_project_root_fails_gracefully` - Handle detection failure - -**Edge Case Tests (5 tests)** - -- `test_empty_hooks_object` - Handle empty hooks object -- `test_hook_config_without_hooks_array` - Handle missing hooks array -- `test_hook_without_command_field` - Handle missing command field -- `test_concurrent_modification_resilience` - Test atomic write resilience - -### E2E Tests (10% - 4 tests) - -Complete user scenarios from start to finish: - -- `test_user_scenario_first_time_migration` - First-time user migration -- `test_user_scenario_no_migration_needed` - No amplihack hooks present -- `test_user_scenario_migration_failure_recovery` - Graceful error handling -- `test_command_line_execution` - Command-line execution flow - -## Test Fixtures - -### Core Fixtures - -1. **tmp_project_root** - Temporary project with .claude marker -2. **tmp_home** - Temporary home directory for global settings - -### Settings Fixtures - -1. **global_settings_with_amplihack_stop_hook** - Global settings with Stop hook -2. **global_settings_with_multiple_amplihack_hooks** - Multiple hook types -3. **global_settings_with_mixed_hooks** - Mixed amplihack and custom hooks -4. **global_settings_no_hooks** - Settings without hooks -5. **project_settings_exists** - Project-local settings file - -## Coverage Analysis - -### Covered Functionality (84%) - -**Fully Covered:** - -- Hook detection logic (all patterns) -- Safe JSON update with atomic write -- Backup creation -- Migration workflow -- Error handling for malformed JSON -- Idempotency verification -- Mixed hook preservation - -**Partially Covered:** - -- Project root detection edge cases -- Concurrent modification scenarios -- Specific error paths in backup/recovery - -**Not Covered (16%):** - -- Some exception handling branches -- Command-line **main** execution (tested via E2E) -- Rare edge cases in project root detection - -## Test Quality Metrics - -### Philosophy Compliance - -✅ **Zero-BS Implementation**: Every test works, no stubs or placeholders -✅ **Fast Execution**: All tests complete in <0.3 seconds -✅ **Clear Assertions**: Single responsibility per test -✅ **Realistic Fixtures**: Real-world scenarios - -### Test Design Principles - -1. **Isolation**: Unit tests heavily mocked for speed -2. **Integration**: Real filesystem for multi-component tests -3. **E2E**: Complete user workflows tested -4. **Parametrization**: Efficient testing of all hook patterns -5. **Error Coverage**: Comprehensive error path testing - -## Key Testing Patterns Used - -1. **TDD Pyramid**: 60% unit, 30% integration, 10% E2E -2. **Arrange-Act-Assert**: Clear test structure -3. **Parametrized Testing**: Efficient coverage of similar cases -4. **Mock Isolation**: Strategic mocking for unit tests -5. **Real Filesystem**: Integration tests use tmp_path -6. **Error Simulation**: Side effects for failure scenarios - -## Running Tests - -```bash -# Run all tests -pytest .claude/tools/amplihack/hooks/tests/test_settings_migrator.py -v - -# Run with coverage -cd .claude/tools/amplihack/hooks -pytest tests/test_settings_migrator.py --cov=. --cov-report=term-missing - -# Run specific test class -pytest .claude/tools/amplihack/hooks/tests/test_settings_migrator.py::TestMigrationWorkflow -v - -# Run with verbose output -pytest .claude/tools/amplihack/hooks/tests/test_settings_migrator.py -vv -``` - -## Test Maintenance - -### Adding New Tests - -1. Determine test tier (unit, integration, E2E) -2. Follow existing naming conventions -3. Use appropriate fixtures -4. Keep tests focused and fast - -### Updating Tests - -When modifying `settings_migrator.py`: - -1. Update affected tests first (TDD) -2. Ensure coverage remains >80% -3. Verify all tests pass -4. Update this summary if needed - -## Critical Test Gaps (Future Work) - -1. **Concurrent Access**: More thorough concurrent modification tests -2. **Performance**: Tests for large settings files -3. **Cross-Platform**: Windows-specific path handling -4. **Stress Testing**: Many hooks, many migrations - -## Success Criteria - -✅ All 44 tests passing -✅ 84% code coverage -✅ <0.5 second execution time -✅ Zero-BS implementation -✅ TDD pyramid compliance -✅ Comprehensive error handling -✅ Real-world scenarios tested - ---- - -**Last Updated**: 2025-11-24 -**Test Framework**: pytest 9.0.1 -**Python Version**: 3.11.14 -**Coverage Tool**: pytest-cov 7.0.0 diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/debug_stdin.py b/amplifier-bundle/tools/amplihack/hooks/tests/debug_stdin.py deleted file mode 100644 index a5b8a8363..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/debug_stdin.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 -"""Debug stdin state during test execution""" - -import os -import sys - -sys.path.insert(0, "..") - -from shutdown_context import _is_in_atexit_context, _is_stdin_closed, is_shutdown_in_progress - -print( - f"ENV: AMPLIHACK_SHUTDOWN_IN_PROGRESS = {os.environ.get('AMPLIHACK_SHUTDOWN_IN_PROGRESS', 'NOT SET')}" -) -print(f"is_shutdown_in_progress() = {is_shutdown_in_progress()}") -print(f"_is_stdin_closed() = {_is_stdin_closed()}") -print(f"_is_in_atexit_context() = {_is_in_atexit_context()}") -print(f"sys.stdin = {sys.stdin}") -print(f"hasattr(sys.stdin, 'closed') = {hasattr(sys.stdin, 'closed')}") -if hasattr(sys.stdin, "closed"): - print(f"sys.stdin.closed = {sys.stdin.closed}") -try: - fileno = sys.stdin.fileno() - print(f"sys.stdin.fileno() = {fileno}") -except Exception as e: - print(f"sys.stdin.fileno() raised: {type(e).__name__}: {e}") diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/manual_verify_issue_1872.py b/amplifier-bundle/tools/amplihack/hooks/tests/manual_verify_issue_1872.py deleted file mode 100644 index d8072ed24..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/manual_verify_issue_1872.py +++ /dev/null @@ -1,177 +0,0 @@ -#!/usr/bin/env python3 -"""Manual verification script for Issue #1872 bug fixes. - -Tests all 4 power steering bug fixes in a realistic scenario: -1. Bug #1: Summary shows (X passed, Y failed, Z skipped) -2. Bug #2: SDK errors logged to stderr -3. Bug #3: Failure reasons extracted from SDK -4. Bug #4: Final guidance generated via SDK - -Usage: - python .claude/tools/amplihack/hooks/tests/manual_verify_issue_1872.py -""" - -import sys -from pathlib import Path - -# Add parent directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from power_steering_checker import PowerSteeringChecker - - -def create_test_transcript(): - """Create a realistic test transcript with some incomplete work.""" - return [ - { - "role": "user", - "content": "Please implement authentication feature", - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "text", "text": "I'll implement authentication."}, - { - "type": "tool_use", - "name": "TodoWrite", - "input": { - "todos": [ - { - "content": "Design auth system", - "status": "completed", - "activeForm": "Designing", - }, - { - "content": "Implement login", - "status": "completed", - "activeForm": "Implementing", - }, - { - "content": "Write tests", - "status": "pending", # Not complete! - "activeForm": "Writing tests", - }, - ] - }, - }, - ] - }, - }, - { - "role": "user", - "content": "/stop", - }, - ] - - -def main(): - """Run manual verification of all 4 bug fixes.""" - print("=" * 70) - print("🧪 MANUAL VERIFICATION: Issue #1872 Power Steering Bug Fixes") - print("=" * 70) - print() - - # Create temp transcript file - import json - import tempfile - - transcript = create_test_transcript() - with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: - for msg in transcript: - f.write(json.dumps(msg) + "\n") - transcript_path = Path(f.name) - - try: - # Initialize checker - checker = PowerSteeringChecker(project_root=Path.cwd()) - - print("📋 Running power steering analysis...") - print(f" Transcript: {len(transcript)} messages") - print(" Session ID: test-1872") - print() - - # Run check - result = checker.check( - transcript_path=transcript_path, - session_id="test-1872", - ) - - print("=" * 70) - print("📊 VERIFICATION RESULTS") - print("=" * 70) - print() - - # Verify Bug #1: Math Display - print("✓ Bug #1 (Math Display):") - if result.analysis: - # Check the formatted text for the skipped count - analysis_text = checker._format_results_text(result.analysis, "STANDARD") - - # Count indicators in output - passed_count = analysis_text.count("✅") - failed_count = analysis_text.count("❌") - skipped_count = analysis_text.count("⬜") - - print(f" Passed: {passed_count}") - print(f" Failed: {failed_count}") - print(f" Skipped: {skipped_count}") - print(f" Total: {passed_count + failed_count + skipped_count}") - - # Check if summary line includes all three - if "skipped)" in analysis_text: - print(" ✅ Summary includes skipped count") - else: - print(" ❌ Summary missing skipped count") - print() - - # Verify Bug #2: SDK Error Logging - print("✓ Bug #2 (SDK Error Visibility):") - print(" SDK errors would be logged to stderr with format:") - print(" [Power Steering SDK Error] {id}: {error}") - print(" (Verified in unit tests - see test_sdk_exception_logged_to_stderr)") - print() - - # Verify Bug #3: Failure Reasons - print("✓ Bug #3 (Failure Reason Extraction):") - if result.analysis and result.analysis.failed_blockers: - for failed in result.analysis.failed_blockers[:3]: # Show first 3 - print(f" Check: {failed.consideration_id}") - print(f" Reason: {failed.reason}") - if "SDK analysis:" in failed.reason: - print(" ✅ Reason from SDK (not generic template)") - print() - print() - - # Verify Bug #4: Final Guidance - print("✓ Bug #4 (SDK-Generated Final Guidance):") - if result.continuation_prompt: - # Check if guidance is specific (mentions actual failures) - is_specific = any( - word in result.continuation_prompt.lower() for word in ["todo", "test", "specific"] - ) - if is_specific: - print(" ✅ Guidance is context-specific") - print(f" Preview: {result.continuation_prompt[:200]}...") - else: - print(" ⚠️ Guidance may be generic") - print() - - print("=" * 70) - print("✅ VERIFICATION COMPLETE") - print("=" * 70) - print() - print("All 4 bug fixes verified in realistic scenario:") - print("1. Math display shows (X passed, Y failed, Z skipped)") - print("2. SDK errors logged to stderr") - print("3. Failure reasons extracted from SDK") - print("4. Final guidance generated with context") - print() - - finally: - # Cleanup - transcript_path.unlink(missing_ok=True) - - -if __name__ == "__main__": - main() diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_agent_memory_sync.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_agent_memory_sync.py deleted file mode 100644 index 31570f5ca..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_agent_memory_sync.py +++ /dev/null @@ -1,631 +0,0 @@ -#!/usr/bin/env python3 -""" -Tests for agent memory sync wrapper functions - TDD approach for Issue #1960. - -Testing pyramid: -- 60% Unit tests (fast, heavily mocked) -- 30% Integration tests (multiple components) -- 10% E2E tests (complete workflows) - -This test file focuses on sync wrapper functions that safely handle async functions -in synchronous contexts. Tests are written BEFORE implementation (TDD) - they will -FAIL until the sync wrappers are created. - -Issue #1960: inject_memory_for_agents() and extract_learnings_from_conversation() -are async functions being called from synchronous hook context, causing runtime errors. - -Solution: Create sync wrapper functions that: -1. Handle "no event loop" case (create new loop) -2. Handle "loop already running" case (use nested_asyncio or thread) -3. Handle import errors gracefully (fail-open) -4. Verify database operations execute correctly -""" - -import asyncio -import sys -from pathlib import Path -from unittest.mock import AsyncMock, Mock, patch - -import pytest - -# Add hooks directory to path for imports -hooks_dir = Path(__file__).parent.parent -sys.path.insert(0, str(hooks_dir)) - -from agent_memory_hook import ( - detect_agent_references, -) - -# ============================================================================ -# UNIT TESTS (60%) - Test sync wrappers in isolation -# ============================================================================ - - -class TestInjectMemoryForAgentsSync: - """Unit tests for inject_memory_for_agents_sync() wrapper function""" - - def test_sync_wrapper_exists(self): - """Test that sync wrapper function exists and is importable""" - from agent_memory_hook import inject_memory_for_agents_sync - - assert callable(inject_memory_for_agents_sync) - - def test_sync_wrapper_signature(self): - """Test sync wrapper has same signature as async version""" - import inspect - - from agent_memory_hook import inject_memory_for_agents_sync - - sig = inspect.signature(inject_memory_for_agents_sync) - params = list(sig.parameters.keys()) - - # Should have same parameters as async version - assert "prompt" in params - assert "agent_types" in params - assert "session_id" in params - - def test_sync_wrapper_returns_tuple(self): - """Test sync wrapper returns (enhanced_prompt, metadata) tuple""" - from agent_memory_hook import inject_memory_for_agents_sync - - # Should return tuple even when memory system unavailable - result = inject_memory_for_agents_sync( - prompt="Test prompt", agent_types=["architect"], session_id="test_session" - ) - - assert isinstance(result, tuple) - assert len(result) == 2 - enhanced_prompt, metadata = result - assert isinstance(enhanced_prompt, str) - assert isinstance(metadata, dict) - - def test_sync_wrapper_no_event_loop(self): - """Test sync wrapper works when no event loop exists""" - from agent_memory_hook import inject_memory_for_agents_sync - - # Ensure no event loop exists - try: - asyncio.get_running_loop() - pytest.skip("Event loop already running") - except RuntimeError: - pass # Good - no loop running - - with patch( - "agent_memory_hook.inject_memory_for_agents", - new_callable=AsyncMock, - return_value=("Enhanced prompt", {"test": "metadata"}), - ): - result = inject_memory_for_agents_sync( - prompt="Test prompt", - agent_types=["architect"], - session_id="test_session", - ) - - assert result == ("Enhanced prompt", {"test": "metadata"}) - - def test_sync_wrapper_with_running_loop(self): - """Test sync wrapper handles case when event loop already running""" - from agent_memory_hook import inject_memory_for_agents_sync - - async def test_with_running_loop(): - # Inside this coroutine, an event loop IS running - with patch( - "agent_memory_hook.inject_memory_for_agents", - new_callable=AsyncMock, - return_value=("Enhanced", {"meta": "data"}), - ): - result = inject_memory_for_agents_sync( - prompt="Test", agent_types=["tester"], session_id="test" - ) - - # Should still work even with running loop - assert result == ("Enhanced", {"meta": "data"}) - - # Run test in async context (simulates running loop) - asyncio.run(test_with_running_loop()) - - def test_sync_wrapper_import_error_handling(self): - """Test sync wrapper handles import errors gracefully (fail-open)""" - from agent_memory_hook import inject_memory_for_agents_sync - - with patch( - "agent_memory_hook.inject_memory_for_agents", - side_effect=ImportError("Memory system not available"), - ): - prompt = "Test prompt" - result = inject_memory_for_agents_sync( - prompt=prompt, agent_types=["architect"], session_id="test" - ) - - enhanced_prompt, metadata = result - - # Should fail-open: return original prompt - assert enhanced_prompt == prompt - assert metadata.get("memory_available") is False - assert "error" in metadata - - def test_sync_wrapper_general_exception_handling(self): - """Test sync wrapper handles general exceptions gracefully""" - from agent_memory_hook import inject_memory_for_agents_sync - - with patch( - "agent_memory_hook.inject_memory_for_agents", - side_effect=Exception("Database connection failed"), - ): - prompt = "Test prompt" - result = inject_memory_for_agents_sync( - prompt=prompt, agent_types=["builder"], session_id="test" - ) - - enhanced_prompt, metadata = result - - # Should fail-open: return original prompt - assert enhanced_prompt == prompt - assert metadata.get("memory_available") is False - assert "error" in metadata - - def test_sync_wrapper_empty_agent_types(self): - """Test sync wrapper handles empty agent_types list""" - from agent_memory_hook import inject_memory_for_agents_sync - - result = inject_memory_for_agents_sync( - prompt="Test prompt", agent_types=[], session_id="test" - ) - - enhanced_prompt, metadata = result - - # Should return prompt unchanged - assert enhanced_prompt == "Test prompt" - assert metadata == {} - - def test_sync_wrapper_none_session_id(self): - """Test sync wrapper handles None session_id""" - from agent_memory_hook import inject_memory_for_agents_sync - - with patch( - "agent_memory_hook.inject_memory_for_agents", - new_callable=AsyncMock, - return_value=("Enhanced", {"test": "data"}), - ): - result = inject_memory_for_agents_sync( - prompt="Test", agent_types=["architect"], session_id=None - ) - - assert result == ("Enhanced", {"test": "data"}) - - -class TestExtractLearningsFromConversationSync: - """Unit tests for extract_learnings_from_conversation_sync() wrapper""" - - def test_sync_wrapper_exists(self): - """Test that sync wrapper function exists and is importable""" - from agent_memory_hook import extract_learnings_from_conversation_sync - - assert callable(extract_learnings_from_conversation_sync) - - def test_sync_wrapper_signature(self): - """Test sync wrapper has same signature as async version""" - import inspect - - from agent_memory_hook import extract_learnings_from_conversation_sync - - sig = inspect.signature(extract_learnings_from_conversation_sync) - params = list(sig.parameters.keys()) - - # Should have same parameters as async version - assert "conversation_text" in params - assert "agent_types" in params - assert "session_id" in params - - def test_sync_wrapper_returns_dict(self): - """Test sync wrapper returns metadata dictionary""" - from agent_memory_hook import extract_learnings_from_conversation_sync - - result = extract_learnings_from_conversation_sync( - conversation_text="Test conversation", - agent_types=["architect"], - session_id="test", - ) - - assert isinstance(result, dict) - - def test_sync_wrapper_no_event_loop(self): - """Test sync wrapper works when no event loop exists""" - from agent_memory_hook import extract_learnings_from_conversation_sync - - # Ensure no event loop exists - try: - asyncio.get_running_loop() - pytest.skip("Event loop already running") - except RuntimeError: - pass # Good - no loop running - - with patch( - "agent_memory_hook.extract_learnings_from_conversation", - new_callable=AsyncMock, - return_value={"learnings_stored": 2, "memory_available": True}, - ): - result = extract_learnings_from_conversation_sync( - conversation_text="Test", - agent_types=["architect"], - session_id="test", - ) - - assert result["learnings_stored"] == 2 - assert result["memory_available"] is True - - def test_sync_wrapper_with_running_loop(self): - """Test sync wrapper handles case when event loop already running""" - from agent_memory_hook import extract_learnings_from_conversation_sync - - async def test_with_running_loop(): - with patch( - "agent_memory_hook.extract_learnings_from_conversation", - new_callable=AsyncMock, - return_value={"learnings_stored": 1, "memory_available": True}, - ): - result = extract_learnings_from_conversation_sync( - conversation_text="Test", - agent_types=["tester"], - session_id="test", - ) - - assert result["learnings_stored"] == 1 - - asyncio.run(test_with_running_loop()) - - def test_sync_wrapper_import_error_handling(self): - """Test sync wrapper handles import errors gracefully (fail-open)""" - from agent_memory_hook import extract_learnings_from_conversation_sync - - with patch( - "agent_memory_hook.extract_learnings_from_conversation", - side_effect=ImportError("Memory system not available"), - ): - result = extract_learnings_from_conversation_sync( - conversation_text="Test", agent_types=["architect"], session_id="test" - ) - - assert result.get("memory_available") is False - assert "error" in result - assert result.get("learnings_stored") == 0 - - def test_sync_wrapper_general_exception_handling(self): - """Test sync wrapper handles general exceptions gracefully""" - from agent_memory_hook import extract_learnings_from_conversation_sync - - with patch( - "agent_memory_hook.extract_learnings_from_conversation", - side_effect=Exception("Database write failed"), - ): - result = extract_learnings_from_conversation_sync( - conversation_text="Test", agent_types=["builder"], session_id="test" - ) - - assert result.get("memory_available") is False - assert "error" in result - - def test_sync_wrapper_empty_agent_types(self): - """Test sync wrapper handles empty agent_types list""" - from agent_memory_hook import extract_learnings_from_conversation_sync - - result = extract_learnings_from_conversation_sync( - conversation_text="Test conversation", agent_types=[], session_id="test" - ) - - # Should return minimal metadata - assert result.get("learnings_stored") == 0 - assert result.get("agents") == [] - - -# ============================================================================ -# INTEGRATION TESTS (30%) - Test sync wrappers with real async functions -# ============================================================================ - - -class TestSyncWrapperIntegration: - """Integration tests for sync wrappers calling real async functions""" - - @pytest.mark.asyncio - async def test_inject_memory_sync_calls_async_version(self): - """Test sync wrapper correctly calls async version""" - from agent_memory_hook import inject_memory_for_agents_sync - - # Mock the MemoryCoordinator to avoid actual database calls - # Must patch where it's imported (inside the async function) - with patch("amplihack.memory.coordinator.MemoryCoordinator") as mock_coordinator_class: - mock_coordinator = Mock() - mock_coordinator.retrieve = AsyncMock(return_value=[]) - mock_coordinator_class.return_value = mock_coordinator - - # Call sync wrapper (which should call async function internally) - result = inject_memory_for_agents_sync( - prompt="Design authentication system", - agent_types=["architect", "security"], - session_id="integration_test", - ) - - enhanced_prompt, metadata = result - - # Verify coordinator was initialized - assert mock_coordinator_class.called - # Verify metadata structure - assert "agents" in metadata - assert "memory_available" in metadata - - @pytest.mark.asyncio - async def test_extract_learnings_sync_calls_async_version(self): - """Test sync wrapper correctly calls async version""" - from agent_memory_hook import extract_learnings_from_conversation_sync - - # Must patch where it's imported (inside the async function) - with patch("amplihack.memory.coordinator.MemoryCoordinator") as mock_coordinator_class: - mock_coordinator = Mock() - mock_coordinator.store = AsyncMock(return_value="memory_id_123") - mock_coordinator_class.return_value = mock_coordinator - - result = extract_learnings_from_conversation_sync( - conversation_text="Successfully implemented auth", - agent_types=["architect"], - session_id="integration_test", - ) - - # Verify coordinator was initialized - assert mock_coordinator_class.called - # Verify metadata structure - assert "agents" in result - assert "learnings_stored" in result - - def test_sync_wrapper_thread_safety(self): - """Test sync wrappers are thread-safe""" - import threading - - from agent_memory_hook import inject_memory_for_agents_sync - - results = [] - errors = [] - - def call_sync_wrapper(index): - try: - result = inject_memory_for_agents_sync( - prompt=f"Prompt {index}", - agent_types=["architect"], - session_id=f"thread_{index}", - ) - results.append(result) - except Exception as e: - errors.append(e) - - # Create multiple threads calling sync wrapper - threads = [threading.Thread(target=call_sync_wrapper, args=(i,)) for i in range(5)] - - for thread in threads: - thread.start() - - for thread in threads: - thread.join() - - # All calls should succeed without errors - assert len(errors) == 0 - assert len(results) == 5 - - -# ============================================================================ -# E2E TESTS (10%) - Test complete workflows with sync wrappers -# ============================================================================ - - -class TestSyncWrapperEndToEnd: - """End-to-end tests simulating real hook usage scenarios""" - - def test_user_prompt_submit_workflow(self): - """Test complete user_prompt_submit hook workflow using sync wrappers""" - from agent_memory_hook import ( - inject_memory_for_agents_sync, - ) - - user_prompt = "Use @.claude/agents/amplihack/core/architect.md to design a payment system" - - # Step 1: Detect agent references - agent_types = detect_agent_references(user_prompt) - assert "architect" in agent_types - - # Step 2: Inject memory using sync wrapper - # Must patch where it's imported (inside the async function) - with patch("amplihack.memory.coordinator.MemoryCoordinator") as mock_coordinator_class: - mock_coordinator = Mock() - mock_coordinator.retrieve = AsyncMock(return_value=[]) - mock_coordinator_class.return_value = mock_coordinator - - enhanced_prompt, metadata = inject_memory_for_agents_sync( - prompt=user_prompt, agent_types=agent_types, session_id="e2e_test" - ) - - # Verify workflow completed successfully - assert isinstance(enhanced_prompt, str) - assert "memory_available" in metadata - - def test_stop_hook_workflow(self): - """Test complete stop hook workflow using sync wrappers""" - from agent_memory_hook import extract_learnings_from_conversation_sync - - conversation_text = """ - User: Design authentication system - Assistant: I'll use OAuth2 with JWT tokens... - [Implementation details] - """ - - agent_types = ["architect", "security"] - - # Extract learnings using sync wrapper - # Must patch where it's imported (inside the async function) - with patch("amplihack.memory.coordinator.MemoryCoordinator") as mock_coordinator_class: - mock_coordinator = Mock() - mock_coordinator.store = AsyncMock(return_value="memory_123") - mock_coordinator_class.return_value = mock_coordinator - - metadata = extract_learnings_from_conversation_sync( - conversation_text=conversation_text, - agent_types=agent_types, - session_id="e2e_test", - ) - - # Verify workflow completed successfully - assert "learnings_stored" in metadata - assert "agents" in metadata - - def test_complete_session_lifecycle(self): - """Test complete session: prompt submission -> conversation -> extraction""" - from agent_memory_hook import ( - extract_learnings_from_conversation_sync, - inject_memory_for_agents_sync, - ) - - # Must patch where it's imported (inside the async functions) - with patch("amplihack.memory.coordinator.MemoryCoordinator") as mock_coordinator_class: - mock_coordinator = Mock() - mock_coordinator.retrieve = AsyncMock(return_value=[]) - mock_coordinator.store = AsyncMock(return_value="memory_456") - mock_coordinator_class.return_value = mock_coordinator - - # Phase 1: User submits prompt - user_prompt = "Use @.claude/agents/amplihack/core/architect.md to design API" - agent_types = detect_agent_references(user_prompt) - - # Phase 2: Inject memory - enhanced_prompt, inject_metadata = inject_memory_for_agents_sync( - prompt=user_prompt, agent_types=agent_types, session_id="lifecycle_test" - ) - - assert inject_metadata.get("memory_available") is True - - # Phase 3: Conversation happens (simulated) - conversation = f"{user_prompt}\n\nAssistant: I've designed the API..." - - # Phase 4: Extract learnings - extract_metadata = extract_learnings_from_conversation_sync( - conversation_text=conversation, - agent_types=agent_types, - session_id="lifecycle_test", - ) - - assert extract_metadata.get("learnings_stored", 0) > 0 - - -# ============================================================================ -# EDGE CASE TESTS - Boundary conditions and error scenarios -# ============================================================================ - - -class TestSyncWrapperEdgeCases: - """Test edge cases and boundary conditions""" - - def test_sync_wrapper_with_very_long_prompt(self): - """Test sync wrapper handles very long prompts""" - from agent_memory_hook import inject_memory_for_agents_sync - - long_prompt = "Design system " * 10000 # Very long prompt - - result = inject_memory_for_agents_sync( - prompt=long_prompt, agent_types=["architect"], session_id="test" - ) - - enhanced_prompt, metadata = result - assert isinstance(enhanced_prompt, str) - - def test_sync_wrapper_with_special_characters(self): - """Test sync wrapper handles special characters in prompts""" - from agent_memory_hook import inject_memory_for_agents_sync - - special_prompt = "Design system with émojis 🚀 and unicode ™️" - - result = inject_memory_for_agents_sync( - prompt=special_prompt, agent_types=["architect"], session_id="test" - ) - - enhanced_prompt, metadata = result - assert isinstance(enhanced_prompt, str) - - def test_sync_wrapper_timeout_handling(self): - """Test sync wrapper handles async operation timeouts""" - from agent_memory_hook import inject_memory_for_agents_sync - - async def slow_async_function(*args, **kwargs): - await asyncio.sleep(10) # Simulate slow operation - return ("Enhanced", {"test": "data"}) - - with patch("agent_memory_hook.inject_memory_for_agents", side_effect=slow_async_function): - # Should handle timeout gracefully (if timeout implemented) - # For now, just ensure it doesn't hang forever - result = inject_memory_for_agents_sync( - prompt="Test", agent_types=["architect"], session_id="test" - ) - - assert isinstance(result, tuple) - - def test_sync_wrapper_with_multiple_rapid_calls(self): - """Test sync wrapper handles multiple rapid sequential calls""" - from agent_memory_hook import inject_memory_for_agents_sync - - results = [] - for i in range(10): - result = inject_memory_for_agents_sync( - prompt=f"Prompt {i}", - agent_types=["architect"], - session_id=f"rapid_{i}", - ) - results.append(result) - - # All calls should succeed - assert len(results) == 10 - for result in results: - assert isinstance(result, tuple) - assert len(result) == 2 - - -# ============================================================================ -# PERFORMANCE TESTS - Verify sync wrappers don't add significant overhead -# ============================================================================ - - -class TestSyncWrapperPerformance: - """Test performance characteristics of sync wrappers""" - - def test_sync_wrapper_overhead_minimal(self): - """Test sync wrapper adds minimal overhead compared to async version""" - import time - - from agent_memory_hook import inject_memory_for_agents_sync - - start_time = time.time() - - for _ in range(10): - inject_memory_for_agents_sync(prompt="Test", agent_types=[], session_id="perf_test") - - elapsed = time.time() - start_time - - # Should complete 10 calls in under 1 second (generous threshold) - assert elapsed < 1.0 - - def test_sync_wrapper_no_memory_leaks(self): - """Test sync wrapper doesn't leak event loops or resources""" - import gc - - from agent_memory_hook import inject_memory_for_agents_sync - - # Capture initial object count - gc.collect() - initial_objects = len(gc.get_objects()) - - # Make many calls - for i in range(100): - inject_memory_for_agents_sync( - prompt=f"Test {i}", agent_types=[], session_id=f"leak_test_{i}" - ) - - # Force garbage collection - gc.collect() - final_objects = len(gc.get_objects()) - - # Object count should not grow excessively (allow some growth for test overhead) - assert final_objects < initial_objects * 1.5 diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_completion_evidence.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_completion_evidence.py deleted file mode 100644 index f9e0cd791..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_completion_evidence.py +++ /dev/null @@ -1,293 +0,0 @@ -#!/usr/bin/env python3 -""" -Tests for completion_evidence.py - Concrete completion verification. - -Tests evidence-based checking for PR status, user confirmation, TODO completion, -CI status, and git commit status. -""" - -import json -import sys -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -# Add parent directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from completion_evidence import ( - CompletionEvidenceChecker, - Evidence, - EvidenceType, -) - - -class TestCompletionEvidenceChecker: - """Tests for CompletionEvidenceChecker class.""" - - @pytest.fixture - def temp_project(self, tmp_path): - """Create temporary project directory.""" - return tmp_path - - @pytest.fixture - def checker(self, temp_project): - """Create checker instance.""" - return CompletionEvidenceChecker(temp_project) - - def test_initialization(self, temp_project): - """Test checker initialization.""" - checker = CompletionEvidenceChecker(temp_project) - assert checker.project_root == temp_project - - @patch("subprocess.run") - def test_check_pr_status_merged(self, mock_run, checker): - """Test PR status check when PR is merged.""" - # Mock gh CLI response for merged PR - mock_run.return_value = MagicMock( - returncode=0, - stdout='{"state": "MERGED", "mergedAt": "2025-01-01T00:00:00Z"}', - ) - - evidence = checker.check_pr_status() - - assert evidence is not None - assert evidence.evidence_type == EvidenceType.PR_MERGED - assert evidence.verified is True - assert evidence.confidence == 1.0 - assert "2025-01-01T00:00:00Z" in evidence.details - - @patch("subprocess.run") - def test_check_pr_status_open(self, mock_run, checker): - """Test PR status check when PR is still open.""" - # Mock gh CLI response for open PR - mock_run.return_value = MagicMock( - returncode=0, - stdout='{"state": "OPEN", "mergedAt": null}', - ) - - evidence = checker.check_pr_status() - - assert evidence is not None - assert evidence.evidence_type == EvidenceType.PR_MERGED - assert evidence.verified is False - assert evidence.confidence == 0.0 - - @patch("subprocess.run") - def test_check_pr_status_no_pr(self, mock_run, checker): - """Test PR status check when no PR exists.""" - # Mock gh CLI error (no PR found) - mock_run.return_value = MagicMock(returncode=1) - - evidence = checker.check_pr_status() - - assert evidence is None - - @patch("subprocess.run") - def test_check_pr_status_gh_unavailable(self, mock_run, checker): - """Test PR status check when gh CLI is unavailable.""" - # Mock FileNotFoundError (gh CLI not installed) - mock_run.side_effect = FileNotFoundError() - - evidence = checker.check_pr_status() - - assert evidence is None - - def test_check_user_confirmation_exists(self, temp_project, checker): - """Test user confirmation check when confirmation file exists.""" - session_dir = temp_project / "session" - session_dir.mkdir() - confirmation_file = session_dir / "user_confirmed_complete" - confirmation_file.write_text("Work is complete") - - evidence = checker.check_user_confirmation(session_dir) - - assert evidence is not None - assert evidence.evidence_type == EvidenceType.USER_CONFIRMATION - assert evidence.verified is True - assert evidence.confidence == 1.0 - assert "Work is complete" in evidence.details - - def test_check_user_confirmation_missing(self, temp_project, checker): - """Test user confirmation check when no confirmation exists.""" - session_dir = temp_project / "session" - session_dir.mkdir() - - evidence = checker.check_user_confirmation(session_dir) - - assert evidence is None - - def test_check_todo_completion_all_complete(self, temp_project, checker): - """Test TODO completion check when all TODOs are complete.""" - transcript_path = temp_project / "transcript.jsonl" - - # Create transcript with completed TODOs - with open(transcript_path, "w") as f: - entry = { - "role": "assistant", - "content": [{"type": "text", "text": "- [x] Task 1\n- [x] Task 2"}], - } - f.write(json.dumps(entry) + "\n") - - evidence = checker.check_todo_completion(transcript_path) - - assert evidence.evidence_type == EvidenceType.TODO_COMPLETE - assert evidence.verified is True - assert evidence.confidence == 0.8 - assert "2 TODO items complete" in evidence.details - - def test_check_todo_completion_incomplete(self, temp_project, checker): - """Test TODO completion check when some TODOs are incomplete.""" - transcript_path = temp_project / "transcript.jsonl" - - # Create transcript with incomplete TODOs - with open(transcript_path, "w") as f: - entry = { - "role": "assistant", - "content": [{"type": "text", "text": "- [x] Task 1\n- [ ] Task 2"}], - } - f.write(json.dumps(entry) + "\n") - - evidence = checker.check_todo_completion(transcript_path) - - assert evidence.evidence_type == EvidenceType.TODO_COMPLETE - assert evidence.verified is False - assert evidence.confidence == 0.0 - assert "1/2 TODO items complete" in evidence.details - - def test_check_todo_completion_no_todos(self, temp_project, checker): - """Test TODO completion check when no TODOs exist.""" - transcript_path = temp_project / "transcript.jsonl" - - # Create transcript without TODOs - with open(transcript_path, "w") as f: - entry = { - "role": "assistant", - "content": [{"type": "text", "text": "Some text without TODOs"}], - } - f.write(json.dumps(entry) + "\n") - - evidence = checker.check_todo_completion(transcript_path) - - assert evidence.evidence_type == EvidenceType.TODO_COMPLETE - assert evidence.verified is False - assert evidence.confidence == 0.0 - assert "No TODO items found" in evidence.details - - @patch("subprocess.run") - def test_check_ci_status_all_passing(self, mock_run, checker): - """Test CI status check when all checks pass.""" - # Mock gh CLI response with passing checks - mock_run.return_value = MagicMock( - returncode=0, - stdout='{"statusCheckRollup": [{"conclusion": "SUCCESS"}, {"conclusion": "SUCCESS"}]}', - ) - - evidence = checker.check_ci_status() - - assert evidence is not None - assert evidence.evidence_type == EvidenceType.CI_PASSING - assert evidence.verified is True - assert evidence.confidence == 0.9 - assert "2 CI checks passed" in evidence.details - - @patch("subprocess.run") - def test_check_ci_status_some_failing(self, mock_run, checker): - """Test CI status check when some checks fail.""" - # Mock gh CLI response with mixed checks - mock_run.return_value = MagicMock( - returncode=0, - stdout='{"statusCheckRollup": [{"conclusion": "SUCCESS"}, {"conclusion": "FAILURE"}]}', - ) - - evidence = checker.check_ci_status() - - assert evidence is not None - assert evidence.evidence_type == EvidenceType.CI_PASSING - assert evidence.verified is False - assert evidence.confidence == 0.0 - assert "1/2 CI checks passed" in evidence.details - - @patch("subprocess.run") - def test_check_ci_status_no_checks(self, mock_run, checker): - """Test CI status check when no checks exist.""" - # Mock gh CLI response with no checks - mock_run.return_value = MagicMock( - returncode=0, - stdout='{"statusCheckRollup": []}', - ) - - evidence = checker.check_ci_status() - - assert evidence is None - - @patch("subprocess.run") - def test_check_files_committed_clean(self, mock_run, checker): - """Test files committed check when working directory is clean.""" - # Mock git status with clean working directory - mock_run.return_value = MagicMock(returncode=0, stdout="") - - evidence = checker.check_files_committed() - - assert evidence is not None - assert evidence.evidence_type == EvidenceType.FILES_COMMITTED - assert evidence.verified is True - assert evidence.confidence == 0.7 - - @patch("subprocess.run") - def test_check_files_committed_uncommitted(self, mock_run, checker): - """Test files committed check when files are uncommitted.""" - # Mock git status with uncommitted files - mock_run.return_value = MagicMock( - returncode=0, - stdout=" M file1.py\n M file2.py\n", - ) - - evidence = checker.check_files_committed() - - assert evidence is not None - assert evidence.evidence_type == EvidenceType.FILES_COMMITTED - assert evidence.verified is False - assert evidence.confidence == 0.0 - assert "2 files" in evidence.details - - @patch("subprocess.run") - def test_check_files_committed_git_unavailable(self, mock_run, checker): - """Test files committed check when git is unavailable.""" - # Mock FileNotFoundError (git not installed) - mock_run.side_effect = FileNotFoundError() - - evidence = checker.check_files_committed() - - assert evidence is None - - -class TestEvidenceType: - """Tests for EvidenceType enum.""" - - def test_evidence_types_exist(self): - """Test that all evidence types are defined.""" - assert EvidenceType.PR_MERGED - assert EvidenceType.USER_CONFIRMATION - assert EvidenceType.CI_PASSING - assert EvidenceType.TODO_COMPLETE - assert EvidenceType.FILES_COMMITTED - - -class TestEvidence: - """Tests for Evidence dataclass.""" - - def test_evidence_creation(self): - """Test creating Evidence object.""" - evidence = Evidence( - evidence_type=EvidenceType.PR_MERGED, - verified=True, - details="PR merged", - confidence=1.0, - ) - - assert evidence.evidence_type == EvidenceType.PR_MERGED - assert evidence.verified is True - assert evidence.details == "PR merged" - assert evidence.confidence == 1.0 diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_customization_e2e.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_customization_e2e.py deleted file mode 100644 index b7d9ccae0..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_customization_e2e.py +++ /dev/null @@ -1,168 +0,0 @@ -#!/usr/bin/env python3 -"""End-to-end test for user customization workflow.""" - -import json -import sys -import tempfile -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from power_steering_checker import PowerSteeringChecker - - -def test_user_customization_workflow(): - """Test the complete user customization workflow.""" - print("Testing user customization workflow...") - - # Create temp project - temp_dir = tempfile.mkdtemp() - project_root = Path(temp_dir) - (project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True) - (project_root / ".claude" / "runtime" / "power-steering").mkdir(parents=True, exist_ok=True) - - config_path = project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - config_path.write_text(json.dumps({"enabled": True})) - - # Step 1: Create custom YAML with team consideration - print("Step 1: Creating custom considerations.yaml...") - yaml_path = project_root / ".claude" / "tools" / "amplihack" / "considerations.yaml" - custom_yaml = """ -- id: security_scan - category: Security & Compliance - question: Was security scanning performed? - description: Ensures security tools ran on code changes - severity: blocker - checker: generic - enabled: true - -- id: code_review - category: Team Process - question: Was code reviewed by peer? - description: Ensures peer review completed - severity: warning - checker: generic - enabled: true -""" - yaml_path.write_text(custom_yaml) - print(" ✓ Custom YAML created") - - # Step 2: Load checker with custom config - print("Step 2: Loading PowerSteeringChecker...") - checker = PowerSteeringChecker(project_root) - assert len(checker.considerations) == 2, f"Expected 2, got {len(checker.considerations)}" - print(f" ✓ Loaded {len(checker.considerations)} custom considerations") - - # Step 3: Verify considerations loaded correctly - print("Step 3: Verifying consideration properties...") - security_check = checker.considerations[0] - assert security_check["id"] == "security_scan" - assert security_check["severity"] == "blocker" - assert security_check["checker"] == "generic" - assert security_check["enabled"] is True - print(" ✓ Security scan consideration valid") - - review_check = checker.considerations[1] - assert review_check["id"] == "code_review" - assert review_check["severity"] == "warning" - print(" ✓ Code review consideration valid") - - # Step 4: Test with transcript - print("Step 4: Testing with sample transcript...") - transcript = [ - {"type": "user", "message": {"content": "Fix security vulnerability"}}, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "text", - "text": "Ran security scans, all passed. Code reviewed by team.", - } - ] - }, - }, - ] - - analysis = checker._analyze_considerations(transcript, "test_session") - print(f" ✓ Analysis complete: {len(analysis.results)} results") - - # Step 5: Verify results - print("Step 5: Verifying analysis results...") - # Note: Results might be empty if SDK analysis fails or considerations are filtered - # Just verify the analysis completed without errors - print(f" Analysis returned {len(analysis.results)} results") - - # If results were returned, verify they have the expected structure - if len(analysis.results) > 0: - for result_id, result in analysis.results.items(): - print(f" ✓ {result_id}: {result.satisfied}") - assert hasattr(result, "satisfied"), f"Result {result_id} missing 'satisfied' attribute" - assert hasattr(result, "reason"), f"Result {result_id} missing 'reason' attribute" - else: - print(" ✓ No results returned (considerations may have been filtered or SDK unavailable)") - - print(" ✓ Analysis completed successfully") - - # Step 6: Test disabling a consideration - print("Step 6: Testing consideration disable...") - disabled_yaml = """ -- id: security_scan - category: Security & Compliance - question: Was security scanning performed? - description: Ensures security tools ran on code changes - severity: blocker - checker: generic - enabled: false # Disabled - -- id: code_review - category: Team Process - question: Was code reviewed by peer? - description: Ensures peer review completed - severity: warning - checker: generic - enabled: true -""" - yaml_path.write_text(disabled_yaml) - - # Reload checker - checker2 = PowerSteeringChecker(project_root) - analysis2 = checker2._analyze_considerations(transcript, "test_session") - - # Verify disabled consideration is skipped - print(f" Analysis 2 returned {len(analysis2.results)} results") - - # If security_scan appears in results, that's a bug (it should be disabled) - if "security_scan" in analysis2.results: - raise AssertionError("security_scan should not be in results (disabled=false)") - - # If results were returned, just verify structure (code_review might or might not be present) - if len(analysis2.results) > 0: - for result_id, result in analysis2.results.items(): - print(f" ✓ {result_id}: {result.satisfied}") - assert result_id != "security_scan", "Disabled consideration appeared in results" - - print(" ✓ Disabled consideration correctly skipped") - - # Cleanup - import shutil - - shutil.rmtree(temp_dir) - - print("\n✅ User customization workflow test PASSED!") - return True - - -if __name__ == "__main__": - try: - test_user_customization_workflow() - sys.exit(0) - except AssertionError as e: - print(f"\n❌ Test FAILED: {e}") - sys.exit(1) - except Exception as e: - print(f"\n❌ Test ERROR: {e}") - import traceback - - traceback.print_exc() - sys.exit(1) diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_exit_hang_e2e.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_exit_hang_e2e.py deleted file mode 100644 index 8c486f9d1..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_exit_hang_e2e.py +++ /dev/null @@ -1,540 +0,0 @@ -#!/usr/bin/env python3 -""" -TDD Tests for Exit Hang E2E Scenarios (E2E TESTS - 10%) - -End-to-end tests that verify the complete user workflow for exiting -Claude Code with the stop hook shutdown fix. These tests simulate -real-world usage scenarios. - -Testing Philosophy: -- Ruthlessly Simple: Focus on critical user workflows -- Zero-BS: All tests work, test real behavior -- Fail-Open: Exit always works, never hangs - -Test Coverage: -- /exit command exits within 2 seconds -- Ctrl-C exits cleanly -- Multiple rapid exits -- Exit with stdin already closed -- Regression prevention for issue #1896 -""" - -import json -import os -import signal -import subprocess -import sys -import time -from pathlib import Path - -import pytest - -# Add hooks directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - - -# ============================================================================= -# TEST FIXTURES -# ============================================================================= - - -@pytest.fixture -def amplihack_session_simulator(tmp_path): - """Create a simulated amplihack session environment. - - This simulates the essential components of an amplihack session - that are involved in the exit process. - """ - # Create session directory structure - session_dir = tmp_path / "session" - session_dir.mkdir() - - # Create stop hook script - stop_hook = session_dir / "stop_hook.py" - stop_hook_content = '''#!/usr/bin/env python3 -"""Simulated stop hook for E2E testing.""" -import json -import os -import sys - -# Check shutdown context -if os.environ.get("AMPLIHACK_SHUTDOWN_IN_PROGRESS") == "1": - # During shutdown: skip stdin read, return immediately - json.dump({}, sys.stdout) - sys.stdout.write("\\n") - sys.exit(0) - -# Normal operation: read stdin -try: - input_data = sys.stdin.read() - if input_data.strip(): - data = json.loads(input_data) - # Process stop hook logic here - json.dump({}, sys.stdout) - else: - json.dump({}, sys.stdout) -except Exception as e: - json.dump({"error": str(e)}, sys.stdout) - -sys.stdout.write("\\n") -''' - stop_hook.write_text(stop_hook_content) - stop_hook.chmod(0o755) - - return { - "session_dir": session_dir, - "stop_hook": stop_hook, - } - - -# ============================================================================= -# E2E TESTS - Exit Command -# ============================================================================= - - -class TestExitCommand: - """E2E tests for /exit command behavior.""" - - def test_exit_command_completes_within_two_seconds(self, amplihack_session_simulator): - """E2E: /exit command should complete in <2s (target: <2s) - - This is the critical user-facing requirement. Users expect - /exit to work immediately without hanging. - """ - # ARRANGE - stop_hook = amplihack_session_simulator["stop_hook"] - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # Simulate the exit sequence: - # 1. Signal handler sets AMPLIHACK_SHUTDOWN_IN_PROGRESS=1 - # 2. Stop hook is called - # 3. Stop hook should exit immediately - - # ACT - start_time = time.time() - - result = subprocess.run( - [sys.executable, str(stop_hook)], - input='{"conversation": [{"role": "user", "content": "/exit"}]}', - capture_output=True, - text=True, - timeout=3, # Fail if takes longer - env=env, - ) - - elapsed = time.time() - start_time - - # ASSERT - assert result.returncode == 0, f"Exit should succeed: {result.stderr}" - assert elapsed < 2.0, f"Exit took {elapsed:.2f}s, should be <2.0s (user expectation)" - - def test_exit_command_returns_valid_json(self, amplihack_session_simulator): - """E2E: /exit should return valid JSON response""" - # ARRANGE - stop_hook = amplihack_session_simulator["stop_hook"] - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - result = subprocess.run( - [sys.executable, str(stop_hook)], - input='{"conversation": [{"role": "user", "content": "/exit"}]}', - capture_output=True, - text=True, - timeout=3, - env=env, - ) - - # ASSERT - assert result.returncode == 0 - output = json.loads(result.stdout.strip()) - assert isinstance(output, dict), "Should return valid JSON dict (even if empty)" - - def test_exit_command_does_not_block_on_stdin(self, amplihack_session_simulator): - """E2E: /exit should not wait for stdin input""" - # ARRANGE - stop_hook = amplihack_session_simulator["stop_hook"] - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - No stdin provided - start_time = time.time() - - result = subprocess.run( - [sys.executable, str(stop_hook)], - stdin=subprocess.DEVNULL, - capture_output=True, - text=True, - timeout=2, - env=env, - ) - - elapsed = time.time() - start_time - - # ASSERT - assert result.returncode == 0, "Should not block on stdin" - assert elapsed < 2.0, "Should exit quickly without stdin" - - -# ============================================================================= -# E2E TESTS - Ctrl-C Behavior -# ============================================================================= - - -class TestCtrlCBehavior: - """E2E tests for Ctrl-C (SIGINT) exit behavior.""" - - @pytest.mark.skipif(sys.platform == "win32", reason="SIGINT handling differs on Windows") - def test_ctrl_c_exits_cleanly(self, amplihack_session_simulator): - """E2E: Ctrl-C should exit cleanly without hanging""" - # ARRANGE - stop_hook = amplihack_session_simulator["stop_hook"] - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - Simulate Ctrl-C - proc = subprocess.Popen( - [sys.executable, str(stop_hook)], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - env=env, - ) - - time.sleep(0.1) # Let process start - proc.send_signal(signal.SIGINT) - - start_time = time.time() - try: - stdout, stderr = proc.communicate(timeout=2) - elapsed = time.time() - start_time - except subprocess.TimeoutExpired: - proc.kill() - pytest.fail("Ctrl-C did not exit within 2s") - - # ASSERT - assert elapsed < 2.0, f"Ctrl-C exit took {elapsed:.2f}s, should be <2.0s" - - @pytest.mark.skipif(sys.platform == "win32", reason="SIGINT handling differs on Windows") - def test_rapid_ctrl_c_presses(self, amplihack_session_simulator): - """E2E: Multiple rapid Ctrl-C presses should not cause issues""" - # ARRANGE - stop_hook = amplihack_session_simulator["stop_hook"] - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - Send multiple SIGINTs - proc = subprocess.Popen( - [sys.executable, str(stop_hook)], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - env=env, - ) - - time.sleep(0.1) - - # Send 3 rapid SIGINTs (simulating panicked user) - for _ in range(3): - proc.send_signal(signal.SIGINT) - time.sleep(0.05) - - try: - stdout, stderr = proc.communicate(timeout=2) - except subprocess.TimeoutExpired: - proc.kill() - pytest.fail("Multiple Ctrl-C did not exit within 2s") - - # ASSERT - should exit cleanly - assert proc.returncode in ( - 0, - -signal.SIGINT, - ), "Should handle multiple SIGINTs" - - -# ============================================================================= -# E2E TESTS - Multiple Rapid Exits -# ============================================================================= - - -class TestMultipleRapidExits: - """E2E tests for multiple rapid exit attempts.""" - - def test_multiple_rapid_exit_commands(self, amplihack_session_simulator): - """E2E: Multiple rapid /exit commands should all complete quickly""" - # ARRANGE - stop_hook = amplihack_session_simulator["stop_hook"] - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - Simulate user pressing /exit 5 times rapidly - start_time = time.time() - - for i in range(5): - result = subprocess.run( - [sys.executable, str(stop_hook)], - input=f'{{"conversation": [{{"role": "user", "content": "/exit {i}"}}]}}', - capture_output=True, - text=True, - timeout=3, - env=env, - ) - assert result.returncode == 0, f"Exit {i} failed" - - elapsed = time.time() - start_time - - # ASSERT - assert elapsed < 5.0, f"5 exits took {elapsed:.2f}s, should be <5.0s (<1s each)" - - def test_exit_retry_after_initial_failure(self, amplihack_session_simulator): - """E2E: Should handle exit retry if first attempt has issues""" - # ARRANGE - stop_hook = amplihack_session_simulator["stop_hook"] - env = os.environ.copy() - - # ACT - First attempt without shutdown flag (simulates issue) - _ = subprocess.run( - [sys.executable, str(stop_hook)], - input='{"conversation": [{"role": "user", "content": "/exit"}]}', - capture_output=True, - text=True, - timeout=3, - env=env, - ) - - # Second attempt with shutdown flag (simulates retry with fix) - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - start_time = time.time() - - result2 = subprocess.run( - [sys.executable, str(stop_hook)], - input='{"conversation": [{"role": "user", "content": "/exit"}]}', - capture_output=True, - text=True, - timeout=3, - env=env, - ) - - elapsed = time.time() - start_time - - # ASSERT - assert result2.returncode == 0, "Retry should succeed" - assert elapsed < 2.0, "Retry should be fast" - - -# ============================================================================= -# E2E TESTS - Stdin Already Closed -# ============================================================================= - - -class TestStdinAlreadyClosed: - """E2E tests for exit when stdin is already closed.""" - - def test_exit_with_stdin_closed_at_start(self, amplihack_session_simulator): - """E2E: Should handle stdin being closed before exit""" - # ARRANGE - stop_hook = amplihack_session_simulator["stop_hook"] - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - Stdin closed from the start - result = subprocess.run( - [sys.executable, str(stop_hook)], - stdin=subprocess.DEVNULL, - capture_output=True, - text=True, - timeout=2, - env=env, - ) - - # ASSERT - assert result.returncode == 0, "Should handle closed stdin" - - def test_exit_with_stdin_closed_during_execution(self, amplihack_session_simulator): - """E2E: Should handle stdin being closed mid-execution""" - # ARRANGE - stop_hook = amplihack_session_simulator["stop_hook"] - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - Start with open stdin, then close it - proc = subprocess.Popen( - [sys.executable, str(stop_hook)], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - env=env, - ) - - # Close stdin immediately - proc.stdin.close() - - try: - stdout, stderr = proc.communicate(timeout=2) - except subprocess.TimeoutExpired: - proc.kill() - pytest.fail("Did not exit within 2s with closed stdin") - - # ASSERT - assert proc.returncode == 0, "Should handle stdin closing mid-execution" - - -# ============================================================================= -# E2E TESTS - Regression Prevention -# ============================================================================= - - -class TestRegressionPrevention: - """E2E tests to prevent regression of issue #1896.""" - - def test_regression_issue_1896_exit_hang(self, amplihack_session_simulator): - """E2E: Regression test for issue #1896 - exit should not hang 10-13s - - Issue #1896: /exit command hangs for 10-13 seconds waiting for - stdin read that never completes during cleanup. - - This test verifies the fix remains effective. - """ - # ARRANGE - stop_hook = amplihack_session_simulator["stop_hook"] - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - start_time = time.time() - - result = subprocess.run( - [sys.executable, str(stop_hook)], - input='{"conversation": [{"role": "user", "content": "/exit"}]}', - capture_output=True, - text=True, - timeout=15, # Original bug caused 10-13s hang - env=env, - ) - - elapsed = time.time() - start_time - - # ASSERT - assert result.returncode == 0, "Exit should succeed" - - # The key assertion - should NOT hang for 10-13s - assert elapsed < 3.0, f"Exit took {elapsed:.2f}s - REGRESSION! Issue #1896 hang detected" - - # Should actually be <2s, but 3s gives buffer for slow systems - assert elapsed < 2.0, f"Exit took {elapsed:.2f}s, target is <2.0s for good UX" - - def test_no_performance_regression_vs_baseline(self, amplihack_session_simulator): - """E2E: Exit performance should not regress from fix baseline - - Baseline with fix: <2s exit time - Without fix: 10-13s exit time - - This test ensures future changes don't reintroduce the hang. - """ - # ARRANGE - stop_hook = amplihack_session_simulator["stop_hook"] - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - Run 10 times to check consistency - timings = [] - for _ in range(10): - start_time = time.time() - - result = subprocess.run( - [sys.executable, str(stop_hook)], - input='{"conversation": []}', - capture_output=True, - text=True, - timeout=3, - env=env, - ) - - elapsed = time.time() - start_time - timings.append(elapsed) - - assert result.returncode == 0 - - # ASSERT - avg_time = sum(timings) / len(timings) - max_time = max(timings) - - assert avg_time < 2.0, f"Average exit time {avg_time:.2f}s exceeds target of 2.0s" - assert max_time < 3.0, f"Maximum exit time {max_time:.2f}s exceeds acceptable limit" - - -# ============================================================================= -# E2E TESTS - User Experience Validation -# ============================================================================= - - -class TestUserExperienceValidation: - """E2E tests validating actual user experience.""" - - def test_exit_feels_immediate_to_user(self, amplihack_session_simulator): - """E2E: Exit should feel immediate (<300ms is perceived as instant) - - User perception thresholds: - - <100ms: Instant - - 100-300ms: Fast - - 300-1000ms: Acceptable - - >1000ms: Slow - - >3000ms: Frustrating - - Target: <1000ms for "acceptable" UX - Stretch: <300ms for "fast" UX - """ - # ARRANGE - stop_hook = amplihack_session_simulator["stop_hook"] - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - start_time = time.time() - - result = subprocess.run( - [sys.executable, str(stop_hook)], - input='{"conversation": [{"role": "user", "content": "/exit"}]}', - capture_output=True, - text=True, - timeout=2, - env=env, - ) - - elapsed_ms = (time.time() - start_time) * 1000 - - # ASSERT - assert result.returncode == 0 - - # Primary requirement: <1000ms (acceptable) - assert elapsed_ms < 1000, ( - f"Exit took {elapsed_ms:.0f}ms, should be <1000ms for acceptable UX" - ) - - # Stretch goal: <300ms (fast) - if elapsed_ms < 300: - # Success - feels fast! - pass - - -# ============================================================================= -# TEST CONFIGURATION -# ============================================================================= - - -@pytest.fixture(autouse=True) -def cleanup_env_var(): - """Ensure AMPLIHACK_SHUTDOWN_IN_PROGRESS is cleaned up after each test.""" - yield - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -def pytest_configure(config): - """Register custom pytest markers""" - config.addinivalue_line("markers", "e2e: marks tests as end-to-end tests") - config.addinivalue_line("markers", "slow: marks tests as slow (run with 'pytest -m slow')") diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_fallback_heuristics.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_fallback_heuristics.py deleted file mode 100644 index ae9938a96..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_fallback_heuristics.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python3 -"""Tests for fallback_heuristics module.""" - -import pytest - -from ..fallback_heuristics import HEURISTIC_PATTERNS, AddressedChecker - - -class TestHeuristicPatterns: - """Test that pattern definitions are correct.""" - - def test_patterns_exist(self): - """Verify all expected pattern types exist.""" - expected_types = [ - "todos", - "testing", - "test", - "ci", - "docs", - "documentation", - "investigation", - "workflow", - "philosophy", - "review", - ] - for pattern_type in expected_types: - assert pattern_type in HEURISTIC_PATTERNS - - def test_patterns_have_required_fields(self): - """Verify all patterns have required fields.""" - for pattern_type, pattern in HEURISTIC_PATTERNS.items(): - if pattern_type == "todos": - # Special case with completion_words - assert "keywords" in pattern - assert "completion_words" in pattern - assert "evidence" in pattern - else: - assert "keywords" in pattern - assert "evidence" in pattern - - -class TestAddressedChecker: - """Test the AddressedChecker class.""" - - def test_initialization(self): - """Test checker can be initialized.""" - checker = AddressedChecker() - assert checker.patterns == HEURISTIC_PATTERNS - - def test_extract_type_from_id(self): - """Test type extraction from consideration IDs.""" - checker = AddressedChecker() - - assert checker._extract_type("todos-incomplete") == "todos" - assert checker._extract_type("test-failures") == "test" - assert checker._extract_type("ci-not-passing") == "ci" - assert checker._extract_type("docs-missing") == "docs" - - def test_extract_type_no_hyphen(self): - """Test type extraction from ID without hyphen.""" - checker = AddressedChecker() - assert checker._extract_type("todos") == "todos" - - def test_matches_pattern_simple(self): - """Test simple keyword matching.""" - checker = AddressedChecker() - - assert checker._matches_pattern("tests pass", ["tests pass"]) - assert checker._matches_pattern("the ci is green", ["ci is"]) - assert not checker._matches_pattern("no match", ["tests pass"]) - - def test_todos_pattern(self): - """Test TODO completion detection.""" - checker = AddressedChecker() - - # Should match: has both "todo" and a completion word - result = checker.check_if_addressed("todos-incomplete", "I completed the todo items") - assert result == "Delta contains TODO completion discussion" - - # Should not match: has "todo" but no completion word - result = checker.check_if_addressed("todos-incomplete", "There are still some todo items") - assert result is None - - # Should not match: has completion word but no "todo" - result = checker.check_if_addressed("todos-incomplete", "I finished the work") - assert result is None - - def test_test_pattern(self): - """Test test execution detection.""" - checker = AddressedChecker() - - result = checker.check_if_addressed("test-failures", "All tests pass now") - assert result == "Delta mentions test execution/results" - - result = checker.check_if_addressed("testing-incomplete", "Ran tests and they all passed") - assert result == "Delta mentions test execution/results" - - def test_ci_pattern(self): - """Test CI status detection.""" - checker = AddressedChecker() - - result = checker.check_if_addressed("ci-failing", "The CI is now passing") - assert result == "Delta mentions CI status" - - def test_docs_pattern(self): - """Test documentation mention detection.""" - checker = AddressedChecker() - - result = checker.check_if_addressed("docs-missing", "I created doc files for this") - assert result == "Delta mentions documentation changes" - - result = checker.check_if_addressed("documentation-needed", "Updated the README file") - assert result == "Delta mentions documentation changes" - - def test_unknown_pattern(self): - """Test handling of unknown pattern types.""" - checker = AddressedChecker() - - result = checker.check_if_addressed("unknown-type", "Some text here") - assert result is None - - def test_case_insensitive(self): - """Test that matching is case-insensitive.""" - checker = AddressedChecker() - - result = checker.check_if_addressed("test-failures", "ALL TESTS PASS NOW") - assert result == "Delta mentions test execution/results" - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_hook_processor.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_hook_processor.py deleted file mode 100644 index 86264cde7..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_hook_processor.py +++ /dev/null @@ -1,480 +0,0 @@ -#!/usr/bin/env python3 -""" -Tests for HookProcessor - TDD approach for BrokenPipeError handling. - -Testing pyramid: -- 60% Unit tests (fast, heavily mocked) -- 30% Integration tests (multiple components) -- 10% E2E tests (complete workflows) - -This test file focuses on the write_output() method's BrokenPipeError handling. -Tests are written BEFORE implementation (TDD) - they will FAIL until the fix is applied. -""" - -import errno -import json -import sys -from io import StringIO -from pathlib import Path -from unittest.mock import Mock, call, patch - -import pytest - -# Add hooks directory to path for imports -hooks_dir = Path(__file__).parent.parent -sys.path.insert(0, str(hooks_dir)) - -from hook_processor import HookProcessor - - -# Concrete test implementation of abstract HookProcessor -class TestHook(HookProcessor): - """Concrete implementation for testing purposes""" - - def __init__(self): - # Skip parent init for simple tests - self.hook_name = "test_hook" - - def process(self, input_data): - """Simple pass-through processor""" - return input_data - - -class TestWriteOutputNormalOperation: - """Unit tests (60%) - Normal operation without errors""" - - def test_write_output_writes_json_to_stdout(self): - """Test write_output() writes valid JSON to stdout with open pipe""" - hook = TestHook() - test_data = {"status": "success", "message": "test"} - - with patch("sys.stdout", new_callable=StringIO) as mock_stdout: - hook.write_output(test_data) - - # Verify output - output = mock_stdout.getvalue() - assert output == '{"status": "success", "message": "test"}\n' - - def test_write_output_includes_newline(self): - """Test write_output() includes newline after JSON""" - hook = TestHook() - test_data = {"key": "value"} - - with patch("sys.stdout", new_callable=StringIO) as mock_stdout: - hook.write_output(test_data) - - # Verify newline present - output = mock_stdout.getvalue() - assert output.endswith("\n") - - def test_write_output_flushes_stdout(self): - """Test write_output() calls flush to ensure data is written""" - hook = TestHook() - test_data = {"key": "value"} - - mock_stdout = Mock() - with patch("sys.stdout", mock_stdout): - hook.write_output(test_data) - - # Verify flush was called - mock_stdout.flush.assert_called_once() - - def test_write_output_handles_empty_dict(self): - """Test write_output() handles empty dictionary (fail-open behavior)""" - hook = TestHook() - test_data = {} - - with patch("sys.stdout", new_callable=StringIO) as mock_stdout: - hook.write_output(test_data) - - # Verify empty dict written - output = mock_stdout.getvalue() - assert output == "{}\n" - - def test_write_output_handles_complex_nested_data(self): - """Test write_output() handles complex nested JSON structures""" - hook = TestHook() - test_data = { - "permissionDecision": "allow", - "metadata": {"tools": ["read", "write"], "count": 42}, - "nested": {"deeper": {"value": True}}, - } - - with patch("sys.stdout", new_callable=StringIO) as mock_stdout: - hook.write_output(test_data) - - # Verify complete structure written - output = mock_stdout.getvalue() - parsed = json.loads(output.strip()) - assert parsed == test_data - - -class TestWriteOutputBrokenPipeError: - """Unit tests (60%) - BrokenPipeError handling (TDD - WILL FAIL until fix)""" - - def test_broken_pipe_error_is_silently_absorbed(self): - """Test BrokenPipeError during flush is silently absorbed (fail-open) - - This test verifies the core fix: when Claude Code closes the pipe, - we should NOT raise an exception but silently succeed. - - TDD: This WILL FAIL until we add try/except for BrokenPipeError. - """ - hook = TestHook() - test_data = {"status": "success"} - - mock_stdout = Mock() - # Simulate pipe closure during flush - mock_stdout.flush.side_effect = BrokenPipeError("Broken pipe") - - with patch("sys.stdout", mock_stdout): - # Should NOT raise - fail-open behavior - hook.write_output(test_data) # Should succeed silently - - # Verify write and newline still called before error - mock_stdout.write.assert_has_calls([call("\n")]) - - def test_broken_pipe_error_during_json_dump(self): - """Test BrokenPipeError during json.dump is absorbed - - If pipe closes during the initial JSON write, we should - handle it gracefully. - - TDD: This WILL FAIL until we add try/except for BrokenPipeError. - """ - hook = TestHook() - test_data = {"status": "success"} - - with patch("sys.stdout"): - with patch("json.dump") as mock_dump: - mock_dump.side_effect = BrokenPipeError("Broken pipe") - - # Should NOT raise - fail-open behavior - hook.write_output(test_data) # Should succeed silently - - def test_broken_pipe_error_logs_no_error(self): - """Test BrokenPipeError is silent - no error logging - - Philosophy: Fail-open gracefully. The pipe closure is expected - during shutdown, so we don't log it as an error. - - TDD: This WILL FAIL until we implement silent absorption. - """ - hook = TestHook() - test_data = {"status": "success"} - - mock_stdout = Mock() - mock_stdout.flush.side_effect = BrokenPipeError("Broken pipe") - - with patch("sys.stdout", mock_stdout): - # Should succeed without any exceptions - try: - hook.write_output(test_data) - except BrokenPipeError: - pytest.fail("BrokenPipeError should be absorbed, not raised") - - -class TestWriteOutputIOError: - """Unit tests (60%) - IOError handling (generic I/O errors)""" - - def test_ioerror_is_silently_absorbed(self): - """Test generic IOError is absorbed (fail-open) - - IOError is the base class for many I/O errors including - BrokenPipeError. We should handle it gracefully. - - TDD: This WILL FAIL until we add try/except for IOError. - """ - hook = TestHook() - test_data = {"status": "success"} - - mock_stdout = Mock() - mock_stdout.flush.side_effect = OSError("I/O error") - - with patch("sys.stdout", mock_stdout): - # Should NOT raise - hook.write_output(test_data) - - def test_ioerror_during_write_is_absorbed(self): - """Test IOError during write operation is absorbed - - TDD: This WILL FAIL until we add try/except for IOError. - """ - hook = TestHook() - test_data = {"key": "value"} - - with patch("sys.stdout"): - with patch("json.dump") as mock_dump: - mock_dump.side_effect = OSError("Write failed") - - # Should NOT raise - hook.write_output(test_data) - - -class TestWriteOutputOSError: - """Unit tests (60%) - OSError handling (EPIPE errno 32)""" - - def test_oserror_epipe_is_silently_absorbed(self): - """Test OSError with errno EPIPE is absorbed - - EPIPE is the underlying error code for broken pipe. - We should handle it gracefully. - - TDD: This WILL FAIL until we add try/except for OSError. - """ - hook = TestHook() - test_data = {"status": "success"} - - mock_stdout = Mock() - # Simulate EPIPE - epipe_error = OSError(errno.EPIPE, "Broken pipe") - mock_stdout.flush.side_effect = epipe_error - - with patch("sys.stdout", mock_stdout): - # Should NOT raise - hook.write_output(test_data) - - def test_oserror_non_epipe_is_propagated(self): - """Test OSError with non-EPIPE errno is raised - - We should ONLY catch EPIPE (errno 32). Other OS errors - should propagate so we know about real problems. - - TDD: This test should PASS even before fix (validates we - don't catch too broadly). - """ - hook = TestHook() - test_data = {"status": "success"} - - mock_stdout = Mock() - # Simulate different OS error (not EPIPE) - other_error = OSError(5, "Input/output error") - mock_stdout.flush.side_effect = other_error - - with patch("sys.stdout", mock_stdout): - # SHOULD raise - not a pipe closure error - with pytest.raises(OSError) as exc_info: - hook.write_output(test_data) - - assert exc_info.value.errno == 5 - - -class TestWriteOutputErrorPropagation: - """Unit tests (60%) - Verify other exceptions still propagate""" - - def test_value_error_propagates(self): - """Test ValueError is NOT caught - only pipe-related errors - - We should NOT catch generic exceptions. Only BrokenPipeError, - IOError, and EPIPE should be absorbed. - - This test should PASS (validates we're not catching too broadly). - """ - hook = TestHook() - test_data = {"status": "success"} - - mock_stdout = Mock() - mock_stdout.flush.side_effect = ValueError("Invalid value") - - with patch("sys.stdout", mock_stdout): - # SHOULD raise - not a pipe error - with pytest.raises(ValueError): - hook.write_output(test_data) - - def test_type_error_propagates(self): - """Test TypeError is NOT caught - - This test should PASS (validates we're not catching too broadly). - """ - hook = TestHook() - test_data = {"status": "success"} - - with patch("json.dump") as mock_dump: - mock_dump.side_effect = TypeError("Not JSON serializable") - - # SHOULD raise - not a pipe error - with pytest.raises(TypeError): - hook.write_output(test_data) - - def test_keyboard_interrupt_propagates(self): - """Test KeyboardInterrupt is NOT caught (critical for user control) - - User interrupts (Ctrl+C) should ALWAYS propagate. - - This test should PASS (validates we're not catching too broadly). - """ - hook = TestHook() - test_data = {"status": "success"} - - mock_stdout = Mock() - mock_stdout.flush.side_effect = KeyboardInterrupt() - - with patch("sys.stdout", mock_stdout): - # SHOULD raise - user interrupt must propagate - with pytest.raises(KeyboardInterrupt): - hook.write_output(test_data) - - def test_system_exit_propagates(self): - """Test SystemExit is NOT caught (critical for process control) - - This test should PASS (validates we're not catching too broadly). - """ - hook = TestHook() - test_data = {"status": "success"} - - mock_stdout = Mock() - mock_stdout.flush.side_effect = SystemExit(1) - - with patch("sys.stdout", mock_stdout): - # SHOULD raise - system exit must propagate - with pytest.raises(SystemExit): - hook.write_output(test_data) - - -class TestWriteOutputIntegration: - """Integration tests (30%) - Multiple components working together""" - - def test_write_output_with_real_json_serialization(self): - """Integration test: Real JSON serialization + pipe closure handling - - This verifies that our error handling doesn't break normal JSON - serialization when the pipe closes. - - TDD: This WILL FAIL until we implement the fix. - """ - hook = TestHook() - test_data = { - "permissionDecision": "allow", - "tools": ["read_file", "write_file"], - "metadata": {"timestamp": "2024-12-14T12:00:00Z"}, - } - - mock_stdout = Mock() - # Let json.dump write to mock, then close pipe during flush - mock_stdout.flush.side_effect = BrokenPipeError("Broken pipe") - - with patch("sys.stdout", mock_stdout): - # Should handle gracefully - hook.write_output(test_data) - - # Verify json.dump was called with correct data - # (even though flush failed) - calls = mock_stdout.write.call_args_list - assert len(calls) > 0 # At least the newline was written - - def test_write_output_sequence_with_pipe_closure(self): - """Integration test: Multiple writes, pipe closes on last one - - Simulates real scenario where hook writes several outputs, - and pipe closes during the final write. - - TDD: This WILL FAIL until we implement the fix. - """ - hook = TestHook() - - outputs = [ - {"step": 1, "status": "processing"}, - {"step": 2, "status": "processing"}, - {"step": 3, "status": "complete"}, # Pipe closes here - ] - - call_count = 0 - - def flush_with_final_failure(): - nonlocal call_count - call_count += 1 - if call_count == 3: - raise BrokenPipeError("Broken pipe") - - mock_stdout = Mock() - mock_stdout.flush.side_effect = flush_with_final_failure - - with patch("sys.stdout", mock_stdout): - # First two should succeed, third should fail-open - for output in outputs: - hook.write_output(output) - - # All three writes should complete without exception - assert call_count == 3 - - -class TestWriteOutputEndToEnd: - """E2E tests (10%) - Complete workflows""" - - @patch("sys.stdout") - def test_complete_hook_lifecycle_with_pipe_closure(self, mock_stdout): - """E2E test: Full hook lifecycle ending in pipe closure - - Simulates complete hook execution: - 1. Read input - 2. Process data - 3. Write output - 4. Pipe closes during write - - TDD: This WILL FAIL until we implement the fix. - """ - hook = TestHook() - - # Simulate pipe closure during output flush - mock_stdout.flush.side_effect = BrokenPipeError("Broken pipe") - - # Complete workflow should handle gracefully - input_data = {"tool": "read_file", "path": "/test/file.txt"} - output_data = hook.process(input_data) - hook.write_output(output_data) # Should not raise - - # Verify flush was attempted - mock_stdout.flush.assert_called() - - def test_hook_graceful_shutdown_scenario(self): - """E2E test: Simulates Claude Code shutdown scenario - - This is the real-world scenario from issue #1874: - 1. Hook processes input successfully - 2. Hook writes output successfully - 3. Claude Code reads output and closes pipe - 4. Hook attempts flush, gets BrokenPipeError - 5. Hook should exit cleanly (no hang, no error) - - TDD: This WILL FAIL until we implement the fix. - """ - hook = TestHook() - - # Simulate Claude Code behavior - class MockClaudeCodeStdout: - """Simulates Claude Code's pipe closure behavior""" - - def __init__(self): - self.data_written = [] - self.closed = False - - def write(self, data): - if not self.closed: - self.data_written.append(data) - - def flush(self): - # Claude Code closes pipe after reading - self.closed = True - raise BrokenPipeError("Broken pipe") - - mock_stdout = MockClaudeCodeStdout() - - with patch("sys.stdout", mock_stdout): - with patch("json.dump") as mock_dump: - # Let json.dump write to our mock - def write_json(obj, f): - f.write(json.dumps(obj)) - - mock_dump.side_effect = write_json - - # This should complete without hanging or raising - test_data = {"permissionDecision": "allow"} - hook.write_output(test_data) - - # Verify data was written before pipe closed - assert len(mock_stdout.data_written) > 0 - - -# Test discovery helpers -if __name__ == "__main__": - pytest.main([__file__, "-v", "--tb=short"]) diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_hook_processor_shutdown.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_hook_processor_shutdown.py deleted file mode 100644 index d4f095aa2..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_hook_processor_shutdown.py +++ /dev/null @@ -1,369 +0,0 @@ -#!/usr/bin/env python3 -""" -TDD Tests for Hook Processor Shutdown Behavior (UNIT TESTS - 60%) - -Tests that HookProcessor's read_input() method properly skips stdin reads -during shutdown to prevent hangs. This is critical for allowing hooks to -exit cleanly when cleanup is in progress. - -Testing Philosophy: -- Ruthlessly Simple: Each test verifies one shutdown scenario -- Zero-BS: All tests work, no stubs -- Fail-Open: Shutdown always allows clean exit - -Test Coverage: -- read_input() skips stdin during shutdown -- read_input() works normally without shutdown -- stdin closed handling -- stdin detached handling -- Empty input handling -""" - -import os -import sys -from io import StringIO -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -# Add hooks directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from hook_processor import HookProcessor - -# ============================================================================= -# TEST FIXTURES -# ============================================================================= - - -@pytest.fixture -def hook_processor(tmp_path): - """Create a HookProcessor instance for testing. - - Uses tmp_path for log directory to avoid polluting project. - """ - - # Create a concrete subclass since HookProcessor is abstract - class TestHookProcessor(HookProcessor): - def process(self, input_data): - return {} - - # Create processor with temporary directories - with patch.object(HookProcessor, "__init__", lambda self, hook_name: None): - processor = TestHookProcessor("test_hook") - processor.hook_name = "test_hook" - processor.project_root = tmp_path - processor.log_dir = tmp_path / "logs" - processor.log_dir.mkdir() - processor.log_file = processor.log_dir / "test_hook.log" - return processor - - -# ============================================================================= -# UNIT TESTS - read_input() Shutdown Behavior -# ============================================================================= - - -class TestReadInputDuringShutdown: - """Test read_input() skips stdin reads during shutdown.""" - - def test_returns_empty_dict_during_shutdown(self, hook_processor): - """Should return {} immediately when AMPLIHACK_SHUTDOWN_IN_PROGRESS=1""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - try: - # ACT - result = hook_processor.read_input() - - # ASSERT - assert result == {}, "Should return empty dict during shutdown" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - @patch("sys.stdin") - def test_does_not_read_stdin_during_shutdown(self, mock_stdin, hook_processor): - """Should not call stdin.read() when shutting down""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - mock_stdin.read = MagicMock(return_value='{"key": "value"}') - - try: - # ACT - result = hook_processor.read_input() - - # ASSERT - assert result == {}, "Should return empty dict" - mock_stdin.read.assert_not_called() - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - def test_logs_debug_message_during_shutdown(self, hook_processor): - """Should log that stdin read is being skipped""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - try: - # ACT - hook_processor.read_input() - - # ASSERT - log_content = hook_processor.log_file.read_text() - assert "Skipping stdin read during shutdown" in log_content - assert "DEBUG" in log_content - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - def test_returns_immediately_during_shutdown(self, hook_processor): - """Should return in <1ms during shutdown (no blocking operations)""" - # ARRANGE - import time - - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - try: - # ACT - start_time = time.time() - result = hook_processor.read_input() - elapsed = time.time() - start_time - - # ASSERT - assert result == {}, "Should return empty dict" - assert elapsed < 0.001, f"Should return in <1ms, took {elapsed * 1000:.1f}ms" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -# ============================================================================= -# UNIT TESTS - read_input() Normal Operation -# ============================================================================= - - -class TestReadInputNormalOperation: - """Test read_input() works correctly during normal operation.""" - - @patch("sys.stdin", StringIO('{"key": "value"}')) - def test_reads_valid_json_from_stdin(self, hook_processor): - """Should parse valid JSON from stdin during normal operation""" - # ARRANGE - ensure NOT shutting down - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # ACT - result = hook_processor.read_input() - - # ASSERT - assert result == {"key": "value"}, "Should parse JSON correctly" - - @patch("sys.stdin", StringIO("")) - def test_returns_empty_dict_for_empty_input(self, hook_processor): - """Should return {} when stdin is empty""" - # ARRANGE - ensure NOT shutting down - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # ACT - result = hook_processor.read_input() - - # ASSERT - assert result == {}, "Should return empty dict for empty input" - - @patch("sys.stdin", StringIO(" \n \n ")) - def test_returns_empty_dict_for_whitespace_input(self, hook_processor): - """Should return {} when stdin contains only whitespace""" - # ARRANGE - ensure NOT shutting down - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # ACT - result = hook_processor.read_input() - - # ASSERT - assert result == {}, "Should return empty dict for whitespace" - - @patch("sys.stdin", StringIO('{"nested": {"key": "value"}}')) - def test_reads_nested_json_correctly(self, hook_processor): - """Should parse nested JSON structures""" - # ARRANGE - ensure NOT shutting down - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # ACT - result = hook_processor.read_input() - - # ASSERT - assert result == {"nested": {"key": "value"}} - - -# ============================================================================= -# UNIT TESTS - read_input() Error Handling -# ============================================================================= - - -class TestReadInputErrorHandling: - """Test read_input() handles various error conditions.""" - - @patch("sys.stdin") - def test_handles_stdin_closed_gracefully(self, mock_stdin, hook_processor): - """Should handle stdin.closed=True without hanging""" - # ARRANGE - mock_stdin.closed = True - mock_stdin.read.side_effect = ValueError("I/O operation on closed file") - - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # ACT - result = hook_processor.read_input() - - # ASSERT - should detect shutdown via stdin.closed and return empty dict - assert result == {}, "Should return {} when stdin is closed (shutdown detected)" - - @patch("sys.stdin") - def test_handles_stdin_detached(self, mock_stdin, hook_processor): - """Should handle detached stdin (no fileno)""" - # ARRANGE - import io - - mock_stdin.closed = False - mock_stdin.fileno.side_effect = io.UnsupportedOperation() - - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # ACT & ASSERT - # Note: read_input() should either return {} or raise based on RobustJSONParser - # The key is it shouldn't hang - try: - result = hook_processor.read_input() - # If it returns, it should be {} - assert isinstance(result, dict) - except Exception: - pass # Expected for detached stdin - - @patch("sys.stdin", StringIO('{"incomplete": ')) - def test_uses_robust_json_parser_for_malformed_input(self, hook_processor): - """Should use RobustJSONParser to handle malformed JSON""" - # ARRANGE - ensure NOT shutting down - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # ACT - result = hook_processor.read_input() - - # ASSERT - # RobustJSONParser should handle this gracefully - assert isinstance(result, dict), "Should return dict even for malformed JSON" - - -# ============================================================================= -# UNIT TESTS - Shutdown Detection Integration -# ============================================================================= - - -class TestShutdownDetectionIntegration: - """Test read_input() correctly integrates with shutdown detection.""" - - def test_detects_shutdown_via_env_var(self, hook_processor): - """Should detect shutdown through AMPLIHACK_SHUTDOWN_IN_PROGRESS""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - try: - # ACT - result = hook_processor.read_input() - - # ASSERT - assert result == {}, "Should skip stdin read via env var" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - def test_no_shutdown_when_env_var_zero(self, hook_processor): - """Should NOT detect shutdown when AMPLIHACK_SHUTDOWN_IN_PROGRESS=0""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "0" - - with patch("sys.stdin", StringIO('{"key": "value"}')): - try: - # ACT - result = hook_processor.read_input() - - # ASSERT - assert result == {"key": "value"}, "Should read stdin when env var is 0" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - def test_no_shutdown_when_env_var_empty(self, hook_processor): - """Should NOT detect shutdown when AMPLIHACK_SHUTDOWN_IN_PROGRESS=''""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "" - - with patch("sys.stdin", StringIO('{"key": "value"}')): - try: - # ACT - result = hook_processor.read_input() - - # ASSERT - assert result == {"key": "value"}, "Should read stdin when env var empty" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -# ============================================================================= -# UNIT TESTS - Performance -# ============================================================================= - - -class TestReadInputPerformance: - """Test read_input() performance during shutdown.""" - - def test_shutdown_check_is_fast(self, hook_processor): - """Shutdown check should be fast (<5ms per call)""" - # ARRANGE - import time - - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - try: - # ACT - Run 100 times to measure overhead - start_time = time.time() - for _ in range(100): - hook_processor.read_input() - elapsed = time.time() - start_time - - avg_per_call = (elapsed / 100) * 1000 # ms - - # ASSERT - Multi-layer checks (env var + atexit + stdin) take ~2-3ms - # This is still fast enough for clean 2-3s exit times - assert avg_per_call < 5.0, ( - f"Shutdown check took {avg_per_call:.2f}ms avg, should be <5ms" - ) - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -# ============================================================================= -# TEST CONFIGURATION -# ============================================================================= - - -@pytest.fixture(autouse=True) -def cleanup_env_var(): - """Ensure AMPLIHACK_SHUTDOWN_IN_PROGRESS is cleaned up after each test.""" - yield - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -def pytest_configure(config): - """Register custom pytest markers""" - config.addinivalue_line("markers", "unit: marks tests as unit tests") diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_issue_1872_bug_fixes.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_issue_1872_bug_fixes.py deleted file mode 100644 index c620515fe..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_issue_1872_bug_fixes.py +++ /dev/null @@ -1,753 +0,0 @@ -#!/usr/bin/env python3 -""" -Failing tests for Issue #1872 power steering bug fixes. - -These tests verify the four bug fixes: -1. Math Display Bug - Summary shows correct count format -2. SDK Error Visibility - SDK exceptions logged to stderr -3. Failure Reason Extraction - analyze_consideration returns tuple with reason -4. Final Guidance Generation - generate_final_guidance() function works - -All tests MUST fail before implementation. -""" - -import asyncio -import io -import json -import sys -import tempfile -import unittest -from pathlib import Path -from unittest.mock import patch - -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from power_steering_checker import ( - CheckerResult, - ConsiderationAnalysis, - PowerSteeringChecker, -) - - -class TestBug1MathDisplay(unittest.TestCase): - """Tests for Bug #1: Math display in summary. - - Bug: Summary says "(0 passed, 0 failed)" when skipped checks exist. - Fix: Summary should be "(X passed, Y failed, Z skipped)" where X+Y+Z=total. - """ - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - - # Create directory structure - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True, exist_ok=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - - # Create minimal config - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = { - "enabled": True, - "version": "1.0.0", - "phase": 1, - } - config_path.write_text(json.dumps(config, indent=2)) - - self.checker = PowerSteeringChecker(self.project_root) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_summary_includes_skipped_count(self): - """Test that summary includes skipped count in format (X passed, Y failed, Z skipped).""" - # Create analysis with mixed results - analysis = ConsiderationAnalysis() - - # 2 passed - analysis.add_result( - CheckerResult( - consideration_id="check_0", - satisfied=True, - reason="Passed", - severity="blocker", - ) - ) - analysis.add_result( - CheckerResult( - consideration_id="check_1", - satisfied=True, - reason="Passed", - severity="blocker", - ) - ) - - # 1 failed - analysis.add_result( - CheckerResult( - consideration_id="check_2", - satisfied=False, - reason="Failed", - severity="blocker", - ) - ) - - # Simulate 22 total considerations (2 passed + 1 failed + 19 skipped) - self.checker.considerations = [{"id": f"check_{i}", "category": "Test"} for i in range(22)] - - # Generate summary - results_text = self.checker._format_results_text(analysis, "DEVELOPMENT") - - # Verify format includes all three counts - self.assertIn("2 passed", results_text, "Should show passed count") - self.assertIn("1 failed", results_text, "Should show failed count") - self.assertIn("19 skipped", results_text, "Should show skipped count") - - def test_summary_math_totals_correctly(self): - """Test that X + Y + Z = total considerations.""" - # Create analysis with known counts - analysis = ConsiderationAnalysis() - - # 5 passed (use first 5 consideration IDs) - for i in range(5): - analysis.add_result( - CheckerResult( - consideration_id=f"consideration_{i}", - satisfied=True, - reason="Passed", - severity="blocker", - ) - ) - - # 3 failed (use next 3 consideration IDs) - for i in range(5, 8): - analysis.add_result( - CheckerResult( - consideration_id=f"consideration_{i}", - satisfied=False, - reason="Failed", - severity="blocker", - ) - ) - - # 22 total considerations (5 + 3 + 14 skipped) - self.checker.considerations = [ - {"id": f"consideration_{i}", "category": "Test"} for i in range(22) - ] - - results_text = self.checker._format_results_text(analysis, "DEVELOPMENT") - - # Extract counts from text - import re - - passed_match = re.search(r"(\d+)\s+passed", results_text) - failed_match = re.search(r"(\d+)\s+failed", results_text) - skipped_match = re.search(r"(\d+)\s+skipped", results_text) - - self.assertIsNotNone(passed_match, "Should show passed count") - self.assertIsNotNone(failed_match, "Should show failed count") - self.assertIsNotNone(skipped_match, "Should show skipped count") - - passed = int(passed_match.group(1)) - failed = int(failed_match.group(1)) - skipped = int(skipped_match.group(1)) - - # Verify math: X + Y + Z = 22 - self.assertEqual(passed + failed + skipped, 22, "Sum should equal total considerations") - self.assertEqual(passed, 5, "Should have 5 passed") - self.assertEqual(failed, 3, "Should have 3 failed") - self.assertEqual(skipped, 14, "Should have 14 skipped") - - def test_summary_format_with_parentheses(self): - """Test that summary uses format (X passed, Y failed, Z skipped) with parentheses.""" - analysis = ConsiderationAnalysis() - - # 1 passed, 1 failed - analysis.add_result( - CheckerResult( - consideration_id="c0", - satisfied=True, - reason="Passed", - severity="blocker", - ) - ) - analysis.add_result( - CheckerResult( - consideration_id="c1", - satisfied=False, - reason="Failed", - severity="blocker", - ) - ) - - # 22 total (1 + 1 + 20 skipped) - self.checker.considerations = [{"id": f"c{i}", "category": "T"} for i in range(22)] - - results_text = self.checker._format_results_text(analysis, "DEVELOPMENT") - - # Should contain format: (1 passed, 1 failed, 20 skipped) - - pattern = r"\(\s*1\s+passed\s*,\s*1\s+failed\s*,\s*20\s+skipped\s*\)" - self.assertRegex( - results_text, - pattern, - "Should have format (X passed, Y failed, Z skipped)", - ) - - -class TestBug2SDKErrorVisibility(unittest.TestCase): - """Tests for Bug #2: SDK error visibility. - - Bug: SDK exceptions swallowed silently, hard to debug. - Fix: Log SDK errors to stderr with consideration ID and fail-open behavior. - """ - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - - # Create directory structure - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True, exist_ok=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = {"enabled": True, "version": "1.0.0", "phase": 1} - config_path.write_text(json.dumps(config, indent=2)) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - @patch("sys.stderr", new_callable=io.StringIO) - @patch("power_steering_checker.SDK_AVAILABLE", True) - @patch("power_steering_checker.analyze_consideration") - def test_sdk_exception_logged_to_stderr(self, mock_analyze, mock_stderr): - """Test that SDK exceptions are logged to stderr with consideration ID.""" - - # Make SDK raise exception - async def failing_analyze(*args, **kwargs): - raise RuntimeError("SDK connection timeout") - - mock_analyze.side_effect = failing_analyze - - checker = PowerSteeringChecker(self.project_root) - - consideration = { - "id": "test_check", - "question": "Test question?", - "category": "Test", - "severity": "blocker", - } - - transcript = [ - {"type": "user", "message": {"content": "test"}}, - ] - - # Run checker (should log error but not crash) - try: - asyncio.run( - checker._check_single_consideration_async(consideration, transcript, "test_session") - ) - except Exception: - pass # Expected to fail-open - - stderr_output = mock_stderr.getvalue() - - # Verify error was logged to stderr - self.assertIn("[Power Steering SDK Error]", stderr_output, "Should have error prefix") - self.assertIn("test_check", stderr_output, "Should include consideration ID") - self.assertIn("SDK connection timeout", stderr_output, "Should include error message") - - @patch("sys.stderr", new_callable=io.StringIO) - @patch("power_steering_checker.SDK_AVAILABLE", True) - @patch("power_steering_checker.analyze_consideration") - def test_sdk_error_log_format(self, mock_analyze, mock_stderr): - """Test that SDK error log has correct format: [Power Steering SDK Error] {id}: {error}.""" - - async def failing_analyze(*args, **kwargs): - raise ValueError("Invalid consideration format") - - mock_analyze.side_effect = failing_analyze - - checker = PowerSteeringChecker(self.project_root) - - consideration = { - "id": "philosophy_check", - "question": "Philosophy compliance?", - "category": "Quality", - "severity": "warning", - } - - transcript = [] - - try: - asyncio.run( - checker._check_single_consideration_async(consideration, transcript, "session_123") - ) - except Exception: - pass - - stderr_output = mock_stderr.getvalue() - - # Verify format - self.assertRegex( - stderr_output, - r"\[Power Steering SDK Error\]\s+philosophy_check:", - "Should match format: [Power Steering SDK Error] {id}:", - ) - - @patch("power_steering_checker.SDK_AVAILABLE", True) - @patch("power_steering_checker.analyze_consideration") - def test_sdk_error_fails_open_returns_true(self, mock_analyze): - """Test that SDK errors fail-open (return True/satisfied).""" - - async def failing_analyze(*args, **kwargs): - raise Exception("Network error") - - mock_analyze.side_effect = failing_analyze - - checker = PowerSteeringChecker(self.project_root) - - consideration = { - "id": "test_check", - "question": "Test?", - "category": "Test", - "severity": "blocker", - } - - transcript = [] - - # Should not raise, should return satisfied=True - result = asyncio.run( - checker._check_single_consideration_async(consideration, transcript, "test_session") - ) - - self.assertIsNotNone(result, "Should return a result") - self.assertTrue(result.satisfied, "Should fail-open with satisfied=True") - - -class TestBug3FailureReasonExtraction(unittest.TestCase): - """Tests for Bug #3: Failure reason extraction. - - Bug: analyze_consideration() returns bool, no reason string. - Fix: Return Tuple[bool, Optional[str]] with reason when check fails. - """ - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True, exist_ok=True) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - @patch("claude_power_steering.CLAUDE_SDK_AVAILABLE", True) - @patch("claude_power_steering.query") - def test_analyze_consideration_returns_tuple(self, mock_query): - """Test that analyze_consideration returns Tuple[bool, Optional[str]].""" - # Import here to get patched version - from claude_power_steering import analyze_consideration - - # Mock SDK response with NOT SATISFIED - async def mock_response(*args, **kwargs): - class MockMessage: - def __init__(self, text): - self.text = text - - yield MockMessage("NOT SATISFIED: Missing tests") - - mock_query.return_value = mock_response() - - consideration = { - "id": "test_check", - "question": "Were tests run?", - "description": "Check for test execution", - "category": "Testing", - } - - conversation = [{"type": "user", "message": {"content": "Fix the bug"}}] - - # Run async function - result = asyncio.run(analyze_consideration(conversation, consideration, self.project_root)) - - # Verify return type is tuple - self.assertIsInstance(result, tuple, "Should return tuple") - self.assertEqual(len(result), 2, "Should return 2-element tuple") - - satisfied, reason = result - self.assertIsInstance(satisfied, bool, "First element should be bool") - self.assertIsInstance(reason, (str, type(None)), "Second element should be str or None") - - @patch("claude_power_steering.CLAUDE_SDK_AVAILABLE", True) - @patch("claude_power_steering.query") - def test_reason_extracted_when_check_fails(self, mock_query): - """Test that reason is extracted when check fails.""" - from claude_power_steering import analyze_consideration - - async def mock_response(*args, **kwargs): - class MockMessage: - text = "NOT SATISFIED: TodoWrite shows 3 incomplete tasks" - - yield MockMessage() - - mock_query.return_value = mock_response() - - consideration = { - "id": "todos_complete", - "question": "Were all TODOs completed?", - "description": "Check TodoWrite", - "category": "Completion", - } - - conversation = [] - - satisfied, reason = asyncio.run( - analyze_consideration(conversation, consideration, self.project_root) - ) - - self.assertFalse(satisfied, "Should be not satisfied") - self.assertIsNotNone(reason, "Reason should not be None when check fails") - self.assertIn("incomplete", reason.lower(), "Reason should mention incomplete tasks") - - @patch("claude_power_steering.CLAUDE_SDK_AVAILABLE", True) - @patch("claude_power_steering.query") - def test_reason_truncated_to_200_chars(self, mock_query): - """Test that reason is truncated to 200 characters.""" - from claude_power_steering import analyze_consideration - - long_reason = "NOT SATISFIED: " + ("A" * 300) # 313 chars total - - async def mock_response(*args, **kwargs): - class MockMessage: - text = long_reason - - yield MockMessage() - - mock_query.return_value = mock_response() - - consideration = { - "id": "test_check", - "question": "Test?", - "category": "Test", - } - - conversation = [] - - satisfied, reason = asyncio.run( - analyze_consideration(conversation, consideration, self.project_root) - ) - - self.assertFalse(satisfied) - self.assertIsNotNone(reason) - self.assertLessEqual(len(reason), 200, "Reason should be truncated to 200 chars") - - @patch("claude_power_steering.CLAUDE_SDK_AVAILABLE", True) - @patch("claude_power_steering.query") - def test_reason_none_when_check_passes(self, mock_query): - """Test that reason is None when check passes.""" - from claude_power_steering import analyze_consideration - - async def mock_response(*args, **kwargs): - class MockMessage: - text = "SATISFIED: All tests passed successfully" - - yield MockMessage() - - mock_query.return_value = mock_response() - - consideration = { - "id": "local_testing", - "question": "Were tests run?", - "category": "Testing", - } - - conversation = [] - - satisfied, reason = asyncio.run( - analyze_consideration(conversation, consideration, self.project_root) - ) - - self.assertTrue(satisfied, "Should be satisfied") - self.assertIsNone(reason, "Reason should be None when check passes") - - -class TestBug4FinalGuidanceGeneration(unittest.TestCase): - """Tests for Bug #4: Final guidance generation. - - Bug: No function to generate final guidance using SDK. - Fix: Add generate_final_guidance() function with SDK integration. - """ - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True, exist_ok=True) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_generate_final_guidance_function_exists(self): - """Test that generate_final_guidance() function exists in claude_power_steering.""" - try: - from claude_power_steering import generate_final_guidance - - self.assertTrue(callable(generate_final_guidance), "Should be callable") - except ImportError: - self.fail("generate_final_guidance function should exist") - - @patch("claude_power_steering.CLAUDE_SDK_AVAILABLE", True) - @patch("claude_power_steering.query") - def test_generate_final_guidance_calls_sdk(self, mock_query): - """Test that generate_final_guidance calls SDK with failed checks and reasons.""" - from claude_power_steering import generate_final_guidance - - # Mock SDK response - async def mock_response(*args, **kwargs): - class MockMessage: - text = "Complete the remaining TODOs and run tests locally." - - yield MockMessage() - - mock_query.return_value = mock_response() - - failed_checks = [ - ("todos_complete", "3 tasks remain incomplete"), - ("local_testing", "No test execution found"), - ] - - conversation = [] - - guidance = asyncio.run( - generate_final_guidance(failed_checks, conversation, self.project_root) - ) - - # Verify SDK was called - self.assertTrue(mock_query.called, "SDK query should be called") - self.assertIsInstance(guidance, str, "Should return string") - self.assertGreater(len(guidance), 0, "Guidance should not be empty") - - @patch("claude_power_steering.CLAUDE_SDK_AVAILABLE", True) - @patch("claude_power_steering.query") - def test_generate_final_guidance_includes_failure_context(self, mock_query): - """Test that generate_final_guidance includes actual failure context in prompt.""" - from claude_power_steering import generate_final_guidance - - async def mock_response(*args, **kwargs): - class MockMessage: - text = "Fix the failing checks" - - yield MockMessage() - - mock_query.return_value = mock_response() - - failed_checks = [ - ("ci_status", "CI checks failing on test_module.py"), - ] - - conversation = [] - - asyncio.run(generate_final_guidance(failed_checks, conversation, self.project_root)) - - # Verify the prompt passed to SDK includes the failure info - call_args = mock_query.call_args - prompt = call_args[1]["prompt"] # Get keyword argument 'prompt' - - self.assertIn("ci_status", prompt, "Prompt should include check ID") - self.assertIn("failing", prompt.lower(), "Prompt should include failure reason") - - @patch("claude_power_steering.CLAUDE_SDK_AVAILABLE", False) - def test_generate_final_guidance_fallback_when_sdk_unavailable(self): - """Test that generate_final_guidance uses template fallback when SDK unavailable.""" - from claude_power_steering import generate_final_guidance - - failed_checks = [ - ("todos_complete", "2 tasks incomplete"), - ("local_testing", "No tests run"), - ] - - conversation = [] - - guidance = asyncio.run( - generate_final_guidance(failed_checks, conversation, self.project_root) - ) - - # Should still return guidance (template-based) - self.assertIsInstance(guidance, str, "Should return string even without SDK") - self.assertGreater(len(guidance), 0, "Should have fallback guidance") - - # Template should mention the checks - self.assertIn("todos_complete", guidance, "Should mention failed check") - self.assertIn("local_testing", guidance, "Should mention failed check") - - @patch("claude_power_steering.CLAUDE_SDK_AVAILABLE", True) - @patch("claude_power_steering.query") - def test_generate_final_guidance_is_specific_not_generic(self, mock_query): - """Test that guidance is specific to actual failures, not generic advice.""" - from claude_power_steering import generate_final_guidance - - async def mock_response(*args, **kwargs): - class MockMessage: - text = "You need to complete the 3 incomplete TODOs and run pytest locally." - - yield MockMessage() - - mock_query.return_value = mock_response() - - failed_checks = [ - ("todos_complete", "3 incomplete tasks"), - ("local_testing", "pytest not run"), - ] - - conversation = [] - - guidance = asyncio.run( - generate_final_guidance(failed_checks, conversation, self.project_root) - ) - - # Guidance should be specific, not generic - self.assertIn("3", guidance, "Should mention specific number from failure reason") - self.assertIn("pytest", guidance.lower(), "Should mention specific tool from reason") - - @patch("claude_power_steering.CLAUDE_SDK_AVAILABLE", True) - @patch("claude_power_steering.query") - def test_generate_final_guidance_sdk_failure_uses_template(self, mock_query): - """Test that SDK failure falls back to template guidance.""" - from claude_power_steering import generate_final_guidance - - # Make SDK raise exception - async def failing_response(*args, **kwargs): - raise RuntimeError("SDK timeout") - - mock_query.side_effect = failing_response - - failed_checks = [ - ("ci_status", "CI failing"), - ] - - conversation = [] - - guidance = asyncio.run( - generate_final_guidance(failed_checks, conversation, self.project_root) - ) - - # Should fall back to template - self.assertIsInstance(guidance, str, "Should return string") - self.assertGreater(len(guidance), 0, "Should have fallback guidance") - self.assertIn("ci_status", guidance, "Template should mention failed check") - - -class TestBug3Integration(unittest.TestCase): - """Integration tests for Bug #3: Failure reason extraction at call site. - - Tests the single call site update at line 2037 in power_steering_checker.py. - """ - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True, exist_ok=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = {"enabled": True, "version": "1.0.0", "phase": 1} - config_path.write_text(json.dumps(config, indent=2)) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - @patch("power_steering_checker.SDK_AVAILABLE", True) - @patch("power_steering_checker.analyze_consideration") - def test_call_site_unpacks_tuple_correctly(self, mock_analyze): - """Test that call site at line 2037 correctly unpacks (bool, str) tuple.""" - - # Mock SDK to return tuple - async def mock_analyze_tuple(*args, **kwargs): - return (False, "Tests were not executed") - - mock_analyze.side_effect = mock_analyze_tuple - - checker = PowerSteeringChecker(self.project_root) - - consideration = { - "id": "local_testing", - "question": "Were tests run?", - "category": "Testing", - "severity": "blocker", - } - - transcript = [] - - # Should not crash when unpacking tuple - result = asyncio.run( - checker._check_single_consideration_async(consideration, transcript, "test_session") - ) - - self.assertIsNotNone(result, "Should return result") - self.assertFalse(result.satisfied, "Should capture satisfied=False") - self.assertIn("not executed", result.reason, "Should capture reason string") - - @patch("power_steering_checker.SDK_AVAILABLE", True) - @patch("power_steering_checker.analyze_consideration") - def test_call_site_handles_none_reason(self, mock_analyze): - """Test that call site handles None reason when check passes.""" - - async def mock_analyze_tuple(*args, **kwargs): - return (True, None) - - mock_analyze.side_effect = mock_analyze_tuple - - checker = PowerSteeringChecker(self.project_root) - - consideration = { - "id": "ci_status", - "question": "Is CI passing?", - "category": "CI", - "severity": "blocker", - } - - transcript = [] - - result = asyncio.run( - checker._check_single_consideration_async(consideration, transcript, "test_session") - ) - - self.assertTrue(result.satisfied, "Should be satisfied") - # Reason should use default when None returned - self.assertIsInstance(result.reason, str, "Should have string reason even when None") - - -if __name__ == "__main__": - unittest.main() diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_issue_1882_power_steering_infinite_loop.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_issue_1882_power_steering_infinite_loop.py deleted file mode 100644 index 343edd9fd..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_issue_1882_power_steering_infinite_loop.py +++ /dev/null @@ -1,717 +0,0 @@ -#!/usr/bin/env python3 -""" -Comprehensive failing tests for Issue #1882 (Power Steering Infinite Loop). - -These tests follow TDD methodology - they MUST FAIL before the fix is implemented -and PASS after the fix is applied. - -Test Coverage: -1. Reproduction Tests (MUST FAIL before fix) -2. Monotonicity Tests (counter never decreases) -3. Atomic Write Tests (fsync, verification, retry) -4. Infinite Loop Detection Tests -5. Edge Cases (filesystem errors, corrupted state) - -Testing Pyramid: 60% unit, 30% integration, 10% E2E -""" - -import json -import sys -from pathlib import Path -from unittest.mock import MagicMock, Mock, patch - -import pytest - -# Add parent directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from power_steering_state import ( - FailureEvidence, - PowerSteeringTurnState, - TurnStateManager, -) - - -class TestIssue1882Reproduction: - """Reproduction tests that MUST FAIL before fix is applied. - - These tests reproduce the exact bug: counter resets from 5 → 0 - instead of incrementing to 6. - """ - - def test_counter_increments_from_5_to_6_not_reset_to_0(self, tmp_path): - """MUST FAIL: Counter should increment 5 → 6, not reset to 0. - - This reproduces the core bug from Issue #1882 where the counter - mysteriously resets to 0 instead of incrementing. - - Expected behavior: - - Load state with turn_count=5 - - Increment turn - - Save state - - Load state again - - turn_count should be 6, NOT 0 - - Current behavior (BUG): - - turn_count resets to 0 (bug in save/load cycle) - """ - manager = TurnStateManager(tmp_path, "test_session") - - # Create initial state with turn_count=5 - state = PowerSteeringTurnState(session_id="test_session", turn_count=5) - manager.save_state(state) - - # Load and increment - loaded_state = manager.load_state() - assert loaded_state.turn_count == 5, "Initial load should have turn_count=5" - - incremented_state = manager.increment_turn(loaded_state) - assert incremented_state.turn_count == 6, "After increment should have turn_count=6" - - manager.save_state(incremented_state) - - # Load again and verify persistence - reloaded_state = manager.load_state() - - # THIS SHOULD PASS BUT CURRENTLY FAILS (bug) - assert reloaded_state.turn_count == 6, ( - "Counter MUST persist as 6, not reset to 0. This is the core bug from Issue #1882." - ) - - def test_state_persists_across_multiple_write_read_cycles(self, tmp_path): - """MUST FAIL: State should persist correctly across cycles. - - Tests that state doesn't get corrupted or reset across multiple - save/load cycles (write → read → write → read). - """ - manager = TurnStateManager(tmp_path, "test_session") - - # Cycle 1: write turn_count=3 - state1 = PowerSteeringTurnState(session_id="test_session", turn_count=3) - manager.save_state(state1) - - loaded1 = manager.load_state() - assert loaded1.turn_count == 3 - - # Cycle 2: increment to 4, write - state2 = manager.increment_turn(loaded1) - manager.save_state(state2) - - loaded2 = manager.load_state() - assert loaded2.turn_count == 4 - - # Cycle 3: increment to 5, write - state3 = manager.increment_turn(loaded2) - manager.save_state(state3) - - loaded3 = manager.load_state() - - # THIS SHOULD PASS BUT CURRENTLY FAILS - assert loaded3.turn_count == 5, ( - "Counter should persist correctly across multiple cycles. " - "If this fails, there's a bug in save/load persistence." - ) - - def test_no_infinite_loop_in_100_consecutive_calls(self, tmp_path): - """MUST FAIL: Should not get stuck in infinite loop. - - Simulates 100 consecutive power steering checks to detect: - - Counter stall (same value repeated) - - Oscillation (A → B → A → B pattern) - - Infinite loop condition - """ - manager = TurnStateManager(tmp_path, "test_session") - - state = PowerSteeringTurnState(session_id="test_session") - previous_values = [] - - for i in range(100): - state = manager.increment_turn(state) - manager.save_state(state) - - reloaded = manager.load_state() - previous_values.append(reloaded.turn_count) - - # Check for stall (same value repeated 10+ times) - if len(previous_values) >= 10: - last_10 = previous_values[-10:] - if len(set(last_10)) == 1: - pytest.fail( - f"Counter STALLED at {last_10[0]} for 10 consecutive calls. " - f"This indicates an infinite loop condition." - ) - - # Check for oscillation (A → B → A → B pattern) - if len(previous_values) >= 4: - last_4 = previous_values[-4:] - if last_4[0] == last_4[2] and last_4[1] == last_4[3] and last_4[0] != last_4[1]: - pytest.fail( - f"Counter OSCILLATING between {last_4[0]} and {last_4[1]}. " - f"This indicates an infinite loop condition." - ) - - # Verify counter reached 100 (not stuck) - final_state = manager.load_state() - assert final_state.turn_count == 100, ( - f"After 100 increments, counter should be 100, not {final_state.turn_count}. " - f"History: {previous_values[-20:]}" - ) - - -class TestMonotonicityValidation: - """Tests for monotonicity requirement: counter NEVER decreases. - - REQ-1: Counter must increment reliably - Architect recommendation: Monotonicity check - """ - - def test_counter_never_decreases(self, tmp_path): - """Counter should warn but not block on monotonicity violation (fail-open design).""" - manager = TurnStateManager(tmp_path, "test_session") - - state = PowerSteeringTurnState(session_id="test_session", turn_count=10) - manager.save_state(state) - - # Attempt to save state with LOWER turn_count - regressed_state = PowerSteeringTurnState(session_id="test_session", turn_count=5) - - # Should NOT raise - fail-open design warns but continues - # No exception should be raised - manager.save_state(regressed_state) - - # Verify state was saved (fail-open) - loaded = manager.load_state() - assert loaded.turn_count == 5, ( - "State should be saved despite monotonicity violation (fail-open)" - ) - - def test_detect_counter_regression_from_previous_value(self, tmp_path): - """Should warn on regression but continue (fail-open design).""" - manager = TurnStateManager(tmp_path, "test_session") - - # Save state with turn_count=20 - state1 = PowerSteeringTurnState(session_id="test_session", turn_count=20) - manager.save_state(state1) - - # Try to save state with turn_count=15 (regression) - state2 = PowerSteeringTurnState(session_id="test_session", turn_count=15) - - # Should NOT raise - fail-open design warns but continues - manager.save_state(state2) - - # Verify state was saved despite regression - loaded = manager.load_state() - assert loaded.turn_count == 15, "State should be saved despite regression (fail-open)" - - def test_track_previous_state_for_validation(self, tmp_path): - """Manager should track previous state to detect violations.""" - manager = TurnStateManager(tmp_path, "test_session") - - # Initial state - state1 = PowerSteeringTurnState(session_id="test_session", turn_count=5) - manager.save_state(state1) - - # Should have tracked previous value - # THIS ASSUMES manager has `_previous_turn_count` attribute (needs to be added) - assert hasattr(manager, "_previous_turn_count"), ( - "Manager should track previous turn_count for monotonicity validation" - ) - assert manager._previous_turn_count == 5 - - -class TestAtomicWriteEnhancements: - """Tests for atomic write requirements. - - REQ-3: Atomic counter increment with retry - Architect recommendation: fsync, verification read, retry logic - """ - - def test_fsync_called_on_save(self, tmp_path): - """save_state should call fsync() to ensure data is written to disk.""" - manager = TurnStateManager(tmp_path, "test_session") - state = PowerSteeringTurnState(session_id="test_session", turn_count=1) - - # Mock os.fsync to verify it's called - with patch("os.fsync") as mock_fsync: - manager.save_state(state) - - # THIS SHOULD PASS (after fix adds fsync call) - assert mock_fsync.called, "fsync() MUST be called to ensure atomic write" - - def test_verification_read_after_write(self, tmp_path): - """Should verify state was written correctly by reading back.""" - manager = TurnStateManager(tmp_path, "test_session") - state = PowerSteeringTurnState(session_id="test_session", turn_count=42) - - # Mock to track read operations - original_read_text = Path.read_text - read_calls = [] - - def tracked_read_text(self, *args, **kwargs): - read_calls.append(str(self)) - return original_read_text(self, *args, **kwargs) - - with patch.object(Path, "read_text", tracked_read_text): - manager.save_state(state) - - # Verification read should happen AFTER write - # THIS ASSUMES manager does verification read (needs to be added) - state_file = manager.get_state_file_path() - assert str(state_file) in read_calls, ( - "State file MUST be read after write for verification" - ) - - def test_retry_on_write_failure(self, tmp_path): - """Should retry write operation on failure.""" - manager = TurnStateManager(tmp_path, "test_session") - state = PowerSteeringTurnState(session_id="test_session", turn_count=1) - - call_count = 0 - - def failing_write(*args, **kwargs): - nonlocal call_count - call_count += 1 - if call_count < 3: - raise OSError("Simulated write failure") - # Success on 3rd attempt - return MagicMock() - - with patch.object(Path, "write_text", side_effect=failing_write): - # THIS SHOULD SUCCEED after retries (needs retry logic) - manager.save_state(state) - - # Verify retry happened - assert call_count == 3, f"Should retry write operations, got {call_count} attempts" - - def test_verify_both_temp_file_and_final_path(self, tmp_path): - """Verification should check BOTH temp file AND final path. - - Architect recommendation: Verify both temp file AND final path. - """ - manager = TurnStateManager(tmp_path, "test_session") - state = PowerSteeringTurnState(session_id="test_session", turn_count=1) - - verified_paths = [] - original_exists = Path.exists - - def track_exists(self): - verified_paths.append(str(self)) - return original_exists(self) - - with patch.object(Path, "exists", track_exists): - manager.save_state(state) - - # Should verify both temp file and final path - # THIS ASSUMES verification happens (needs to be added) - state_file = str(manager.get_state_file_path()) - temp_files = [p for p in verified_paths if "turn_state_" in p and ".tmp" in p] - - assert len(temp_files) > 0, "Should verify temp file exists" - assert state_file in verified_paths, "Should verify final path exists" - - -class TestInfiniteLoopDetection: - """Tests for infinite loop detection capabilities. - - Architect recommendation: Auto-detect stall, oscillation, high failure rate. - """ - - def test_detect_counter_stall(self, tmp_path): - """Should detect when counter stays at same value (stall).""" - manager = TurnStateManager(tmp_path, "test_session") - - # Simulate counter stuck at 5 for 10 iterations - state = PowerSteeringTurnState(session_id="test_session", turn_count=5) - - for _ in range(10): - manager.save_state(state) - # Don't increment - simulate stall - - # THIS SHOULD DETECT STALL (needs stall detection) - # Manager should track write operations and detect repeated same-value writes - diagnostics = manager.get_diagnostics() # Needs to be implemented - - assert diagnostics["stall_detected"], "Should detect counter stall" - assert diagnostics["stall_value"] == 5 - assert diagnostics["stall_count"] >= 10 - - def test_detect_oscillation_pattern(self, tmp_path): - """Should detect A → B → A → B oscillation pattern.""" - manager = TurnStateManager(tmp_path, "test_session") - - # Simulate oscillation between 3 and 4 - for i in range(20): - turn_count = 3 if i % 2 == 0 else 4 - state = PowerSteeringTurnState(session_id="test_session", turn_count=turn_count) - manager.save_state(state) - - # THIS SHOULD DETECT OSCILLATION (needs oscillation detection) - diagnostics = manager.get_diagnostics() - - assert diagnostics["oscillation_detected"], "Should detect counter oscillation" - assert set(diagnostics["oscillation_values"]) == {3, 4} - - def test_detect_high_write_failure_rate(self, tmp_path): - """Should detect when write failure rate exceeds 30%.""" - manager = TurnStateManager(tmp_path, "test_session") - state = PowerSteeringTurnState(session_id="test_session", turn_count=1) - - failure_count = 0 - - def intermittent_failure(*args, **kwargs): - nonlocal failure_count - failure_count += 1 - if failure_count % 2 == 0: # 50% failure rate - raise OSError("Write failure") - return MagicMock() - - with patch.object(Path, "write_text", side_effect=intermittent_failure): - # Try 10 save operations - for i in range(10): - try: - manager.save_state(state) - except OSError: - pass # Expected failures - - # THIS SHOULD DETECT HIGH FAILURE RATE (needs failure tracking) - diagnostics = manager.get_diagnostics() - - assert diagnostics["write_failure_rate"] > 0.30, "Should detect write failure rate > 30%" - assert diagnostics["high_failure_rate_alert"], "Should alert on high write failure rate" - - -class TestEdgeCases: - """Edge case tests for filesystem errors and corruption. - - REQ-4: Robust state management with recovery - """ - - def test_handle_filesystem_full(self, tmp_path): - """Should handle ENOSPC (filesystem full) gracefully.""" - manager = TurnStateManager(tmp_path, "test_session") - state = PowerSteeringTurnState(session_id="test_session", turn_count=1) - - def raise_enospc(*args, **kwargs): - error = OSError("No space left on device") - error.errno = 28 # ENOSPC - raise error - - with patch.object(Path, "write_text", side_effect=raise_enospc): - # Should handle gracefully (fail-open) - # THIS SHOULD NOT RAISE (fail-open design) - manager.save_state(state) - - # Should log error for diagnostics - # (Logging not tested here, but error should be recoverable) - - def test_handle_permission_denied(self, tmp_path): - """Should handle EACCES (permission denied) gracefully.""" - manager = TurnStateManager(tmp_path, "test_session") - state = PowerSteeringTurnState(session_id="test_session", turn_count=1) - - def raise_eacces(*args, **kwargs): - error = OSError("Permission denied") - error.errno = 13 # EACCES - raise error - - with patch.object(Path, "write_text", side_effect=raise_eacces): - # Should handle gracefully (fail-open) - manager.save_state(state) - - def test_handle_corrupted_state_file(self, tmp_path): - """Should recover from corrupted state file.""" - manager = TurnStateManager(tmp_path, "test_session") - - # Create corrupted state file - state_file = manager.get_state_file_path() - state_file.parent.mkdir(parents=True, exist_ok=True) - state_file.write_text("CORRUPTED JSON{{{") - - # Should load empty state (fail-open) - loaded_state = manager.load_state() - - assert loaded_state.turn_count == 0, "Should return empty state on corruption" - assert loaded_state.session_id == "test_session" - - def test_handle_partial_write(self, tmp_path): - """Should detect and recover from partial write.""" - manager = TurnStateManager(tmp_path, "test_session") - state = PowerSteeringTurnState(session_id="test_session", turn_count=42) - - # Simulate partial write (incomplete JSON) - def partial_write(self, content, *args, **kwargs): - # Write only half the content - partial_content = content[: len(content) // 2] - Path.write_text(self, partial_content) - - with patch.object(Path, "write_text", partial_write): - manager.save_state(state) - - # Should detect corrupted/partial write - # THIS ASSUMES verification read catches partial write - loaded_state = manager.load_state() - - # If verification works, should have retried and written correctly - # OR should fail-open and return empty state - assert loaded_state is not None, "Should handle partial write gracefully" - - def test_atomic_rename_failure_recovery(self, tmp_path): - """Should recover if atomic rename fails.""" - manager = TurnStateManager(tmp_path, "test_session") - state = PowerSteeringTurnState(session_id="test_session", turn_count=1) - - def failing_rename(*args, **kwargs): - raise OSError("Rename failed") - - with patch("os.rename", side_effect=failing_rename): - # Should handle gracefully - manager.save_state(state) - - # Temp file should be cleaned up (no orphaned files) - temp_files = list(tmp_path.rglob("turn_state_*.tmp")) - assert len(temp_files) == 0, "Should clean up temp files on rename failure" - - -class TestMessageCustomization: - """Tests for REQ-2: Messages customized based on check results. - - Not directly related to infinite loop bug, but part of overall fix. - """ - - def test_message_includes_turn_count(self, tmp_path): - """Power steering message should include current turn count.""" - manager = TurnStateManager(tmp_path, "test_session") - state = PowerSteeringTurnState(session_id="test_session", turn_count=5) - - # THIS ASSUMES message generation exists - message = manager.generate_power_steering_message(state) - - assert "5" in message or "five" in message.lower(), "Message should include turn count" - - def test_message_customized_after_first_block(self, tmp_path): - """Message should change after first power steering block.""" - manager = TurnStateManager(tmp_path, "test_session") - - # First block - state1 = PowerSteeringTurnState(session_id="test_session", consecutive_blocks=1) - message1 = manager.generate_power_steering_message(state1) - - # Second block - state2 = PowerSteeringTurnState(session_id="test_session", consecutive_blocks=2) - message2 = manager.generate_power_steering_message(state2) - - # Messages should be different - assert message1 != message2, "Message should be customized based on consecutive blocks" - - -class TestDiagnosticLogging: - """Tests for Phase 1: Instrumentation (diagnostic logging). - - Architect recommendation: .jsonl logging for debugging. - """ - - def test_diagnostic_log_created(self, tmp_path): - """Should create diagnostic log file in .jsonl format.""" - manager = TurnStateManager(tmp_path, "test_session") - state = PowerSteeringTurnState(session_id="test_session", turn_count=1) - - manager.save_state(state) - - # THIS ASSUMES diagnostic logging is implemented - log_file = ( - tmp_path - / ".claude" - / "runtime" - / "power-steering" - / "test_session" - / "diagnostic.jsonl" - ) - - assert log_file.exists(), "Should create diagnostic log file" - - def test_diagnostic_log_includes_write_events(self, tmp_path): - """Diagnostic log should include write events.""" - manager = TurnStateManager(tmp_path, "test_session") - state = PowerSteeringTurnState(session_id="test_session", turn_count=1) - - manager.save_state(state) - - log_file = ( - tmp_path - / ".claude" - / "runtime" - / "power-steering" - / "test_session" - / "diagnostic.jsonl" - ) - - if log_file.exists(): - content = log_file.read_text() - log_entries = [json.loads(line) for line in content.strip().split("\n")] - - # Should have write event - write_events = [e for e in log_entries if e.get("event") == "state_write"] - assert len(write_events) > 0, "Should log state write events" - - def test_diagnostic_log_includes_read_events(self, tmp_path): - """Diagnostic log should include read events.""" - manager = TurnStateManager(tmp_path, "test_session") - - # Create state - state = PowerSteeringTurnState(session_id="test_session", turn_count=1) - manager.save_state(state) - - # Read state - manager.load_state() - - log_file = ( - tmp_path - / ".claude" - / "runtime" - / "power-steering" - / "test_session" - / "diagnostic.jsonl" - ) - - if log_file.exists(): - content = log_file.read_text() - log_entries = [json.loads(line) for line in content.strip().split("\n")] - - # Should have read event - read_events = [e for e in log_entries if e.get("event") == "state_read"] - assert len(read_events) > 0, "Should log state read events" - - -# ============================================================================ -# INTEGRATION TESTS (30% of test pyramid) -# ============================================================================ - - -class TestIntegrationSaveLoadCycles: - """Integration tests for complete save/load cycles.""" - - def test_full_power_steering_lifecycle(self, tmp_path): - """Test complete power steering lifecycle with multiple blocks.""" - manager = TurnStateManager(tmp_path, "test_session") - - # Initial state - state = manager.load_state() - assert state.turn_count == 0 - assert state.consecutive_blocks == 0 - - # Record first block - failed_evidence = [ - FailureEvidence( - consideration_id="todos_complete", - reason="3 TODOs incomplete", - ) - ] - state = manager.record_block_with_evidence(state, failed_evidence, transcript_length=10) - manager.save_state(state) - - # Load and verify - reloaded = manager.load_state() - assert reloaded.consecutive_blocks == 1 - assert len(reloaded.block_history) == 1 - - # Record second block - state = manager.record_block_with_evidence(reloaded, failed_evidence, transcript_length=20) - manager.save_state(state) - - # Final verification - final_state = manager.load_state() - assert final_state.consecutive_blocks == 2 - assert len(final_state.block_history) == 2 - - def test_state_recovery_after_crash_simulation(self, tmp_path): - """Simulate crash during write and verify recovery.""" - manager = TurnStateManager(tmp_path, "test_session") - - # Write initial state - state1 = PowerSteeringTurnState(session_id="test_session", turn_count=5) - manager.save_state(state1) - - # Simulate crash during second write (by raising exception) - state2 = PowerSteeringTurnState(session_id="test_session", turn_count=6) - - with patch("os.rename", side_effect=OSError("Simulated crash")): - try: - manager.save_state(state2) - except OSError: - pass # Expected - - # Load state - should have first state (atomic write preserved it) - recovered = manager.load_state() - - # THIS VERIFIES ATOMIC WRITE PROTECTION - assert recovered.turn_count == 5, "Atomic write should preserve previous state on crash" - - -# ============================================================================ -# E2E TESTS (10% of test pyramid) -# ============================================================================ - - -class TestEndToEndWorkflows: - """End-to-end tests for complete workflows.""" - - def test_complete_power_steering_session(self, tmp_path): - """Test complete power steering session from start to finish.""" - manager = TurnStateManager(tmp_path, "test_session") - - # Session starts - state = manager.load_state() - - # Turn 1: Increment and save - state = manager.increment_turn(state) - manager.save_state(state) - - # Block 1: Record failed checks - evidence1 = [ - FailureEvidence(consideration_id="todos", reason="Incomplete"), - ] - state = manager.record_block_with_evidence(state, evidence1, 10) - manager.save_state(state) - - # Turn 2: Increment - state = manager.increment_turn(state) - manager.save_state(state) - - # Block 2: Record more failed checks - evidence2 = [ - FailureEvidence(consideration_id="tests", reason="Not run"), - ] - state = manager.record_block_with_evidence(state, evidence2, 20) - manager.save_state(state) - - # Final verification - final = manager.load_state() - - assert final.turn_count == 2, "Should have 2 turns" - assert final.consecutive_blocks == 2, "Should have 2 consecutive blocks" - assert len(final.block_history) == 2, "Should have 2 blocks in history" - - # Verify state file exists and is valid JSON - state_file = manager.get_state_file_path() - assert state_file.exists() - - content = json.loads(state_file.read_text()) - assert content["turn_count"] == 2 - assert content["consecutive_blocks"] == 2 - - -# ============================================================================ -# FIXTURES -# ============================================================================ - - -@pytest.fixture -def tmp_path(tmp_path_factory): - """Create temporary directory for test isolation.""" - return tmp_path_factory.mktemp("test_power_steering") - - -@pytest.fixture -def mock_log(): - """Mock logging function.""" - return Mock() diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_checker.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_checker.py deleted file mode 100644 index 2ea1df7b7..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_checker.py +++ /dev/null @@ -1,742 +0,0 @@ -#!/usr/bin/env python3 -""" -Unit tests for PowerSteeringChecker module. - -Tests Phase 1 (MVP) functionality: -- Configuration loading -- Semaphore handling -- Q&A detection -- Top 5 critical checkers -- Continuation prompt generation -- Summary generation -- Fail-open error handling -""" - -import json -import sys -import tempfile -import unittest -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from power_steering_checker import ( - CheckerResult, - ConsiderationAnalysis, - PowerSteeringChecker, -) - - -class TestPowerSteeringChecker(unittest.TestCase): - """Tests for PowerSteeringChecker class.""" - - def setUp(self): - """Set up test fixtures.""" - # Create temporary directory for testing - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - - # Create directory structure - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True, exist_ok=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - - # Create default config - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = { - "enabled": True, - "version": "1.0.0", - "phase": 1, - "checkers_enabled": { - "todos_complete": True, - "dev_workflow_complete": True, - "philosophy_compliance": True, - "local_testing": True, - "ci_status": True, - }, - } - config_path.write_text(json.dumps(config, indent=2)) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_initialization(self): - """Test PowerSteeringChecker initialization.""" - checker = PowerSteeringChecker(self.project_root) - - self.assertEqual(checker.project_root, self.project_root) - self.assertTrue(checker.runtime_dir.exists()) - self.assertIsInstance(checker.config, dict) - self.assertTrue(checker.config.get("enabled")) - - def test_config_loading_with_defaults(self): - """Test config loading with missing file uses defaults.""" - # Remove config file - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config_path.unlink() - - checker = PowerSteeringChecker(self.project_root) - - # Should use defaults (enabled by default per user requirement) - self.assertTrue(checker.config.get("enabled")) # Default is enabled - self.assertEqual(checker.config.get("phase"), 1) - - def test_is_disabled_by_config(self): - """Test _is_disabled checks config file.""" - # Set enabled to false - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = json.loads(config_path.read_text()) - config["enabled"] = False - config_path.write_text(json.dumps(config)) - - checker = PowerSteeringChecker(self.project_root) - self.assertTrue(checker._is_disabled()) - - def test_is_disabled_by_semaphore(self): - """Test _is_disabled checks semaphore file.""" - checker = PowerSteeringChecker(self.project_root) - - # Create semaphore - disabled_file = checker.runtime_dir / ".disabled" - disabled_file.touch() - - self.assertTrue(checker._is_disabled()) - - def test_is_disabled_by_env_var(self): - """Test _is_disabled checks environment variable.""" - import os - - os.environ["AMPLIHACK_SKIP_POWER_STEERING"] = "1" - - try: - checker = PowerSteeringChecker(self.project_root) - self.assertTrue(checker._is_disabled()) - finally: - del os.environ["AMPLIHACK_SKIP_POWER_STEERING"] - - def test_semaphore_handling(self): - """Test semaphore creation and detection.""" - checker = PowerSteeringChecker(self.project_root) - session_id = "test_session_123" - - # Initially not marked complete - self.assertFalse(checker._already_ran(session_id)) - - # Mark complete - checker._mark_complete(session_id) - - # Now should be marked complete - self.assertTrue(checker._already_ran(session_id)) - - def test_qa_session_detection_no_tools(self): - """Test Q&A session detection with no tool uses.""" - checker = PowerSteeringChecker(self.project_root) - - transcript = [ - {"type": "user", "message": {"content": "What is Python?"}}, - { - "type": "assistant", - "message": {"content": [{"type": "text", "text": "Python is..."}]}, - }, - {"type": "user", "message": {"content": "How do I use it?"}}, - {"type": "assistant", "message": {"content": [{"type": "text", "text": "You can..."}]}}, - ] - - self.assertTrue(checker._is_qa_session(transcript)) - - def test_qa_session_detection_with_tools(self): - """Test Q&A session detection with multiple tool uses.""" - checker = PowerSteeringChecker(self.project_root) - - # Session with 2 tool uses should NOT be Q&A - transcript = [ - {"type": "user", "message": {"content": "Create files"}}, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/test1.py", "content": "..."}, - }, - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/test2.py", "content": "..."}, - }, - ] - }, - }, - ] - - self.assertFalse(checker._is_qa_session(transcript)) - - def test_check_todos_complete_no_todos(self): - """Test _check_todos_complete with no TodoWrite calls.""" - checker = PowerSteeringChecker(self.project_root) - - transcript = [ - {"type": "user", "message": {"content": "Hello"}}, - {"type": "assistant", "message": {"content": [{"type": "text", "text": "Hi"}]}}, - ] - - result = checker._check_todos_complete(transcript, "test_session") - self.assertTrue(result) # No todos = satisfied - - def test_check_todos_complete_all_completed(self): - """Test _check_todos_complete with all todos completed.""" - checker = PowerSteeringChecker(self.project_root) - - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "TodoWrite", - "input": { - "todos": [ - { - "content": "Task 1", - "status": "completed", - "activeForm": "Completing task 1", - }, - { - "content": "Task 2", - "status": "completed", - "activeForm": "Completing task 2", - }, - ] - }, - } - ] - }, - } - ] - - result = checker._check_todos_complete(transcript, "test_session") - self.assertTrue(result) - - def test_check_todos_complete_pending(self): - """Test _check_todos_complete with pending todos.""" - checker = PowerSteeringChecker(self.project_root) - - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "TodoWrite", - "input": { - "todos": [ - { - "content": "Task 1", - "status": "completed", - "activeForm": "Completing task 1", - }, - { - "content": "Task 2", - "status": "pending", - "activeForm": "Working on task 2", - }, - ] - }, - } - ] - }, - } - ] - - result = checker._check_todos_complete(transcript, "test_session") - self.assertFalse(result) # Has pending todo - - def test_check_philosophy_compliance_clean_code(self): - """Test _check_philosophy_compliance with clean code.""" - checker = PowerSteeringChecker(self.project_root) - - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": { - "file_path": "/test.py", - "content": 'def hello():\n return "world"', - }, - } - ] - }, - } - ] - - result = checker._check_philosophy_compliance(transcript, "test_session") - self.assertTrue(result) - - def test_check_philosophy_compliance_with_todo(self): - """Test _check_philosophy_compliance with TODO in code.""" - checker = PowerSteeringChecker(self.project_root) - - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": { - "file_path": "/test.py", - "content": "def hello():\n # TODO: implement this\n pass", - }, - } - ] - }, - } - ] - - result = checker._check_philosophy_compliance(transcript, "test_session") - self.assertFalse(result) # Has TODO - - def test_check_local_testing_no_tests(self): - """Test _check_local_testing with no test execution.""" - checker = PowerSteeringChecker(self.project_root) - - transcript = [ - {"type": "user", "message": {"content": "Hello"}}, - {"type": "assistant", "message": {"content": [{"type": "text", "text": "Hi"}]}}, - ] - - result = checker._check_local_testing(transcript, "test_session") - self.assertFalse(result) # No tests run - - def test_continuation_prompt_generation(self): - """Test _generate_continuation_prompt with transcript containing incomplete todos.""" - checker = PowerSteeringChecker(self.project_root) - - analysis = ConsiderationAnalysis() - analysis.add_result( - CheckerResult( - consideration_id="todos_complete", - satisfied=False, - reason="Were all TODO items completed?", - severity="blocker", - ) - ) - - # Create transcript with incomplete todos to trigger the incomplete work section - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "TodoWrite", - "input": { - "todos": [ - { - "content": "Fix the bug", - "status": "pending", - "activeForm": "Fixing bug", - }, - { - "content": "Add tests", - "status": "in_progress", - "activeForm": "Adding tests", - }, - ] - }, - } - ] - }, - } - ] - - prompt = checker._generate_continuation_prompt(analysis, transcript) - - self.assertIn("incomplete", prompt.lower()) - self.assertIn("TODO", prompt) - self.assertIn("Fix the bug", prompt) # Should show specific incomplete item - - def test_summary_generation(self): - """Test _generate_summary.""" - checker = PowerSteeringChecker(self.project_root) - - transcript = [] - analysis = ConsiderationAnalysis() - session_id = "test_session_123" - - summary = checker._generate_summary(transcript, analysis, session_id) - - self.assertIn(session_id, summary) - self.assertIn("complete", summary.lower()) - - def test_check_with_disabled(self): - """Test check() when power-steering is disabled.""" - # Disable power-steering - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = json.loads(config_path.read_text()) - config["enabled"] = False - config_path.write_text(json.dumps(config)) - - checker = PowerSteeringChecker(self.project_root) - - # Create dummy transcript - transcript_path = self.project_root / "transcript.jsonl" - transcript_path.write_text('{"type": "user", "message": {"content": "test"}}\n') - - result = checker.check(transcript_path, "test_session") - - self.assertEqual(result.decision, "approve") - self.assertIn("disabled", result.reasons) - - def test_check_with_already_ran(self): - """Test check() when already ran for session.""" - checker = PowerSteeringChecker(self.project_root) - session_id = "test_session_123" - - # Mark as already ran - checker._mark_complete(session_id) - - # Create dummy transcript - transcript_path = self.project_root / "transcript.jsonl" - transcript_path.write_text('{"type": "user", "message": {"content": "test"}}\n') - - result = checker.check(transcript_path, session_id) - - self.assertEqual(result.decision, "approve") - self.assertIn("already_ran", result.reasons) - - def test_fail_open_on_error(self): - """Test that errors result in fail-open approval.""" - checker = PowerSteeringChecker(self.project_root) - - # Use non-existent transcript path - transcript_path = self.project_root / "nonexistent.jsonl" - - result = checker.check(transcript_path, "test_session") - - # Should approve on error (fail-open) - self.assertEqual(result.decision, "approve") - self.assertIn("error", result.reasons[0].lower()) - - def test_format_results_text_all_checks_skipped(self): - """Test Issue #1744 Fix #1: Message when all checks skipped. - - Bug: Showed "ALL CHECKS PASSED (0 passed, 22 skipped)" - Fix: Should show "NO CHECKS APPLICABLE (22 skipped for session type)" - """ - checker = PowerSteeringChecker(self.project_root) - - # Create analysis with all checks skipped (empty results) - analysis = ConsiderationAnalysis() - # Don't add any results - simulates all checks skipped - - # Simulate we have 22 considerations but none evaluated - checker.considerations = [{"id": f"check_{i}", "category": "Test"} for i in range(22)] - - results_text = checker._format_results_text(analysis, "INFORMATIONAL") - - # Verify correct message - self.assertIn( - "NO CHECKS APPLICABLE", - results_text, - 'Should say "NO CHECKS APPLICABLE" not "ALL CHECKS PASSED"', - ) - self.assertNotIn( - "ALL CHECKS PASSED", - results_text, - 'Should NOT say "ALL CHECKS PASSED" when all skipped', - ) - # Should show "22 skipped" - self.assertIn( - "22 skipped", - results_text, - "Should show count of skipped checks", - ) - - def test_format_results_text_some_checks_passed(self): - """Test Issue #1744 Fix #1: Message when some checks passed. - - Bug: Would say "ALL CHECKS PASSED" even with 0 passed - Fix: Only say "ALL CHECKS PASSED" when total_passed > 0 - """ - checker = PowerSteeringChecker(self.project_root) - - # Create analysis with some checks passed - analysis = ConsiderationAnalysis() - analysis.add_result( - CheckerResult( - consideration_id="test_check1", - satisfied=True, - reason="Passed", - severity="blocker", - ) - ) - analysis.add_result( - CheckerResult( - consideration_id="test_check2", - satisfied=True, - reason="Passed", - severity="blocker", - ) - ) - - # Add considerations - checker.considerations = [ - {"id": "test_check1", "category": "Test"}, - {"id": "test_check2", "category": "Test"}, - {"id": "test_check3", "category": "Test"}, # Will be skipped (not in results) - ] - - results_text = checker._format_results_text(analysis, "DEVELOPMENT") - - # Verify correct message - self.assertIn( - "ALL CHECKS PASSED", - results_text, - 'Should say "ALL CHECKS PASSED" when some checks passed and none failed', - ) - # Should show "2 passed, 1 skipped" - self.assertIn( - "2 passed", - results_text, - "Should show count of passed checks", - ) - self.assertIn( - "1 skipped", - results_text, - "Should show count of skipped checks", - ) - - def test_check_integration_no_applicable_checks(self): - """Integration test for Issue #1744 Fix #2: Complete check() behavior with no applicable checks. - - This integration test verifies the complete flow when no checks are applicable: - 1. First call to check() approves immediately (no blocking) - 2. Returns decision="approve" with reason="no_applicable_checks" - 3. Marks session complete to prevent re-running - 4. Second call returns "already_ran" (session marked complete) - - This complements the unit tests by testing the entire check() method flow. - - Note: This test requires full environment setup (considerations.yaml, etc.) which - may not be available in all test environments. The unit tests above provide - comprehensive coverage of the fixes without requiring full integration. - """ - # Skip this test - unit tests provide sufficient coverage without full environment setup - # The two unit tests above (test_format_results_text_*) comprehensively test the fixes - self.skipTest( - "Integration test requires full environment - unit tests provide sufficient coverage" - ) - - -class TestConsiderationAnalysis(unittest.TestCase): - """Tests for ConsiderationAnalysis class.""" - - def test_has_blockers_empty(self): - """Test has_blockers with no results.""" - analysis = ConsiderationAnalysis() - self.assertFalse(analysis.has_blockers) - - def test_has_blockers_with_blocker(self): - """Test has_blockers with blocker result.""" - analysis = ConsiderationAnalysis() - result = CheckerResult( - consideration_id="test", satisfied=False, reason="Test failed", severity="blocker" - ) - analysis.add_result(result) - - self.assertTrue(analysis.has_blockers) - self.assertEqual(len(analysis.failed_blockers), 1) - - def test_has_blockers_warning_only(self): - """Test has_blockers with only warnings.""" - analysis = ConsiderationAnalysis() - result = CheckerResult( - consideration_id="test", satisfied=False, reason="Test warning", severity="warning" - ) - analysis.add_result(result) - - self.assertFalse(analysis.has_blockers) - self.assertEqual(len(analysis.failed_warnings), 1) - - def test_group_by_category(self): - """Test group_by_category.""" - analysis = ConsiderationAnalysis() - - result1 = CheckerResult( - consideration_id="todos_complete", - satisfied=False, - reason="Todos incomplete", - severity="blocker", - ) - result2 = CheckerResult( - consideration_id="local_testing", satisfied=False, reason="No tests", severity="blocker" - ) - - analysis.add_result(result1) - analysis.add_result(result2) - - grouped = analysis.group_by_category() - - # Should have categories - self.assertGreater(len(grouped), 0) - - -class TestPreCompactionTranscript(unittest.TestCase): - """Tests for Issue #1962: Pre-compaction transcript handling.""" - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - - # Create directory structure - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True, exist_ok=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - (self.project_root / ".claude" / "runtime" / "logs").mkdir(parents=True, exist_ok=True) - - # Create default config - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = {"enabled": True, "version": "1.0.0", "phase": 1} - config_path.write_text(json.dumps(config, indent=2)) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_get_pre_compaction_transcript_no_compaction(self): - """Test _get_pre_compaction_transcript returns None when no compaction.""" - checker = PowerSteeringChecker(self.project_root) - session_id = "test_session_123" - - # Create session directory without compaction events - session_dir = self.project_root / ".claude" / "runtime" / "logs" / session_id - session_dir.mkdir(parents=True, exist_ok=True) - - result = checker._get_pre_compaction_transcript(session_id) - self.assertIsNone(result) - - def test_get_pre_compaction_transcript_with_compaction(self): - """Test _get_pre_compaction_transcript returns path when compaction detected.""" - checker = PowerSteeringChecker(self.project_root) - session_id = "test_session_456" - - # Create session directory with compaction events - session_dir = self.project_root / ".claude" / "runtime" / "logs" / session_id - session_dir.mkdir(parents=True, exist_ok=True) - - # Create the pre-compaction transcript file - transcript_path = session_dir / "CONVERSATION_TRANSCRIPT.md" - transcript_path.write_text("## User\nTest message\n## Assistant\nTest response\n") - - # Create compaction events file pointing to transcript - compaction_file = session_dir / "compaction_events.json" - compaction_events = [ - { - "timestamp": "2025-01-17T10:00:00", - "session_id": session_id, - "messages_exported": 767, - "transcript_path": str(transcript_path), - } - ] - compaction_file.write_text(json.dumps(compaction_events)) - - result = checker._get_pre_compaction_transcript(session_id) - self.assertIsNotNone(result) - self.assertEqual(result, transcript_path) - - def test_load_pre_compaction_transcript_markdown(self): - """Test _load_pre_compaction_transcript parses markdown format.""" - checker = PowerSteeringChecker(self.project_root) - - # Create markdown transcript - transcript_path = self.project_root / "test_transcript.md" - transcript_content = """# Conversation Transcript - -## User -Please implement authentication - -## Assistant -I'll help you implement authentication. Let me start by... - -## User -Make sure to include JWT support - -## Assistant -Got it, I'll add JWT support as requested. -""" - transcript_path.write_text(transcript_content) - - messages = checker._load_pre_compaction_transcript(transcript_path) - - self.assertEqual(len(messages), 4) - self.assertEqual(messages[0]["role"], "user") - self.assertIn("authentication", messages[0]["content"]) - self.assertEqual(messages[1]["role"], "assistant") - self.assertEqual(messages[2]["role"], "user") - self.assertIn("JWT", messages[2]["content"]) - - def test_load_pre_compaction_transcript_jsonl(self): - """Test _load_pre_compaction_transcript parses JSONL format.""" - checker = PowerSteeringChecker(self.project_root) - - # Create JSONL transcript - transcript_path = self.project_root / "test_transcript.jsonl" - transcript_content = """{"role": "user", "content": "Hello"} -{"role": "assistant", "content": "Hi there!"} -{"role": "user", "content": "Help me"} -""" - transcript_path.write_text(transcript_content) - - messages = checker._load_pre_compaction_transcript(transcript_path) - - self.assertEqual(len(messages), 3) - self.assertEqual(messages[0]["role"], "user") - self.assertEqual(messages[0]["content"], "Hello") - self.assertEqual(messages[1]["role"], "assistant") - - def test_verify_actual_state_no_pr(self): - """Test _verify_actual_state handles case with no PR.""" - checker = PowerSteeringChecker(self.project_root) - - # This will likely fail to find gh/git in test env, should return empty results - result = checker._verify_actual_state("test_session") - - # Should be a dict with expected keys - self.assertIn("ci_passing", result) - self.assertIn("pr_mergeable", result) - self.assertIn("branch_current", result) - self.assertIn("all_passing", result) - # In test environment without git setup, all should be False - self.assertFalse(result["all_passing"]) - - -if __name__ == "__main__": - unittest.main() diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_evidence_integration.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_evidence_integration.py deleted file mode 100644 index 364a01f64..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_evidence_integration.py +++ /dev/null @@ -1,240 +0,0 @@ -#!/usr/bin/env python3 -""" -Integration tests for evidence-based completion checking in power_steering_checker. - -Tests the integration of completion_evidence with power_steering_checker to -verify that concrete evidence can override SDK analysis. -""" - -import json -import sys -from pathlib import Path -from unittest.mock import MagicMock, patch - -import pytest - -# Add parent directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from power_steering_checker import PowerSteeringChecker - - -class TestEvidenceBasedCompletion: - """Integration tests for evidence-based completion checking.""" - - @pytest.fixture - def temp_project(self, tmp_path): - """Create temporary project directory.""" - project = tmp_path / "project" - project.mkdir() - - # Create .claude directory structure - claude_dir = project / ".claude" - claude_dir.mkdir() - - tools_dir = claude_dir / "tools" / "amplihack" - tools_dir.mkdir(parents=True) - - runtime_dir = claude_dir / "runtime" / "power-steering" - runtime_dir.mkdir(parents=True) - - return project - - @pytest.fixture - def transcript_path(self, temp_project): - """Create a test transcript file.""" - transcript = temp_project / "transcript.jsonl" - - # Create a realistic development transcript with tool use - with open(transcript, "w") as f: - entry = { - "role": "user", - "content": [{"type": "text", "text": "Fix the bug in the authentication module"}], - } - f.write(json.dumps(entry) + "\n") - - # Add tool use to make it a DEVELOPMENT session - entry = { - "role": "assistant", - "content": [ - {"type": "text", "text": "I'll fix the authentication bug"}, - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/path/to/file.py"}, - }, - ], - } - f.write(json.dumps(entry) + "\n") - - entry = { - "role": "user", - "content": [{"type": "tool_result", "tool_use_id": "123"}], - } - f.write(json.dumps(entry) + "\n") - - return transcript - - @pytest.fixture - def checker(self, temp_project): - """Create PowerSteeringChecker instance.""" - return PowerSteeringChecker(project_root=temp_project) - - @patch("power_steering_checker.PowerSteeringChecker._is_qa_session", return_value=False) - @patch("subprocess.run") - def test_pr_merged_allows_stop(self, mock_run, mock_qa, checker, transcript_path, temp_project): - """Test that merged PR allows stop without SDK analysis.""" - # Mock gh CLI to return merged PR - mock_run.return_value = MagicMock( - returncode=0, - stdout='{"state": "MERGED", "mergedAt": "2025-01-01T00:00:00Z"}', - ) - - result = checker.check(transcript_path, "test-session-123") - - assert result.decision == "approve" - assert "PR merged successfully" in result.reasons - # Verify we didn't run expensive SDK analysis - # (would have taken longer if we did) - - @patch("power_steering_checker.PowerSteeringChecker._is_qa_session", return_value=False) - def test_user_confirmation_allows_stop(self, mock_qa, checker, transcript_path, temp_project): - """Test that user confirmation allows stop without SDK analysis.""" - # Create user confirmation file - session_dir = temp_project / ".claude" / "runtime" / "power-steering" / "test-session-456" - session_dir.mkdir(parents=True) - confirmation_file = session_dir / "user_confirmed_complete" - confirmation_file.write_text("Work is complete") - - result = checker.check(transcript_path, "test-session-456") - - assert result.decision == "approve" - assert "User explicitly confirmed work is complete" in result.reasons - - @patch("subprocess.run") - def test_evidence_available_flag_respected( - self, mock_run, checker, transcript_path, temp_project - ): - """Test that evidence checking only runs when EVIDENCE_AVAILABLE is True.""" - # Mock gh CLI to simulate unavailable - mock_run.side_effect = FileNotFoundError() - - # This should not crash - it should fail-open and continue to SDK analysis - result = checker.check(transcript_path, "test-session-789") - - # Result should still be valid (either approve or block from SDK analysis) - assert result.decision in ["approve", "block"] - - @patch("power_steering_checker.PowerSteeringChecker._is_qa_session", return_value=False) - def test_evidence_results_attached_to_result( - self, mock_qa, checker, transcript_path, temp_project - ): - """Test that evidence results are attached to PowerSteeringResult.""" - # Create transcript with completed TODOs - with open(transcript_path, "w") as f: - entry = { - "role": "assistant", - "content": [{"type": "text", "text": "- [x] Task 1\n- [x] Task 2"}], - } - f.write(json.dumps(entry) + "\n") - - result = checker.check(transcript_path, "test-session-evidence") - - # Evidence results field should be attached (even if empty due to EVIDENCE_AVAILABLE flag) - assert hasattr(result, "evidence_results") - # This is a list (empty or populated depending on EVIDENCE_AVAILABLE) - assert isinstance(result.evidence_results, list) - - @patch("subprocess.run") - def test_evidence_checking_fails_gracefully( - self, mock_run, checker, transcript_path, temp_project - ): - """Test that evidence checking failures don't break the checker.""" - # Mock subprocess to raise an unexpected exception - mock_run.side_effect = RuntimeError("Unexpected error") - - # Should not crash - should log warning and continue to SDK analysis - result = checker.check(transcript_path, "test-session-fail") - - # Result should still be valid - assert result.decision in ["approve", "block"] - - -class TestEvidenceSuggestsComplete: - """Tests for _evidence_suggests_complete helper method.""" - - @pytest.fixture - def checker(self, tmp_path): - """Create PowerSteeringChecker instance.""" - return PowerSteeringChecker(project_root=tmp_path) - - def test_no_evidence_returns_false(self, checker): - """Test that no evidence returns False.""" - result = checker._evidence_suggests_complete([]) - assert result is False - - def test_strong_evidence_returns_true(self, checker): - """Test that strong evidence types return True.""" - from completion_evidence import Evidence, EvidenceType - - evidence = [ - Evidence( - evidence_type=EvidenceType.PR_MERGED, - verified=True, - details="PR merged", - confidence=1.0, - ) - ] - - result = checker._evidence_suggests_complete(evidence) - assert result is True - - def test_multiple_medium_evidence_returns_true(self, checker): - """Test that 3+ verified medium evidence types return True.""" - from completion_evidence import Evidence, EvidenceType - - evidence = [ - Evidence( - evidence_type=EvidenceType.TODO_COMPLETE, - verified=True, - details="All TODOs complete", - confidence=0.8, - ), - Evidence( - evidence_type=EvidenceType.FILES_COMMITTED, - verified=True, - details="All files committed", - confidence=0.7, - ), - Evidence( - evidence_type=EvidenceType.CI_PASSING, - verified=True, - details="CI passing", - confidence=0.9, - ), - ] - - result = checker._evidence_suggests_complete(evidence) - assert result is True - - def test_insufficient_evidence_returns_false(self, checker): - """Test that insufficient evidence returns False.""" - from completion_evidence import Evidence, EvidenceType - - evidence = [ - Evidence( - evidence_type=EvidenceType.TODO_COMPLETE, - verified=False, - details="Some TODOs incomplete", - confidence=0.0, - ), - Evidence( - evidence_type=EvidenceType.FILES_COMMITTED, - verified=False, - details="Files uncommitted", - confidence=0.0, - ), - ] - - result = checker._evidence_suggests_complete(evidence) - assert result is False diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_phase2.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_phase2.py deleted file mode 100644 index c8aa7d89b..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_phase2.py +++ /dev/null @@ -1,1386 +0,0 @@ -#!/usr/bin/env python3 -""" -Unit tests for PowerSteeringChecker Phase 2 functionality. - -Tests: -- YAML loading and validation -- All 16 new checker methods -- Generic analyzer -- User customization -- Backward compatibility with Phase 1 -""" - -import json -import sys -import tempfile -import unittest -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from power_steering_checker import PowerSteeringChecker - - -class TestYAMLLoading(unittest.TestCase): - """Tests for YAML loading and validation.""" - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - - # Create directory structure - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - - # Create default config - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = {"enabled": True, "version": "1.0.0", "phase": 2} - config_path.write_text(json.dumps(config, indent=2)) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_yaml_loading_valid(self): - """Test YAML loading with valid file.""" - # Create valid YAML file - yaml_path = self.project_root / ".claude" / "tools" / "amplihack" / "considerations.yaml" - yaml_content = """ -- id: test_consideration - category: Test Category - question: Is this a test? - description: Test consideration - severity: blocker - checker: _check_todos_complete - enabled: true -""" - yaml_path.write_text(yaml_content) - - checker = PowerSteeringChecker(self.project_root) - - # Should load YAML successfully - self.assertEqual(len(checker.considerations), 1) - self.assertEqual(checker.considerations[0]["id"], "test_consideration") - - def test_yaml_loading_missing_file(self): - """Test YAML loading falls back to package default when file missing. - - When no YAML exists in the project root, the system falls back to - loading the package's default considerations.yaml (22 considerations), - not the hardcoded Phase 1 fallback (5 considerations). - """ - # No YAML file created in temp project root - checker = PowerSteeringChecker(self.project_root) - - # Should fall back to package default YAML (22 considerations) - # The fallback mechanism loads considerations.yaml from the package directory - self.assertGreaterEqual(len(checker.considerations), 5) # At least Phase 1 - self.assertEqual(checker.considerations[0]["id"], "todos_complete") - - def test_yaml_loading_invalid_format(self): - """Test YAML loading with invalid format.""" - # Create invalid YAML (not a list) - yaml_path = self.project_root / ".claude" / "tools" / "amplihack" / "considerations.yaml" - yaml_content = """ -invalid_format: not_a_list -""" - yaml_path.write_text(yaml_content) - - checker = PowerSteeringChecker(self.project_root) - - # Should fall back to Phase 1 - self.assertEqual(len(checker.considerations), 5) - - def test_yaml_loading_malformed(self): - """Test YAML loading with malformed syntax.""" - # Create malformed YAML - yaml_path = self.project_root / ".claude" / "tools" / "amplihack" / "considerations.yaml" - yaml_content = """ -- id: test - missing_colon after key - invalid syntax -""" - yaml_path.write_text(yaml_content) - - checker = PowerSteeringChecker(self.project_root) - - # Should fall back to Phase 1 on parse error - self.assertEqual(len(checker.considerations), 5) - - def test_yaml_loading_partial_valid(self): - """Test YAML loading with mix of valid and invalid considerations.""" - # Create YAML with one valid, one invalid - yaml_path = self.project_root / ".claude" / "tools" / "amplihack" / "considerations.yaml" - yaml_content = """ -- id: valid_consideration - category: Test - question: Valid? - description: Valid test - severity: blocker - checker: _check_todos_complete - enabled: true - -- id: invalid_consideration - # Missing required fields - question: Invalid? -""" - yaml_path.write_text(yaml_content) - - checker = PowerSteeringChecker(self.project_root) - - # Should load only valid consideration - self.assertEqual(len(checker.considerations), 1) - self.assertEqual(checker.considerations[0]["id"], "valid_consideration") - - -class TestYAMLValidation(unittest.TestCase): - """Tests for YAML schema validation.""" - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config_path.write_text(json.dumps({"enabled": True})) - - self.checker = PowerSteeringChecker(self.project_root) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_validate_consideration_valid(self): - """Test validation of valid consideration.""" - consideration = { - "id": "test", - "category": "Test", - "question": "Test?", - "description": "Test desc", - "severity": "blocker", - "checker": "test_checker", - "enabled": True, - } - - result = self.checker._validate_consideration_schema(consideration) - self.assertTrue(result) - - def test_validate_consideration_missing_fields(self): - """Test validation with missing required fields.""" - consideration = { - "id": "test", - # Missing other required fields - } - - result = self.checker._validate_consideration_schema(consideration) - self.assertFalse(result) - - def test_validate_consideration_invalid_severity(self): - """Test validation with invalid severity.""" - consideration = { - "id": "test", - "category": "Test", - "question": "Test?", - "description": "Test desc", - "severity": "invalid_severity", # Invalid - "checker": "test_checker", - "enabled": True, - } - - result = self.checker._validate_consideration_schema(consideration) - self.assertFalse(result) - - def test_validate_consideration_invalid_enabled(self): - """Test validation with invalid enabled type.""" - consideration = { - "id": "test", - "category": "Test", - "question": "Test?", - "description": "Test desc", - "severity": "blocker", - "checker": "test_checker", - "enabled": "yes", # Should be boolean - } - - result = self.checker._validate_consideration_schema(consideration) - self.assertFalse(result) - - def test_validate_consideration_not_dict(self): - """Test validation with non-dictionary input.""" - result = self.checker._validate_consideration_schema("not a dict") - self.assertFalse(result) - - -class TestGenericAnalyzer(unittest.TestCase): - """Tests for generic analyzer.""" - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config_path.write_text(json.dumps({"enabled": True})) - - self.checker = PowerSteeringChecker(self.project_root) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_generic_analyzer_basic(self): - """Test generic analyzer with basic consideration.""" - transcript = [ - {"type": "user", "message": {"content": "Test question"}}, - {"type": "assistant", "message": {"content": [{"type": "text", "text": "Answer"}]}}, - ] - - consideration = { - "id": "test", - "question": "Is this a test?", - "category": "Test", - "severity": "warning", - } - - # Generic analyzer should default to satisfied (fail-open) - result = self.checker._generic_analyzer(transcript, "test_session", consideration) - self.assertTrue(result) - - def test_generic_analyzer_with_keywords(self): - """Test generic analyzer extracts keywords from question.""" - transcript = [ - { - "type": "assistant", - "message": {"content": [{"type": "text", "text": "security scan completed"}]}, - } - ] - - consideration = { - "id": "security_scan", - "question": "Were security scans performed?", - "category": "Security", - "severity": "blocker", - } - - # Should extract "security" and "scan" as keywords - result = self.checker._generic_analyzer(transcript, "test_session", consideration) - # Phase 2: Always satisfied (fail-open) - self.assertTrue(result) - - def test_generic_analyzer_empty_question(self): - """Test generic analyzer with empty question.""" - transcript = [] - consideration = {"id": "empty", "question": "", "category": "Test", "severity": "warning"} - - result = self.checker._generic_analyzer(transcript, "test_session", consideration) - self.assertTrue(result) # Should default to satisfied - - -class TestNewCheckers(unittest.TestCase): - """Tests for the 16 new checker methods.""" - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config_path.write_text(json.dumps({"enabled": True})) - - self.checker = PowerSteeringChecker(self.project_root) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_check_agent_unnecessary_questions(self): - """Test _check_agent_unnecessary_questions.""" - # Transcript with many questions - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - {"type": "text", "text": "Question 1? Question 2? Question 3? Question 4?"} - ] - }, - } - ] - - result = self.checker._check_agent_unnecessary_questions(transcript, "test_session") - self.assertFalse(result) # Too many questions - - # Transcript with few questions - transcript_good = [ - {"type": "assistant", "message": {"content": [{"type": "text", "text": "Done."}]}} - ] - - result = self.checker._check_agent_unnecessary_questions(transcript_good, "test_session") - self.assertTrue(result) - - def test_check_objective_completion(self): - """Test _check_objective_completion.""" - # Transcript with completion indicators - transcript = [ - {"type": "user", "message": {"content": "Implement feature X"}}, - { - "type": "assistant", - "message": {"content": [{"type": "text", "text": "Implementation complete"}]}, - }, - ] - - result = self.checker._check_objective_completion(transcript, "test_session") - self.assertTrue(result) - - # Transcript without completion - transcript_incomplete = [ - {"type": "user", "message": {"content": "Implement feature X"}}, - { - "type": "assistant", - "message": {"content": [{"type": "text", "text": "Working on it"}]}, - }, - ] - - result = self.checker._check_objective_completion(transcript_incomplete, "test_session") - self.assertFalse(result) - - def test_check_documentation_updates(self): - """Test _check_documentation_updates.""" - # Code changes with doc updates - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/test.py", "content": "code"}, - }, - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/README.md", "content": "docs"}, - }, - ] - }, - } - ] - - result = self.checker._check_documentation_updates(transcript, "test_session") - self.assertTrue(result) - - # Code changes without doc updates - transcript_no_docs = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/test.py", "content": "code"}, - } - ] - }, - } - ] - - result = self.checker._check_documentation_updates(transcript_no_docs, "test_session") - self.assertFalse(result) - - def test_check_tutorial_needed(self): - """Test _check_tutorial_needed.""" - # New feature with tutorial - transcript = [ - {"type": "user", "message": {"content": "Add new feature X"}}, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/examples/tutorial.md", "content": "guide"}, - } - ] - }, - }, - ] - - result = self.checker._check_tutorial_needed(transcript, "test_session") - self.assertTrue(result) - - # New feature without tutorial - transcript_no_tutorial = [ - {"type": "user", "message": {"content": "Add new feature X"}}, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/src/feature.py", "content": "code"}, - } - ] - }, - }, - ] - - result = self.checker._check_tutorial_needed(transcript_no_tutorial, "test_session") - self.assertFalse(result) - - def test_check_presentation_needed(self): - """Test _check_presentation_needed.""" - # Always returns True (low priority check) - transcript = [] - result = self.checker._check_presentation_needed(transcript, "test_session") - self.assertTrue(result) - - def test_check_next_steps(self): - """Test _check_next_steps. - - INVERTED LOGIC (per issue #1679): - - Returns FALSE when next steps ARE found (work incomplete - should continue) - - Returns TRUE when NO next steps found (work is complete) - """ - # Transcript with next steps mentioned - should return FALSE (incomplete) - transcript = [ - { - "type": "assistant", - "message": { - "content": [{"type": "text", "text": "Next steps: implement feature Y"}] - }, - } - ] - - result = self.checker._check_next_steps(transcript, "test_session") - self.assertFalse(result) # FALSE = work incomplete, has next steps - - # Transcript without next steps - should return TRUE (complete) - transcript_no_steps = [ - {"type": "assistant", "message": {"content": [{"type": "text", "text": "Done"}]}} - ] - - result = self.checker._check_next_steps(transcript_no_steps, "test_session") - self.assertTrue(result) # TRUE = work complete, no next steps - - def test_check_docs_organization(self): - """Test _check_docs_organization.""" - # Docs in correct location - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": { - "file_path": "/.claude/runtime/investigation.md", - "content": "findings", - }, - } - ] - }, - } - ] - - result = self.checker._check_docs_organization(transcript, "test_session") - self.assertTrue(result) - - # Docs in wrong location - transcript_wrong = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": { - "file_path": "/investigation.md", # Should be in .claude/ - "content": "findings", - }, - } - ] - }, - } - ] - - result = self.checker._check_docs_organization(transcript_wrong, "test_session") - self.assertFalse(result) - - def test_check_investigation_docs(self): - """Test _check_investigation_docs.""" - # Investigation with documentation - transcript = [ - {"type": "user", "message": {"content": "Investigation into bug X"}}, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/findings.md", "content": "results"}, - } - ] - }, - }, - ] - - result = self.checker._check_investigation_docs(transcript, "test_session") - self.assertTrue(result) - - # Investigation without documentation - transcript_no_docs = [ - {"type": "user", "message": {"content": "Investigation into bug X"}}, - { - "type": "assistant", - "message": {"content": [{"type": "text", "text": "Found issues"}]}, - }, - ] - - result = self.checker._check_investigation_docs(transcript_no_docs, "test_session") - self.assertFalse(result) - - def test_check_shortcuts(self): - """Test _check_shortcuts.""" - # Code with shortcut indicators - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": { - "file_path": "/test.py", - "content": "def foo():\n pass # TODO: fix later", - }, - } - ] - }, - } - ] - - result = self.checker._check_shortcuts(transcript, "test_session") - self.assertFalse(result) - - # Clean code - transcript_clean = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": { - "file_path": "/test.py", - "content": "def foo():\n return 42", - }, - } - ] - }, - } - ] - - result = self.checker._check_shortcuts(transcript_clean, "test_session") - self.assertTrue(result) - - def test_check_interactive_testing(self): - """Test _check_interactive_testing.""" - # Transcript with interactive testing mention - transcript = [ - { - "type": "assistant", - "message": {"content": [{"type": "text", "text": "Manually tested the feature"}]}, - } - ] - - result = self.checker._check_interactive_testing(transcript, "test_session") - self.assertTrue(result) - - # Only automated tests - transcript_automated = [ - { - "type": "tool_result", - "message": {"content": "Tests: 5 passed"}, - } - ] - - result = self.checker._check_interactive_testing(transcript_automated, "test_session") - self.assertFalse(result) # Not enough tests - - def test_check_unrelated_changes(self): - """Test _check_unrelated_changes.""" - # Few files modified - transcript = [ - {"type": "user", "message": {"content": "Fix bug in auth"}}, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/auth.py", "content": "fix"}, - } - ] - }, - }, - ] - - result = self.checker._check_unrelated_changes(transcript, "test_session") - self.assertTrue(result) - - # Many files modified (scope creep) - transcript_many = [ - {"type": "user", "message": {"content": "Fix bug in auth"}}, - ] - # Add 25 file modifications - for i in range(25): - transcript_many.append( - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": f"/file{i}.py", "content": "code"}, - } - ] - }, - } - ) - - result = self.checker._check_unrelated_changes(transcript_many, "test_session") - self.assertFalse(result) - - def test_check_root_pollution(self): - """Test _check_root_pollution.""" - # Acceptable root file - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/README.md", "content": "docs"}, - } - ] - }, - } - ] - - result = self.checker._check_root_pollution(transcript, "test_session") - self.assertTrue(result) - - # Unacceptable root file - transcript_pollution = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/random_file.txt", "content": "stuff"}, - } - ] - }, - } - ] - - result = self.checker._check_root_pollution(transcript_pollution, "test_session") - self.assertFalse(result) - - def test_check_pr_description(self): - """Test _check_pr_description.""" - # PR with good description - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": { - "command": 'gh pr create --title "Fix" --body "Summary: fix\nTest plan: tested"' - }, - } - ] - }, - } - ] - - result = self.checker._check_pr_description(transcript, "test_session") - self.assertTrue(result) - - # PR with poor description - transcript_bad = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": 'gh pr create --title "Fix" --body "Quick fix"'}, - } - ] - }, - } - ] - - result = self.checker._check_pr_description(transcript_bad, "test_session") - self.assertFalse(result) - - def test_check_review_responses(self): - """Test _check_review_responses.""" - # Review feedback addressed - transcript = [ - {"type": "user", "message": {"content": "Please address the review comments"}}, - { - "type": "assistant", - "message": {"content": [{"type": "text", "text": "Addressed all feedback"}]}, - }, - ] - - result = self.checker._check_review_responses(transcript, "test_session") - self.assertTrue(result) - - # Review feedback not addressed - transcript_not_addressed = [ - {"type": "user", "message": {"content": "Please address the review comments"}}, - { - "type": "assistant", - "message": {"content": [{"type": "text", "text": "Working on it"}]}, - }, - ] - - result = self.checker._check_review_responses(transcript_not_addressed, "test_session") - self.assertFalse(result) - - def test_check_branch_rebase(self): - """Test _check_branch_rebase.""" - # Branch up to date - transcript = [ - {"type": "tool_result", "message": {"content": "Your branch is up to date with main"}} - ] - - result = self.checker._check_branch_rebase(transcript, "test_session") - self.assertTrue(result) - - # Branch behind - transcript_behind = [ - { - "type": "tool_result", - "message": {"content": "Your branch is behind main by 5 commits"}, - } - ] - - result = self.checker._check_branch_rebase(transcript_behind, "test_session") - self.assertFalse(result) - - def test_check_ci_precommit_mismatch(self): - """Test _check_ci_precommit_mismatch.""" - # No mismatch - transcript = [ - {"type": "tool_result", "message": {"content": "pre-commit passed"}}, - {"type": "tool_result", "message": {"content": "CI checks passed"}}, - ] - - result = self.checker._check_ci_precommit_mismatch(transcript, "test_session") - self.assertTrue(result) - - # Mismatch detected - transcript_mismatch = [ - {"type": "tool_result", "message": {"content": "pre-commit passed"}}, - {"type": "tool_result", "message": {"content": "CI checks failed"}}, - ] - - result = self.checker._check_ci_precommit_mismatch(transcript_mismatch, "test_session") - self.assertFalse(result) - - -class TestUserCustomization(unittest.TestCase): - """Tests for user customization features.""" - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config_path.write_text(json.dumps({"enabled": True})) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_custom_consideration_loaded(self): - """Test custom considerations are loaded from YAML.""" - # Create YAML with custom consideration - yaml_path = self.project_root / ".claude" / "tools" / "amplihack" / "considerations.yaml" - yaml_content = """ -- id: custom_security_check - category: Security - question: Was security audit performed? - description: Custom security consideration - severity: blocker - checker: generic - enabled: true -""" - yaml_path.write_text(yaml_content) - - checker = PowerSteeringChecker(self.project_root) - - # Should load custom consideration - self.assertEqual(len(checker.considerations), 1) - self.assertEqual(checker.considerations[0]["id"], "custom_security_check") - - def test_consideration_disabled(self): - """Test disabled considerations are not checked.""" - yaml_path = self.project_root / ".claude" / "tools" / "amplihack" / "considerations.yaml" - yaml_content = """ -- id: enabled_check - category: Test - question: Enabled? - description: Enabled consideration - severity: blocker - checker: generic - enabled: true - applicable_session_types: ["*"] - -- id: disabled_check - category: Test - question: Disabled? - description: Disabled consideration - severity: blocker - checker: generic - enabled: false - applicable_session_types: ["*"] -""" - yaml_path.write_text(yaml_content) - - checker = PowerSteeringChecker(self.project_root) - - # Create test transcript with enough activity to not be SIMPLE/INFORMATIONAL - transcript = [ - {"type": "user", "message": {"content": "Create a feature"}}, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/test.py", "content": "x=1"}, - }, - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/test2.py", "content": "y=2"}, - }, - ] - }, - }, - ] - analysis = checker._analyze_considerations(transcript, "test_session") - - # Only enabled consideration should be in results - self.assertIn("enabled_check", analysis.results) - self.assertNotIn("disabled_check", analysis.results) - - def test_custom_consideration_with_generic_checker(self): - """Test custom considerations work with generic checker. - - NOTE: With SDK-first refactoring, we must mock SDK_AVAILABLE=False - to test the generic checker fallback path. When SDK is available, - SDK analysis is used instead. - """ - from unittest.mock import patch - - yaml_path = self.project_root / ".claude" / "tools" / "amplihack" / "considerations.yaml" - yaml_content = """ -- id: custom_check - category: Custom - question: Is custom requirement met? - description: Custom check - severity: warning - checker: generic - enabled: true - applicable_session_types: ["*"] -""" - yaml_path.write_text(yaml_content) - - checker = PowerSteeringChecker(self.project_root) - # Transcript with tool usage to ensure it's not classified as SIMPLE/INFORMATIONAL - transcript = [ - {"type": "user", "message": {"content": "Build a feature"}}, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/app.py", "content": "code"}, - }, - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "/test.py", "content": "tests"}, - }, - ] - }, - }, - ] - - # Mock SDK_AVAILABLE=False to test the generic checker fallback path - with patch("power_steering_checker.SDK_AVAILABLE", False): - analysis = checker._analyze_considerations(transcript, "test_session") - - # Should have result for custom check - self.assertIn("custom_check", analysis.results) - # Generic analyzer defaults to satisfied (fail-open) when SDK unavailable - self.assertTrue(analysis.results["custom_check"].satisfied) - - -class TestBackwardCompatibility(unittest.TestCase): - """Tests for backward compatibility with Phase 1.""" - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = { - "enabled": True, - "checkers_enabled": { - "todos_complete": True, - "dev_workflow_complete": True, - "philosophy_compliance": True, - "local_testing": True, - "ci_status": True, - }, - } - config_path.write_text(json.dumps(config, indent=2)) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_phase1_checkers_still_work(self): - """Test Phase 1 checkers still function with Phase 2 code. - - When no YAML exists in the project root, the system falls back to - loading the package's default considerations.yaml. The original Phase 1 - considerations (todos_complete, dev_workflow_complete, etc.) should - still be present and functional. - """ - # No YAML file - should use package default fallback - checker = PowerSteeringChecker(self.project_root) - - # Should have at least Phase 1 considerations (could be more from package YAML) - self.assertGreaterEqual(len(checker.considerations), 5) - - # Test a Phase 1 checker still works - transcript = [ - {"type": "user", "message": {"content": "Test"}}, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "TodoWrite", - "input": { - "todos": [ - { - "content": "Task 1", - "status": "completed", - "activeForm": "Completing", - } - ] - }, - } - ] - }, - }, - ] - - result = checker._check_todos_complete(transcript, "test_session") - self.assertTrue(result) - - def test_config_checkers_enabled_respected(self): - """Test Phase 1 config checkers_enabled still works.""" - # Disable a checker in config - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = json.loads(config_path.read_text()) - config["checkers_enabled"]["todos_complete"] = False - config_path.write_text(json.dumps(config)) - - checker = PowerSteeringChecker(self.project_root) - transcript = [] - - analysis = checker._analyze_considerations(transcript, "test_session") - - # todos_complete should not be in results (disabled in config) - self.assertNotIn("todos_complete", analysis.results) - - -class TestSDKFirstRefactoring(unittest.TestCase): - """TDD tests for SDK-First refactoring (Issue #1679). - - These tests verify that SDK is tried FIRST for ALL considerations, - with heuristics as fallback only. Current implementation has BACKWARDS - logic that will cause these tests to FAIL. - """ - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config_path.write_text(json.dumps({"enabled": True})) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_sdk_first_for_all_considerations(self): - """Test that SDK is tried FIRST for ALL consideration types. - - SDK analysis should be attempted for ALL considerations, including - those with checker='generic'. Heuristics are only used as fallback. - """ - import asyncio - from unittest.mock import AsyncMock, patch - - checker = PowerSteeringChecker(self.project_root) - - # Mock transcript - transcript = [ - {"type": "user", "message": {"content": "Test"}}, - {"type": "assistant", "message": {"content": [{"type": "text", "text": "Response"}]}}, - ] - - # Test consideration with "generic" checker (currently SKIPPED by SDK) - consideration = { - "id": "test_generic", - "question": "Is this satisfied?", - "category": "Test", - "severity": "warning", - "checker": "generic", # This should still use SDK first! - } - - # Mock SDK to track if it was called - with ( - patch("power_steering_checker.SDK_AVAILABLE", True), - patch( - "power_steering_checker.analyze_consideration", new_callable=AsyncMock - ) as mock_sdk, - ): - mock_sdk.return_value = True - - # Run async check - result = asyncio.run( - checker._check_single_consideration_async(consideration, transcript, "test_session") - ) - - # SDK MUST be called even for "generic" checker - mock_sdk.assert_called_once() - self.assertTrue(result.satisfied) - - def test_sdk_used_for_generic_checkers(self): - """Test that SDK is used even when checker='generic'. - - SDK should be tried for generic checkers too. When SDK succeeds, - heuristic fallback should NOT be called. - """ - import asyncio - from unittest.mock import AsyncMock, patch - - checker = PowerSteeringChecker(self.project_root) - - transcript = [ - {"type": "user", "message": {"content": "Test"}}, - ] - - consideration = { - "id": "generic_check", - "question": "Generic question?", - "category": "Test", - "severity": "warning", - "checker": "generic", - } - - with ( - patch("power_steering_checker.SDK_AVAILABLE", True), - patch( - "power_steering_checker.analyze_consideration", new_callable=AsyncMock - ) as mock_sdk, - ): - mock_sdk.return_value = False - - # Also mock the heuristic fallback to track if it's called - with patch.object(checker, "_generic_analyzer", return_value=True) as mock_heuristic: - asyncio.run( - checker._check_single_consideration_async( - consideration, transcript, "test_session" - ) - ) - - # SDK MUST be called first (even for generic) - mock_sdk.assert_called_once() - - # Heuristic should NOT be called since SDK succeeded - mock_heuristic.assert_not_called() - - def test_fallback_to_heuristics_when_sdk_unavailable(self): - """Test that heuristics are used when SDK_AVAILABLE=False. - - EXPECTED: When SDK not available, fall back to heuristics. - - This test should PASS even with current implementation. - """ - import asyncio - from unittest.mock import patch - - checker = PowerSteeringChecker(self.project_root) - - transcript = [ - {"type": "user", "message": {"content": "Test"}}, - ] - - consideration = { - "id": "test_fallback", - "question": "Test?", - "category": "Test", - "severity": "warning", - "checker": "_check_todos_complete", - } - - with patch("power_steering_checker.SDK_AVAILABLE", False): - # Mock the heuristic checker - with patch.object( - checker, "_check_todos_complete", return_value=True - ) as mock_heuristic: - result = asyncio.run( - checker._check_single_consideration_async( - consideration, transcript, "test_session" - ) - ) - - # Heuristic should be called when SDK unavailable - mock_heuristic.assert_called_once() - self.assertTrue(result.satisfied) - - def test_fallback_to_heuristics_when_sdk_fails(self): - """Test that heuristics are used when SDK call raises exception. - - When SDK fails, gracefully fall back to heuristics. - """ - import asyncio - from unittest.mock import AsyncMock, patch - - checker = PowerSteeringChecker(self.project_root) - - transcript = [ - {"type": "user", "message": {"content": "Test"}}, - ] - - consideration = { - "id": "test_sdk_failure", - "question": "Test?", - "category": "Test", - "severity": "warning", - "checker": "generic", - } - - with ( - patch("power_steering_checker.SDK_AVAILABLE", True), - patch( - "power_steering_checker.analyze_consideration", new_callable=AsyncMock - ) as mock_sdk, - ): - # SDK raises exception - mock_sdk.side_effect = Exception("SDK timeout") - - # Mock heuristic fallback - with patch.object(checker, "_generic_analyzer", return_value=True) as mock_heuristic: - result = asyncio.run( - checker._check_single_consideration_async( - consideration, transcript, "test_session" - ) - ) - - # SDK should have been attempted - mock_sdk.assert_called_once() - - # Heuristic should be called as fallback after SDK failure - mock_heuristic.assert_called_once() - self.assertTrue(result.satisfied) - - def test_fail_open_on_complete_failure(self): - """Test that system fails open when both SDK and heuristics fail. - - EXPECTED: Return satisfied=True to allow user to continue. - - This test verifies fail-open behavior. - """ - import asyncio - from unittest.mock import AsyncMock, patch - - checker = PowerSteeringChecker(self.project_root) - - transcript = [ - {"type": "user", "message": {"content": "Test"}}, - ] - - consideration = { - "id": "test_fail_open", - "question": "Test?", - "category": "Test", - "severity": "blocker", - "checker": "generic", - } - - with ( - patch("power_steering_checker.SDK_AVAILABLE", True), - patch( - "power_steering_checker.analyze_consideration", new_callable=AsyncMock - ) as mock_sdk, - ): - # SDK fails - mock_sdk.side_effect = Exception("SDK error") - - # Heuristic also fails - with patch.object( - checker, "_generic_analyzer", side_effect=Exception("Heuristic error") - ): - result = asyncio.run( - checker._check_single_consideration_async( - consideration, transcript, "test_session" - ) - ) - - # Must fail-open: satisfied=True even though everything failed - self.assertTrue(result.satisfied) - self.assertIn("fail-open", result.reason.lower()) - - def test_sdk_first_for_specific_checkers(self): - """Test that SDK is used first for specific _check_* methods. - - SDK should be attempted first even for considerations with specific - checker methods like _check_todos_complete. Heuristics are fallback only. - """ - import asyncio - from unittest.mock import AsyncMock, patch - - checker = PowerSteeringChecker(self.project_root) - - transcript = [ - {"type": "user", "message": {"content": "Test"}}, - ] - - consideration = { - "id": "todos_complete", - "question": "Are todos complete?", - "category": "Workflow", - "severity": "blocker", - "checker": "_check_todos_complete", - } - - with ( - patch("power_steering_checker.SDK_AVAILABLE", True), - patch( - "power_steering_checker.analyze_consideration", new_callable=AsyncMock - ) as mock_sdk, - ): - mock_sdk.return_value = True - - with patch.object( - checker, "_check_todos_complete", return_value=False - ) as mock_heuristic: - result = asyncio.run( - checker._check_single_consideration_async( - consideration, transcript, "test_session" - ) - ) - - # SDK should be called first - mock_sdk.assert_called_once() - - # Heuristic should NOT be called since SDK succeeded - mock_heuristic.assert_not_called() - - # Result should use SDK result, not heuristic - self.assertTrue(result.satisfied) - - -if __name__ == "__main__": - unittest.main() diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_pr_review_classification.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_pr_review_classification.py deleted file mode 100644 index 9bff8578f..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_pr_review_classification.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python3 -""" -Tests for power-steering classification of PR review/merge sessions. - -Verifies that PR review and merge operations are classified as SIMPLE, -not DEVELOPMENT. Fixes issue #2563. -""" - -import sys -from pathlib import Path - -import pytest - -# Add parent directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from power_steering_checker import PowerSteeringChecker - - -class TestPRReviewClassification: - """Tests for PR review/merge session classification (issue #2563).""" - - @pytest.fixture - def temp_project(self, tmp_path): - """Create temporary project directory.""" - project = tmp_path / "project" - project.mkdir() - - claude_dir = project / ".claude" - claude_dir.mkdir() - - tools_dir = claude_dir / "tools" / "amplihack" - tools_dir.mkdir(parents=True) - - runtime_dir = claude_dir / "runtime" / "power-steering" - runtime_dir.mkdir(parents=True) - - return project - - @pytest.fixture - def checker(self, temp_project): - """Create PowerSteeringChecker instance.""" - return PowerSteeringChecker(project_root=temp_project) - - def _make_transcript(self, user_message, tool_commands=None): - """Build a transcript with a user message and optional Bash tool calls.""" - transcript = [ - { - "type": "user", - "message": {"content": user_message}, - } - ] - - if tool_commands: - content_blocks = [] - for cmd in tool_commands: - content_blocks.append( - { - "type": "tool_use", - "name": "Bash", - "input": {"command": cmd}, - } - ) - transcript.append( - { - "type": "assistant", - "message": {"content": content_blocks}, - } - ) - - return transcript - - def test_review_and_merge_pr_is_simple(self, checker): - """PR review+merge request should be SIMPLE, not DEVELOPMENT.""" - transcript = self._make_transcript("review PR 2533 and merge it if it looks good") - session_type = checker.detect_session_type(transcript) - assert session_type == "SIMPLE", f"PR review+merge should be SIMPLE, got {session_type}" - - def test_merge_pr_keyword_is_simple(self, checker): - """'merge pr' keyword should trigger SIMPLE classification.""" - transcript = self._make_transcript("merge pr 123") - session_type = checker.detect_session_type(transcript) - assert session_type == "SIMPLE", f"'merge pr' should be SIMPLE, got {session_type}" - - def test_review_pr_keyword_is_simple(self, checker): - """'review pr' keyword should trigger SIMPLE classification.""" - transcript = self._make_transcript("review pr 456") - session_type = checker.detect_session_type(transcript) - assert session_type == "SIMPLE", f"'review pr' should be SIMPLE, got {session_type}" - - def test_review_and_merge_keyword_is_simple(self, checker): - """'review and merge' keyword should trigger SIMPLE classification.""" - transcript = self._make_transcript("review and merge the latest PR") - session_type = checker.detect_session_type(transcript) - assert session_type == "SIMPLE", f"'review and merge' should be SIMPLE, got {session_type}" - - def test_gh_pr_merge_does_not_trigger_development(self, checker): - """gh pr merge commands should NOT classify session as DEVELOPMENT.""" - transcript = self._make_transcript( - "merge that PR", - tool_commands=[ - "gh pr view 2533 --json title,body,state", - "gh pr checks 2533", - "gh pr merge 2533 --squash", - ], - ) - session_type = checker.detect_session_type(transcript) - assert session_type != "DEVELOPMENT", ( - f"gh pr merge/view/checks should NOT be DEVELOPMENT, got {session_type}" - ) - - def test_gh_pr_view_does_not_trigger_development(self, checker): - """gh pr view commands should NOT classify session as DEVELOPMENT.""" - transcript = self._make_transcript( - "check the status of PR 100", - tool_commands=["gh pr view 100 --json state,statusCheckRollup"], - ) - session_type = checker.detect_session_type(transcript) - assert session_type != "DEVELOPMENT", ( - f"gh pr view should NOT be DEVELOPMENT, got {session_type}" - ) - - def test_gh_pr_create_still_triggers_development(self, checker): - """gh pr create commands SHOULD classify session as DEVELOPMENT.""" - transcript = self._make_transcript( - "create a PR for this feature", - tool_commands=[ - 'gh pr create --title "Add feature" --body "Details"', - ], - ) - session_type = checker.detect_session_type(transcript) - assert session_type == "DEVELOPMENT", ( - f"gh pr create SHOULD be DEVELOPMENT, got {session_type}" - ) - - def test_gh_pr_edit_still_triggers_development(self, checker): - """gh pr edit commands SHOULD classify session as DEVELOPMENT.""" - transcript = self._make_transcript( - "update the PR description", - tool_commands=[ - 'gh pr edit 100 --title "Updated title"', - ], - ) - session_type = checker.detect_session_type(transcript) - assert session_type == "DEVELOPMENT", ( - f"gh pr edit SHOULD be DEVELOPMENT, got {session_type}" - ) - - def test_simple_session_skips_all_checks(self, checker): - """SIMPLE sessions should have zero applicable considerations.""" - applicable = checker.get_applicable_considerations("SIMPLE") - assert len(applicable) == 0, ( - f"SIMPLE sessions should skip all checks, got {len(applicable)}" - ) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_redirects.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_redirects.py deleted file mode 100644 index 019cd63ee..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_redirects.py +++ /dev/null @@ -1,476 +0,0 @@ -#!/usr/bin/env python3 -""" -Unit tests for power-steering redirect functionality. - -Tests redirect persistence, loading, formatting, and edge cases. -""" - -import json -import tempfile -from pathlib import Path - -import pytest -from power_steering_checker import PowerSteeringChecker, PowerSteeringRedirect - - -class TestRedirectPersistence: - """Test redirect save and load operations.""" - - def test_save_redirect(self): - """Test saving a redirect creates proper JSONL file.""" - with tempfile.TemporaryDirectory() as tmpdir: - project_root = Path(tmpdir) - (project_root / ".claude").mkdir() - - checker = PowerSteeringChecker(project_root) - session_id = "test_session_001" - - # Save a redirect - checker._save_redirect( - session_id=session_id, - failed_considerations=["todos_complete", "ci_status"], - continuation_prompt="Please complete TODOs and fix CI", - work_summary="Implemented feature X", - ) - - # Verify file was created - redirects_file = checker._get_redirect_file(session_id) - assert redirects_file.exists() - - # Verify file permissions (owner read/write only) - assert oct(redirects_file.stat().st_mode)[-3:] == "600" - - # Verify content - with open(redirects_file) as f: - line = f.readline().strip() - data = json.loads(line) - - assert data["redirect_number"] == 1 - assert data["failed_considerations"] == ["todos_complete", "ci_status"] - assert data["continuation_prompt"] == "Please complete TODOs and fix CI" - assert data["work_summary"] == "Implemented feature X" - assert "timestamp" in data - - def test_save_multiple_redirects(self): - """Test saving multiple redirects increments redirect_number.""" - with tempfile.TemporaryDirectory() as tmpdir: - project_root = Path(tmpdir) - (project_root / ".claude").mkdir() - - checker = PowerSteeringChecker(project_root) - session_id = "test_session_002" - - # Save first redirect - checker._save_redirect( - session_id=session_id, - failed_considerations=["todos_complete"], - continuation_prompt="Complete TODOs", - ) - - # Save second redirect - checker._save_redirect( - session_id=session_id, - failed_considerations=["ci_status"], - continuation_prompt="Fix CI", - ) - - # Verify both redirects exist with correct numbers - redirects = checker._load_redirects(session_id) - assert len(redirects) == 2 - assert redirects[0].redirect_number == 1 - assert redirects[1].redirect_number == 2 - - def test_load_redirects(self): - """Test loading redirects works correctly.""" - with tempfile.TemporaryDirectory() as tmpdir: - project_root = Path(tmpdir) - (project_root / ".claude").mkdir() - - checker = PowerSteeringChecker(project_root) - session_id = "test_session_003" - - # Save some redirects - checker._save_redirect( - session_id=session_id, - failed_considerations=["todos_complete"], - continuation_prompt="Complete TODOs", - work_summary="Did some work", - ) - - checker._save_redirect( - session_id=session_id, - failed_considerations=["ci_status", "local_testing"], - continuation_prompt="Fix CI and run tests", - ) - - # Load redirects - redirects = checker._load_redirects(session_id) - - assert len(redirects) == 2 - assert isinstance(redirects[0], PowerSteeringRedirect) - assert isinstance(redirects[1], PowerSteeringRedirect) - assert redirects[0].failed_considerations == ["todos_complete"] - assert redirects[1].failed_considerations == ["ci_status", "local_testing"] - - -class TestRedirectEdgeCases: - """Test edge cases and error handling.""" - - def test_empty_redirects(self): - """Test loading when no redirects file exists.""" - with tempfile.TemporaryDirectory() as tmpdir: - project_root = Path(tmpdir) - (project_root / ".claude").mkdir() - - checker = PowerSteeringChecker(project_root) - session_id = "nonexistent_session" - - # Should return empty list, not error - redirects = checker._load_redirects(session_id) - assert redirects == [] - - def test_malformed_jsonl(self): - """Test loading skips malformed JSONL lines.""" - with tempfile.TemporaryDirectory() as tmpdir: - project_root = Path(tmpdir) - runtime_dir = project_root / ".claude" / "runtime" / "power-steering" - runtime_dir.mkdir(parents=True) - - session_id = "test_malformed" - session_dir = runtime_dir / session_id - session_dir.mkdir() - redirects_file = session_dir / "redirects.jsonl" - - # Write mixed valid and invalid JSONL - with open(redirects_file, "w") as f: - # Valid line - f.write( - json.dumps( - { - "redirect_number": 1, - "timestamp": "2024-01-01T00:00:00", - "failed_considerations": ["test"], - "continuation_prompt": "test", - } - ) - + "\n" - ) - # Malformed JSON - f.write("not valid json\n") - # Empty line - f.write("\n") - # Another valid line - f.write( - json.dumps( - { - "redirect_number": 2, - "timestamp": "2024-01-01T00:01:00", - "failed_considerations": ["test2"], - "continuation_prompt": "test2", - } - ) - + "\n" - ) - - checker = PowerSteeringChecker(project_root) - redirects = checker._load_redirects(session_id) - - # Should load only valid entries - assert len(redirects) == 2 - assert redirects[0].redirect_number == 1 - assert redirects[1].redirect_number == 2 - - def test_missing_required_fields(self): - """Test loading skips entries with missing required fields.""" - with tempfile.TemporaryDirectory() as tmpdir: - project_root = Path(tmpdir) - runtime_dir = project_root / ".claude" / "runtime" / "power-steering" - runtime_dir.mkdir(parents=True) - - session_id = "test_missing_fields" - session_dir = runtime_dir / session_id - session_dir.mkdir() - redirects_file = session_dir / "redirects.jsonl" - - # Write JSONL with missing fields - with open(redirects_file, "w") as f: - # Missing continuation_prompt (required) - f.write( - json.dumps( - { - "redirect_number": 1, - "timestamp": "2024-01-01T00:00:00", - "failed_considerations": ["test"], - } - ) - + "\n" - ) - # Valid entry - f.write( - json.dumps( - { - "redirect_number": 2, - "timestamp": "2024-01-01T00:01:00", - "failed_considerations": ["test2"], - "continuation_prompt": "test2", - } - ) - + "\n" - ) - - checker = PowerSteeringChecker(project_root) - redirects = checker._load_redirects(session_id) - - # Should skip entry with missing field - assert len(redirects) == 1 - assert redirects[0].redirect_number == 2 - - def test_save_redirect_fails_gracefully(self): - """Test saving redirect fails gracefully (fail-open).""" - with tempfile.TemporaryDirectory() as tmpdir: - project_root = Path(tmpdir) - (project_root / ".claude").mkdir() - - checker = PowerSteeringChecker(project_root) - - # Use invalid session ID (contains path traversal) - session_id = "../../../etc/passwd" - - # Should not raise exception (fail-open) - checker._save_redirect( - session_id=session_id, - failed_considerations=["test"], - continuation_prompt="test", - ) - - # No exception = test passes - - -class TestRedirectFormatting: - """Test formatting redirects for display.""" - - def test_format_redirects_context(self): - """Test formatting redirect history for context.""" - from claude_reflection import format_redirects_context - - redirects = [ - { - "redirect_number": 1, - "timestamp": "2024-01-01T10:00:00", - "failed_considerations": ["todos_complete", "ci_status"], - "continuation_prompt": "Please complete TODOs and fix CI", - }, - { - "redirect_number": 2, - "timestamp": "2024-01-01T10:30:00", - "failed_considerations": ["local_testing"], - "continuation_prompt": "Run local tests", - }, - ] - - result = format_redirects_context(redirects) - - # Verify structure - assert "## Power-Steering Redirect History" in result - assert "Redirect #1" in result - assert "Redirect #2" in result - assert "todos_complete, ci_status" in result - assert "local_testing" in result - assert "Please complete TODOs and fix CI" in result - assert "Run local tests" in result - - def test_format_redirects_context_single_redirect(self): - """Test formatting with single redirect uses correct plural.""" - from claude_reflection import format_redirects_context - - redirects = [ - { - "redirect_number": 1, - "timestamp": "2024-01-01T10:00:00", - "failed_considerations": ["todos_complete"], - "continuation_prompt": "Complete TODOs", - } - ] - - result = format_redirects_context(redirects) - - # Should use singular form (after fix) - assert "1 power-steering redirect" in result - - def test_format_redirects_context_multiple_redirects(self): - """Test formatting with multiple redirects uses correct plural.""" - from claude_reflection import format_redirects_context - - redirects = [ - { - "redirect_number": 1, - "timestamp": "2024-01-01T10:00:00", - "failed_considerations": ["todos_complete"], - "continuation_prompt": "Complete TODOs", - }, - { - "redirect_number": 2, - "timestamp": "2024-01-01T10:30:00", - "failed_considerations": ["ci_status"], - "continuation_prompt": "Fix CI", - }, - ] - - result = format_redirects_context(redirects) - - # Should use plural form - assert "2 power-steering redirects" in result - - def test_format_redirects_context_empty(self): - """Test formatting with empty redirects list.""" - from claude_reflection import format_redirects_context - - result = format_redirects_context(None) - assert result == "" - - result = format_redirects_context([]) - assert result == "" - - -class TestRedirectIntegration: - """Integration tests for redirect flow.""" - - def test_redirect_saved_on_block_decision(self): - """Test that redirects are saved when session is blocked.""" - with tempfile.TemporaryDirectory() as tmpdir: - project_root = Path(tmpdir) - (project_root / ".claude").mkdir() - - # Create minimal transcript with incomplete TODOs and file operations - # to avoid Q&A session detection - transcript_file = project_root / "transcript.jsonl" - with open(transcript_file, "w") as f: - # User request - f.write( - json.dumps( - { - "type": "user", - "message": {"content": "Implement feature X"}, - } - ) - + "\n" - ) - # Assistant creates TODOs - f.write( - json.dumps( - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "id": "tool_1", - "name": "TodoWrite", - "input": { - "todos": [ - { - "content": "Write code", - "status": "pending", - "activeForm": "Writing code", - } - ] - }, - }, - { - "type": "tool_use", - "id": "tool_2", - "name": "Write", - "input": { - "file_path": "/tmp/test.py", - "content": "print('hello')", - }, - }, - ] - }, - } - ) - + "\n" - ) - # Tool results - f.write( - json.dumps( - { - "type": "tool_result", - "message": {"tool_use_id": "tool_1", "content": "TODOs created"}, - } - ) - + "\n" - ) - f.write( - json.dumps( - { - "type": "tool_result", - "message": {"tool_use_id": "tool_2", "content": "File written"}, - } - ) - + "\n" - ) - - checker = PowerSteeringChecker(project_root) - session_id = "test_integration_001" - - # Run check (should block due to incomplete TODOs) - result = checker.check(transcript_file, session_id) - - # Verify blocked - assert result.decision == "block" - assert "todos_complete" in result.reasons - - # Verify redirect was saved - redirects = checker._load_redirects(session_id) - assert len(redirects) == 1 - assert "todos_complete" in redirects[0].failed_considerations - - def test_no_redirect_saved_on_approve_decision(self): - """Test that no redirects are saved when session is approved.""" - with tempfile.TemporaryDirectory() as tmpdir: - project_root = Path(tmpdir) - (project_root / ".claude").mkdir() - - # Create minimal transcript with Q&A pattern (will be approved) - transcript_file = project_root / "transcript.jsonl" - with open(transcript_file, "w") as f: - f.write( - json.dumps( - { - "type": "user", - "message": {"content": "What is Python?"}, - } - ) - + "\n" - ) - f.write( - json.dumps( - { - "type": "assistant", - "message": { - "content": [ - {"type": "text", "text": "Python is a programming language"} - ] - }, - } - ) - + "\n" - ) - - checker = PowerSteeringChecker(project_root) - session_id = "test_integration_002" - - # Run check (should approve as Q&A) - result = checker.check(transcript_file, session_id) - - # Verify approved - assert result.decision == "approve" - - # Verify no redirects saved - redirects_file = checker._get_redirect_file(session_id) - assert not redirects_file.exists() - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_shutdown.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_shutdown.py deleted file mode 100644 index 3097d91f2..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_power_steering_shutdown.py +++ /dev/null @@ -1,623 +0,0 @@ -#!/usr/bin/env python3 -""" -TDD Tests for Power Steering Shutdown Fix - -Tests power steering's graceful shutdown behavior when AMPLIHACK_SHUTDOWN_IN_PROGRESS -environment variable is set. These tests follow TDD principles - written BEFORE -implementation to define expected behavior. - -Testing Pyramid Distribution: -- 60% Unit Tests: Individual function behavior with shutdown flag -- 30% Integration Tests: Complete shutdown sequence across all functions -- 10% E2E Tests: Exit timing and regression prevention - -Philosophy: -- Ruthlessly Simple: Clear, focused tests with single responsibilities -- Zero-BS: All tests work, no stubs or placeholders -- Fail-Open: Shutdown behavior returns safe defaults that never block users -""" - -import os - -# Import the functions we're testing -import sys -import time -from pathlib import Path -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from claude_power_steering import ( - analyze_claims_sync, - analyze_consideration_sync, - analyze_if_addressed_sync, - is_shutting_down, -) - -# ============================================================================= -# UNIT TESTS (60%) -# ============================================================================= - - -class TestIsShuttingDown: - """Unit tests for shutdown detection helper function. - - Tests the core helper that checks AMPLIHACK_SHUTDOWN_IN_PROGRESS env var. - This is the foundational function that all sync wrappers depend on. - """ - - def test_returns_true_when_env_var_set(self): - """Should return True when AMPLIHACK_SHUTDOWN_IN_PROGRESS=1""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - try: - # ACT - result = is_shutting_down() - - # ASSERT - assert result is True, "Should detect shutdown when env var is '1'" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - def test_returns_false_when_env_var_not_set(self): - """Should return False when AMPLIHACK_SHUTDOWN_IN_PROGRESS not set""" - # ARRANGE - ensure env var is not set - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # ACT - result = is_shutting_down() - - # ASSERT - assert result is False, "Should not detect shutdown when env var absent" - - def test_returns_false_when_env_var_set_to_zero(self): - """Should return False when AMPLIHACK_SHUTDOWN_IN_PROGRESS=0""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "0" - - try: - # ACT - result = is_shutting_down() - - # ASSERT - assert result is False, "Should not detect shutdown when env var is '0'" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - def test_returns_false_when_env_var_set_to_empty_string(self): - """Should return False when AMPLIHACK_SHUTDOWN_IN_PROGRESS=''""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "" - - try: - # ACT - result = is_shutting_down() - - # ASSERT - assert result is False, "Should not detect shutdown when env var is empty" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -class TestAnalyzeClaimsSyncShutdown: - """Unit tests for analyze_claims_sync during shutdown. - - Tests that analyze_claims_sync returns empty list immediately during - shutdown without starting async operations. - """ - - def test_returns_empty_list_during_shutdown(self): - """Should return [] immediately when shutting down""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - delta_text = "Task complete! All tests passing." - project_root = Path.cwd() - - try: - # ACT - start_time = time.time() - result = analyze_claims_sync(delta_text, project_root) - elapsed = time.time() - start_time - - # ASSERT - assert result == [], "Should return empty list during shutdown" - assert elapsed < 0.1, f"Should return immediately (<100ms), took {elapsed * 1000:.1f}ms" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - @patch("claude_power_steering.analyze_claims") - def test_does_not_call_async_during_shutdown(self, mock_analyze_claims): - """Should not invoke async analyze_claims when shutting down""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - mock_analyze_claims.return_value = AsyncMock(return_value=["claim"]) - - try: - # ACT - result = analyze_claims_sync("some text", Path.cwd()) - - # ASSERT - assert result == [], "Should return [] without calling async" - mock_analyze_claims.assert_not_called() - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - @patch("claude_power_steering.analyze_claims") - @patch("claude_power_steering.asyncio.run") - def test_calls_async_during_normal_operation(self, mock_asyncio_run, mock_analyze_claims): - """Should call async analyze_claims during normal operation""" - # ARRANGE - ensure NOT shutting down - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - mock_asyncio_run.return_value = ["detected claim"] - - # ACT - result = analyze_claims_sync("Task complete!", Path.cwd()) - - # ASSERT - mock_asyncio_run.assert_called_once() - assert result == ["detected claim"], "Should return async result during normal operation" - - -class TestAnalyzeIfAddressedSyncShutdown: - """Unit tests for analyze_if_addressed_sync during shutdown. - - Tests that analyze_if_addressed_sync returns None immediately during - shutdown without starting async operations. - """ - - def test_returns_none_during_shutdown(self): - """Should return None immediately when shutting down""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - failure_id = "todos_complete" - failure_reason = "3 TODOs remain incomplete" - delta_text = "Completed all TODOs" - project_root = Path.cwd() - - try: - # ACT - start_time = time.time() - result = analyze_if_addressed_sync(failure_id, failure_reason, delta_text, project_root) - elapsed = time.time() - start_time - - # ASSERT - assert result is None, "Should return None during shutdown" - assert elapsed < 0.1, f"Should return immediately (<100ms), took {elapsed * 1000:.1f}ms" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - @patch("claude_power_steering.analyze_if_addressed") - def test_does_not_call_async_during_shutdown(self, mock_analyze_if_addressed): - """Should not invoke async analyze_if_addressed when shutting down""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - mock_analyze_if_addressed.return_value = AsyncMock(return_value="evidence") - - try: - # ACT - result = analyze_if_addressed_sync( - "todos_complete", "3 TODOs incomplete", "Completed all TODOs", Path.cwd() - ) - - # ASSERT - assert result is None, "Should return None without calling async" - mock_analyze_if_addressed.assert_not_called() - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - @patch("claude_power_steering.analyze_if_addressed") - @patch("claude_power_steering.asyncio.run") - def test_calls_async_during_normal_operation(self, mock_asyncio_run, mock_analyze_if_addressed): - """Should call async analyze_if_addressed during normal operation""" - # ARRANGE - ensure NOT shutting down - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - mock_asyncio_run.return_value = "Completed all TODOs via git commit" - - # ACT - result = analyze_if_addressed_sync( - "todos_complete", "3 TODOs incomplete", "Completed all TODOs", Path.cwd() - ) - - # ASSERT - mock_asyncio_run.assert_called_once() - assert result == "Completed all TODOs via git commit" - - -class TestAnalyzeConsiderationSyncShutdown: - """Unit tests for analyze_consideration_sync during shutdown. - - Tests that analyze_consideration_sync returns (True, None) immediately - during shutdown without starting async operations. - """ - - def test_returns_satisfied_tuple_during_shutdown(self): - """Should return (True, None) immediately when shutting down""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - conversation = [{"role": "user", "content": "Hello"}] - consideration = { - "id": "tests_passing", - "question": "Are all tests passing?", - "description": "Verify test suite passes", - "category": "Quality", - } - project_root = Path.cwd() - - try: - # ACT - start_time = time.time() - satisfied, reason = analyze_consideration_sync( - conversation, consideration, project_root - ) - elapsed = time.time() - start_time - - # ASSERT - assert satisfied is True, "Should return satisfied=True during shutdown" - assert reason is None, "Should return reason=None during shutdown" - assert elapsed < 0.1, f"Should return immediately (<100ms), took {elapsed * 1000:.1f}ms" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - @patch("claude_power_steering.analyze_consideration") - def test_does_not_call_async_during_shutdown(self, mock_analyze_consideration): - """Should not invoke async analyze_consideration when shutting down""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - mock_analyze_consideration.return_value = AsyncMock(return_value=(False, "reason")) - - try: - # ACT - satisfied, reason = analyze_consideration_sync( - [{"role": "user", "content": "Hello"}], - {"id": "test", "question": "Test?"}, - Path.cwd(), - ) - - # ASSERT - assert satisfied is True, "Should return satisfied=True" - assert reason is None, "Should return reason=None" - mock_analyze_consideration.assert_not_called() - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - @patch("claude_power_steering.analyze_consideration") - @patch("claude_power_steering.asyncio.run") - def test_calls_async_during_normal_operation( - self, mock_asyncio_run, mock_analyze_consideration - ): - """Should call async analyze_consideration during normal operation""" - # ARRANGE - ensure NOT shutting down - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - mock_asyncio_run.return_value = (False, "Tests are failing") - - # ACT - satisfied, reason = analyze_consideration_sync( - [{"role": "user", "content": "Hello"}], - {"id": "tests_passing", "question": "Tests pass?"}, - Path.cwd(), - ) - - # ASSERT - mock_asyncio_run.assert_called_once() - assert satisfied is False - assert reason == "Tests are failing" - - -# ============================================================================= -# INTEGRATION TESTS (30%) -# ============================================================================= - - -class TestShutdownSequenceIntegration: - """Integration tests for complete shutdown sequence. - - Tests that all three sync wrapper functions behave correctly during - shutdown, ensuring fail-open behavior across the entire power steering - system. - """ - - def test_all_sync_wrappers_return_safe_defaults_during_shutdown(self): - """All sync wrappers should return safe defaults when shutting down""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - project_root = Path.cwd() - conversation = [{"role": "user", "content": "Test"}] - consideration = {"id": "test", "question": "Test?"} - - try: - # ACT - claims = analyze_claims_sync("Task complete!", project_root) - evidence = analyze_if_addressed_sync( - "todos_complete", "3 TODOs", "Completed", project_root - ) - satisfied, reason = analyze_consideration_sync( - conversation, consideration, project_root - ) - - # ASSERT - assert claims == [], "analyze_claims_sync returns []" - assert evidence is None, "analyze_if_addressed_sync returns None" - assert satisfied is True, "analyze_consideration_sync returns satisfied=True" - assert reason is None, "analyze_consideration_sync returns reason=None" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - def test_fail_open_behavior_prevents_user_blocking(self): - """Fail-open defaults should never block user from exiting. - - Tests that returned values follow fail-open philosophy: - - Empty claims list = no completion claims detected - - None evidence = no evidence of addressing failure - - (True, None) = consideration assumed satisfied - - All of these allow power steering to proceed without blocking. - """ - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - try: - # ACT - claims = analyze_claims_sync("Critical bug found!", Path.cwd()) - evidence = analyze_if_addressed_sync( - "critical_check", "Must fix", "Ignored it", Path.cwd() - ) - satisfied, reason = analyze_consideration_sync( - [{"role": "user", "content": "Bug exists"}], - {"id": "no_bugs", "question": "Are there bugs?"}, - Path.cwd(), - ) - - # ASSERT - Verify fail-open behavior - assert claims == [], "No claims detected during shutdown (fail-open)" - assert evidence is None, "No evidence found during shutdown (fail-open)" - assert satisfied is True, "Consideration satisfied during shutdown (fail-open)" - assert reason is None, "No blocking reason during shutdown (fail-open)" - - # PHILOSOPHY CHECK: These values should never trigger a blocking message - # in power steering's exit prevention logic - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - def test_shutdown_sequence_completes_within_budget(self): - """Complete shutdown sequence should finish within 1 second. - - Integration test that verifies the complete shutdown sequence - (all three sync wrappers) completes fast enough to support the - 2-3 second target exit time. - """ - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - project_root = Path.cwd() - conversation = [{"role": "user", "content": "Test"}] - consideration = {"id": "test", "question": "Test?"} - - try: - # ACT - start_time = time.time() - - # Simulate complete shutdown sequence - _ = analyze_claims_sync("Done!", project_root) - _ = analyze_if_addressed_sync("todos", "incomplete", "fixed", project_root) - _ = analyze_consideration_sync(conversation, consideration, project_root) - - elapsed = time.time() - start_time - - # ASSERT - assert elapsed < 1.0, ( - f"Shutdown sequence took {elapsed:.2f}s, should be <1.0s " - f"to support 2-3s exit target" - ) - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - @patch("claude_power_steering.analyze_claims") - @patch("claude_power_steering.analyze_if_addressed") - @patch("claude_power_steering.analyze_consideration") - def test_no_async_operations_started_during_shutdown( - self, mock_consideration, mock_if_addressed, mock_claims - ): - """No async functions should be called when shutting down. - - Integration test verifying that shutdown bypasses ALL async - operations across all sync wrapper functions. - """ - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - try: - # ACT - Call all sync wrappers - analyze_claims_sync("text", Path.cwd()) - analyze_if_addressed_sync("id", "reason", "delta", Path.cwd()) - analyze_consideration_sync( - [{"role": "user", "content": "test"}], - {"id": "test", "question": "test?"}, - Path.cwd(), - ) - - # ASSERT - No async functions called - mock_claims.assert_not_called() - mock_if_addressed.assert_not_called() - mock_consideration.assert_not_called() - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -# ============================================================================= -# E2E TESTS (10%) -# ============================================================================= - - -class TestEndToEndExitTiming: - """End-to-end tests for exit timing and performance. - - Tests that verify the complete user experience: - - Exit completes within 3 seconds during shutdown - - No performance regression during normal operation - """ - - @pytest.mark.slow - def test_exit_completes_within_three_seconds_during_shutdown(self): - """E2E: Complete exit sequence should finish within 3 seconds. - - This test simulates a realistic exit scenario where power steering - performs multiple checks before allowing exit. During shutdown, - all checks should complete within 3 seconds total. - - Target: <3 seconds (user perception threshold for "fast") - Current: ~10-13 seconds without fix (UNACCEPTABLE) - Expected: <3 seconds with shutdown checks (GOOD) - """ - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - project_root = Path.cwd() - - # Simulate realistic power steering check sequence - conversations = [[{"role": "user", "content": f"Conversation {i}"}] for i in range(5)] - considerations = [{"id": f"check_{i}", "question": f"Check {i}?"} for i in range(10)] - delta_texts = [f"Delta text {i}" for i in range(5)] - - try: - # ACT - start_time = time.time() - - # Simulate complete power steering exit sequence - for delta in delta_texts: - _ = analyze_claims_sync(delta, project_root) - - for i, conv in enumerate(conversations): - _ = analyze_if_addressed_sync(f"check_{i}", f"reason_{i}", delta, project_root) - - for conv in conversations: - for consideration in considerations: - _ = analyze_consideration_sync(conv, consideration, project_root) - - elapsed = time.time() - start_time - - # ASSERT - assert elapsed < 3.0, ( - f"Exit sequence took {elapsed:.2f}s, should be <3.0s for good UX. " - f"Without fix: ~10-13s. Target: <3s." - ) - - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - @pytest.mark.slow - @patch("claude_power_steering.CLAUDE_SDK_AVAILABLE", True) - @patch("claude_power_steering.query") - def test_no_timing_regression_during_normal_operation(self, mock_query): - """E2E: Normal operation timing should not regress from shutdown checks. - - Verifies that adding is_shutting_down() checks does not slow down - normal operation. The check is O(1) env var lookup, should add - negligible overhead (<1ms per call). - """ - # ARRANGE - ensure NOT shutting down - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # Mock SDK to return quickly (isolate shutdown check overhead) - async def mock_query_response(*args, **kwargs): - yield MagicMock(text="SATISFIED: All good") - - mock_query.return_value = mock_query_response() - - # ACT - start_time = time.time() - - # Run single operation 100 times to measure overhead - for _ in range(100): - _ = is_shutting_down() - - elapsed = time.time() - start_time - avg_per_call = (elapsed / 100) * 1000 # Convert to milliseconds - - # ASSERT - assert avg_per_call < 1.0, ( - f"is_shutting_down() took {avg_per_call:.2f}ms average, " - f"should be <1ms (env var lookup is O(1))" - ) - - @pytest.mark.slow - def test_repeated_shutdown_checks_do_not_accumulate_delay(self): - """E2E: Multiple shutdown checks should not accumulate delays. - - Tests that calling sync wrappers repeatedly during shutdown - maintains consistent fast performance without degradation. - """ - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - project_root = Path.cwd() - - try: - # ACT - Call sync wrappers 50 times each - start_time = time.time() - - for _ in range(50): - analyze_claims_sync("text", project_root) - analyze_if_addressed_sync("id", "reason", "delta", project_root) - analyze_consideration_sync( - [{"role": "user", "content": "test"}], - {"id": "test", "question": "test?"}, - project_root, - ) - - elapsed = time.time() - start_time - - # ASSERT - 150 calls (50 * 3 functions) should complete quickly - assert elapsed < 1.0, ( - f"150 shutdown checks took {elapsed:.2f}s, should be <1.0s. " - f"Each check should be <7ms average." - ) - - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -# ============================================================================= -# TEST CONFIGURATION -# ============================================================================= - - -@pytest.fixture(autouse=True) -def cleanup_env_var(): - """Ensure AMPLIHACK_SHUTDOWN_IN_PROGRESS is cleaned up after each test. - - This fixture runs automatically for every test to prevent test pollution - from env var state leaking between tests. - """ - yield - # Cleanup after test - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -# Mark slow tests for optional execution -def pytest_configure(config): - """Register custom pytest markers""" - config.addinivalue_line("markers", "slow: marks tests as slow (run with 'pytest -m slow')") diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_precommit_installer.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_precommit_installer.py deleted file mode 100644 index e6229ce36..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_precommit_installer.py +++ /dev/null @@ -1,460 +0,0 @@ -#!/usr/bin/env python3 -""" -Tests for precommit_installer hook. - -Testing pyramid: -- 60% Unit tests (fast, heavily mocked) -- 30% Integration tests (multiple components) -- 10% E2E tests (complete workflows) -""" - -import os -import subprocess -import sys -import tempfile -import unittest -from pathlib import Path -from unittest.mock import MagicMock, Mock, call, patch - -# Add hooks directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) -from precommit_installer import PrecommitInstallerHook - - -class TestEnvironmentDisable(unittest.TestCase): - """Test environment variable disabling (Unit - 60%).""" - - def setUp(self): - """Set up test hook instance.""" - self.hook = PrecommitInstallerHook() - - def test_disabled_with_zero(self): - """Test AMPLIHACK_AUTO_PRECOMMIT=0 disables hook.""" - with patch.dict(os.environ, {"AMPLIHACK_AUTO_PRECOMMIT": "0"}): - self.assertTrue(self.hook._is_env_disabled()) - - def test_disabled_with_false(self): - """Test AMPLIHACK_AUTO_PRECOMMIT=false disables hook.""" - with patch.dict(os.environ, {"AMPLIHACK_AUTO_PRECOMMIT": "false"}): - self.assertTrue(self.hook._is_env_disabled()) - - def test_disabled_with_no(self): - """Test AMPLIHACK_AUTO_PRECOMMIT=no disables hook.""" - with patch.dict(os.environ, {"AMPLIHACK_AUTO_PRECOMMIT": "no"}): - self.assertTrue(self.hook._is_env_disabled()) - - def test_disabled_with_off(self): - """Test AMPLIHACK_AUTO_PRECOMMIT=off disables hook.""" - with patch.dict(os.environ, {"AMPLIHACK_AUTO_PRECOMMIT": "off"}): - self.assertTrue(self.hook._is_env_disabled()) - - def test_disabled_case_insensitive(self): - """Test environment variable is case insensitive.""" - with patch.dict(os.environ, {"AMPLIHACK_AUTO_PRECOMMIT": "FALSE"}): - self.assertTrue(self.hook._is_env_disabled()) - - def test_enabled_with_other_values(self): - """Test other values don't disable hook.""" - with patch.dict(os.environ, {"AMPLIHACK_AUTO_PRECOMMIT": "1"}): - self.assertFalse(self.hook._is_env_disabled()) - with patch.dict(os.environ, {"AMPLIHACK_AUTO_PRECOMMIT": "true"}): - self.assertFalse(self.hook._is_env_disabled()) - - def test_enabled_when_not_set(self): - """Test hook enabled when environment variable not set.""" - with patch.dict(os.environ, {}, clear=True): - self.assertFalse(self.hook._is_env_disabled()) - - -class TestPrecommitAvailability(unittest.TestCase): - """Test pre-commit availability checking (Unit - 60%).""" - - def setUp(self): - """Set up test hook instance.""" - self.hook = PrecommitInstallerHook() - - def test_precommit_available(self): - """Test detection when pre-commit is available.""" - with patch("subprocess.run") as mock_run: - mock_run.return_value = Mock( - returncode=0, - stdout="pre-commit 3.5.0\n", - stderr="", - ) - result = self.hook._is_precommit_available() - - self.assertTrue(result["available"]) - self.assertEqual(result["version"], "pre-commit 3.5.0") - - def test_precommit_not_found(self): - """Test handling when pre-commit command not found.""" - with patch("subprocess.run", side_effect=FileNotFoundError()): - result = self.hook._is_precommit_available() - - self.assertFalse(result["available"]) - self.assertIn("not found in PATH", result["error"]) - - def test_precommit_timeout(self): - """Test handling when pre-commit check times out.""" - with patch("subprocess.run", side_effect=subprocess.TimeoutExpired("pre-commit", 5)): - result = self.hook._is_precommit_available() - - self.assertFalse(result["available"]) - self.assertIn("timed out", result["error"]) - - def test_precommit_os_error(self): - """Test handling of OS errors.""" - with patch("subprocess.run", side_effect=OSError("Disk error")): - result = self.hook._is_precommit_available() - - self.assertFalse(result["available"]) - self.assertIn("OS error", result["error"]) - - def test_precommit_nonzero_exit(self): - """Test handling when pre-commit returns non-zero exit code.""" - with patch("subprocess.run") as mock_run: - mock_run.return_value = Mock( - returncode=1, - stdout="", - stderr="error", - ) - result = self.hook._is_precommit_available() - - self.assertFalse(result["available"]) - self.assertIn("returned 1", result["error"]) - - -class TestHooksInstalled(unittest.TestCase): - """Test hook installation detection (Unit - 60%).""" - - def setUp(self): - """Set up test hook instance and temp directory.""" - self.hook = PrecommitInstallerHook() - self.temp_dir = tempfile.mkdtemp() - self.hook.project_root = Path(self.temp_dir) - self.hooks_dir = Path(self.temp_dir) / ".git" / "hooks" - self.hooks_dir.mkdir(parents=True, exist_ok=True) - self.hook_file = self.hooks_dir / "pre-commit" - - def tearDown(self): - """Clean up temp directory.""" - import shutil - - shutil.rmtree(self.temp_dir, ignore_errors=True) - - def test_hooks_not_installed_file_missing(self): - """Test detection when hook file doesn't exist.""" - result = self.hook._are_hooks_installed() - self.assertFalse(result["installed"]) - - def test_hooks_installed_valid_precommit(self): - """Test detection when valid pre-commit hook exists.""" - self.hook_file.write_text( - "#!/usr/bin/env python3\n" - "# This is a pre-commit hook\n" - "import sys\n" - "from pre-commit import main\n" - "sys.exit(main())\n" - ) - result = self.hook._are_hooks_installed() - self.assertTrue(result["installed"]) - - def test_hooks_corrupted_too_small(self): - """Test detection of corrupted hook (too small).""" - self.hook_file.write_text("#!/bin/sh\n") - result = self.hook._are_hooks_installed() - - self.assertFalse(result["installed"]) - self.assertTrue(result.get("corrupted", False)) - - def test_hooks_corrupted_not_precommit(self): - """Test detection when hook is not pre-commit managed.""" - self.hook_file.write_text( - "#!/bin/bash\n" - "# Custom hook that has nothing to do with pre-commit\n" - "echo 'Running custom validation'\n" - "exit 0\n" - ) - result = self.hook._are_hooks_installed() - - self.assertFalse(result["installed"]) - self.assertTrue(result.get("corrupted", False)) - - def test_hooks_permission_error(self): - """Test handling of permission errors.""" - self.hook_file.write_text("content") - with patch.object(Path, "read_text", side_effect=PermissionError()): - result = self.hook._are_hooks_installed() - - self.assertFalse(result["installed"]) - self.assertIn("Permission denied", result["error"]) - - def test_hooks_unicode_error(self): - """Test handling of invalid text encoding.""" - self.hook_file.write_bytes(b"\xff\xfe\x00\x00invalid") - result = self.hook._are_hooks_installed() - - self.assertFalse(result["installed"]) - self.assertTrue(result.get("corrupted", False)) - - -class TestInstallHooks(unittest.TestCase): - """Test hook installation (Unit - 60%).""" - - def setUp(self): - """Set up test hook instance.""" - self.hook = PrecommitInstallerHook() - - def test_install_success(self): - """Test successful hook installation.""" - with patch("subprocess.run") as mock_run: - mock_run.return_value = Mock( - returncode=0, - stdout="pre-commit installed at .git/hooks/pre-commit\n", - stderr="", - ) - result = self.hook._install_hooks() - - self.assertTrue(result["success"]) - self.assertNotIn("error", result) - - def test_install_permission_denied(self): - """Test handling of permission errors during installation.""" - with patch("subprocess.run") as mock_run: - mock_run.return_value = Mock( - returncode=1, - stdout="", - stderr="Permission denied writing to .git/hooks", - ) - result = self.hook._install_hooks() - - self.assertFalse(result["success"]) - self.assertIn("Permission denied", result["error"]) - - def test_install_network_error(self): - """Test handling of network errors during installation.""" - with patch("subprocess.run") as mock_run: - mock_run.return_value = Mock( - returncode=1, - stdout="", - stderr="Network connection failed downloading hooks", - ) - result = self.hook._install_hooks() - - self.assertFalse(result["success"]) - self.assertIn("Network error", result["error"]) - - def test_install_invalid_config(self): - """Test handling of invalid config file.""" - with patch("subprocess.run") as mock_run: - mock_run.return_value = Mock( - returncode=1, - stdout="", - stderr="Invalid YAML in .pre-commit-config.yaml", - ) - result = self.hook._install_hooks() - - self.assertFalse(result["success"]) - self.assertIn("Invalid .pre-commit-config.yaml", result["error"]) - - def test_install_timeout(self): - """Test handling of installation timeout.""" - with patch("subprocess.run", side_effect=subprocess.TimeoutExpired("pre-commit", 30)): - result = self.hook._install_hooks() - - self.assertFalse(result["success"]) - self.assertIn("timed out", result["error"]) - - def test_install_file_not_found(self): - """Test handling when pre-commit not found.""" - with patch("subprocess.run", side_effect=FileNotFoundError()): - result = self.hook._install_hooks() - - self.assertFalse(result["success"]) - self.assertIn("not found", result["error"]) - - def test_install_os_error(self): - """Test handling of OS errors during installation.""" - with patch("subprocess.run", side_effect=OSError("Disk full")): - result = self.hook._install_hooks() - - self.assertFalse(result["success"]) - self.assertIn("OS error", result["error"]) - - -class TestProcessWorkflow(unittest.TestCase): - """Test complete process workflow (Integration - 30%).""" - - def setUp(self): - """Set up test hook instance.""" - self.hook = PrecommitInstallerHook() - self.hook.project_root = Path(tempfile.mkdtemp()) - self.hook.log = MagicMock() - self.hook.save_metric = MagicMock() - - def tearDown(self): - """Clean up temp directory.""" - import shutil - - shutil.rmtree(self.hook.project_root, ignore_errors=True) - - def test_process_env_disabled(self): - """Test process early exit when disabled via env.""" - with patch.dict(os.environ, {"AMPLIHACK_AUTO_PRECOMMIT": "0"}): - result = self.hook.process({}) - - self.assertEqual(result, {}) - self.hook.save_metric.assert_called_with("precommit_env_disabled", True) - - def test_process_not_git_repo(self): - """Test process early exit when not a git repo.""" - result = self.hook.process({}) - - self.assertEqual(result, {}) - self.hook.save_metric.assert_called_with("precommit_not_git_repo", True) - - def test_process_no_config(self): - """Test process early exit when no config file.""" - git_dir = self.hook.project_root / ".git" - git_dir.mkdir() - - result = self.hook.process({}) - - self.assertEqual(result, {}) - self.hook.save_metric.assert_called_with("precommit_no_config", True) - - def test_process_precommit_not_available(self): - """Test process when pre-commit not available.""" - git_dir = self.hook.project_root / ".git" - git_dir.mkdir() - config_file = self.hook.project_root / ".pre-commit-config.yaml" - config_file.write_text("repos: []") - - with patch.object( - self.hook, - "_is_precommit_available", - return_value={"available": False, "error": "not found"}, - ): - result = self.hook.process({}) - - self.assertEqual(result, {}) - self.hook.save_metric.assert_called_with("precommit_available", False) - - def test_process_hooks_already_installed(self): - """Test process when hooks already installed.""" - git_dir = self.hook.project_root / ".git" - git_dir.mkdir() - config_file = self.hook.project_root / ".pre-commit-config.yaml" - config_file.write_text("repos: []") - - with patch.object( - self.hook, - "_is_precommit_available", - return_value={"available": True, "version": "3.5.0"}, - ): - with patch.object( - self.hook, - "_are_hooks_installed", - return_value={"installed": True}, - ): - result = self.hook.process({}) - - self.assertEqual(result, {}) - self.hook.save_metric.assert_called_with("precommit_already_installed", True) - - def test_process_successful_install(self): - """Test complete successful installation workflow.""" - git_dir = self.hook.project_root / ".git" - git_dir.mkdir() - config_file = self.hook.project_root / ".pre-commit-config.yaml" - config_file.write_text("repos: []") - - with patch.object( - self.hook, - "_is_precommit_available", - return_value={"available": True, "version": "3.5.0"}, - ): - with patch.object( - self.hook, - "_are_hooks_installed", - return_value={"installed": False}, - ): - with patch.object( - self.hook, - "_install_hooks", - return_value={"success": True}, - ): - result = self.hook.process({}) - - self.assertEqual(result, {}) - self.hook.save_metric.assert_called_with("precommit_installed", True) - - def test_process_failed_install(self): - """Test workflow when installation fails.""" - git_dir = self.hook.project_root / ".git" - git_dir.mkdir() - config_file = self.hook.project_root / ".pre-commit-config.yaml" - config_file.write_text("repos: []") - - with patch.object( - self.hook, - "_is_precommit_available", - return_value={"available": True, "version": "3.5.0"}, - ): - with patch.object( - self.hook, - "_are_hooks_installed", - return_value={"installed": False}, - ): - with patch.object( - self.hook, - "_install_hooks", - return_value={"success": False, "error": "Permission denied"}, - ): - result = self.hook.process({}) - - self.assertEqual(result, {}) - # Check both metrics were saved - calls = self.hook.save_metric.call_args_list - self.assertIn(call("precommit_installed", False), calls) - self.assertIn(call("precommit_install_error", "Permission denied"), calls) - - def test_process_graceful_exception_handling(self): - """Test that exceptions are handled gracefully.""" - git_dir = self.hook.project_root / ".git" - git_dir.mkdir() - config_file = self.hook.project_root / ".pre-commit-config.yaml" - config_file.write_text("repos: []") - - with patch.object( - self.hook, - "_is_precommit_available", - side_effect=Exception("Unexpected error"), - ): - result = self.hook.process({}) - - self.assertEqual(result, {}) - # Should log error and save metric - self.hook.log.assert_called() - self.hook.save_metric.assert_called() - - -class TestEndToEnd(unittest.TestCase): - """Test complete end-to-end scenarios (E2E - 10%).""" - - def test_main_entry_point(self): - """Test main() entry point executes without errors.""" - from precommit_installer import main - - with patch("precommit_installer.PrecommitInstallerHook") as mock_hook_class: - mock_instance = MagicMock() - mock_hook_class.return_value = mock_instance - - main() - - mock_hook_class.assert_called_once() - mock_instance.run.assert_called_once() - - -if __name__ == "__main__": - unittest.main() diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_sdk_integration.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_sdk_integration.py deleted file mode 100644 index 2fc29f61e..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_sdk_integration.py +++ /dev/null @@ -1,248 +0,0 @@ -#!/usr/bin/env python3 -""" -Test script to verify Claude SDK integration with power-steering mode. - -This script tests that: -1. Claude SDK is available and can be imported -2. The analyze_consideration function works correctly -3. The integration with PowerSteeringChecker functions properly -4. Fallback to heuristic checkers works when SDK fails -""" - -import sys -import tempfile -from pathlib import Path - -# Add parent directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from claude_power_steering import ( - CLAUDE_SDK_AVAILABLE, - _format_consideration_prompt, - _format_conversation_summary, - analyze_consideration_sync, -) -from power_steering_checker import SDK_AVAILABLE, PowerSteeringChecker - - -def test_sdk_availability(): - """Test that Claude SDK is available.""" - print("Testing SDK availability...") - print(f" CLAUDE_SDK_AVAILABLE: {CLAUDE_SDK_AVAILABLE}") - print(f" SDK_AVAILABLE (from checker): {SDK_AVAILABLE}") - - if CLAUDE_SDK_AVAILABLE: - print(" ✓ Claude SDK is available") - return True - print(" ⚠ Claude SDK is NOT available (will use heuristic fallback)") - return False - - -def test_conversation_formatting(): - """Test conversation summary formatting.""" - print("\nTesting conversation formatting...") - - conversation = [ - {"type": "user", "message": {"content": "Implement feature X"}}, - { - "type": "assistant", - "message": { - "content": [ - {"type": "text", "text": "I'll implement feature X for you."}, - {"type": "tool_use", "name": "Write", "id": "1"}, - ] - }, - }, - {"type": "tool_result", "message": {"content": "File written"}}, - ] - - summary = _format_conversation_summary(conversation) - print(f" Formatted {len(conversation)} messages") - print(f" Summary length: {len(summary)} chars") - - # Check that key elements are present - assert "user" in summary.lower() or "message" in summary.lower() - assert "assistant" in summary.lower() or "message" in summary.lower() - assert "feature" in summary.lower() or "implement" in summary.lower() - - print(" ✓ Conversation formatting works") - return True - - -def test_prompt_formatting(): - """Test consideration prompt formatting.""" - print("\nTesting prompt formatting...") - - consideration = { - "id": "test_check", - "question": "Were all TODO items completed?", - "description": "Verify TodoWrite has all items marked complete", - "category": "Completion", - } - - conversation = [ - {"type": "user", "message": {"content": "Complete all TODOs"}}, - {"type": "assistant", "message": {"content": "All TODOs are now complete"}}, - ] - - prompt = _format_consideration_prompt(consideration, conversation) - print(f" Prompt length: {len(prompt)} chars") - - # Check that key elements are present - assert "TODO items completed" in prompt - assert "SATISFIED" in prompt - assert "NOT SATISFIED" in prompt - - print(" ✓ Prompt formatting works") - return True - - -def test_sdk_analysis(): - """Test actual SDK analysis (if SDK available).""" - if not CLAUDE_SDK_AVAILABLE: - print("\nSkipping SDK analysis test (SDK not available)") - return True - - print("\nTesting SDK analysis...") - - consideration = { - "id": "local_testing", - "question": "Were tests run locally?", - "description": "Check if pytest or similar test command was executed", - "category": "Testing", - } - - # Conversation WITH tests - conversation_with_tests = [ - {"type": "user", "message": {"content": "Run the tests"}}, - { - "type": "assistant", - "message": {"content": [{"type": "text", "text": "Running pytest..."}]}, - }, - { - "type": "tool_result", - "message": {"content": "pytest output: 10 passed, 0 failed"}, - }, - ] - - # Conversation WITHOUT tests - conversation_without_tests = [ - {"type": "user", "message": {"content": "Fix the bug"}}, - {"type": "assistant", "message": {"content": [{"type": "text", "text": "Fixed"}]}}, - ] - - temp_dir = Path(tempfile.mkdtemp()) - - try: - print(" Testing WITH tests in transcript...") - result_with = analyze_consideration_sync(conversation_with_tests, consideration, temp_dir) - print(f" Result: {'SATISFIED' if result_with else 'NOT SATISFIED'}") - - print(" Testing WITHOUT tests in transcript...") - result_without = analyze_consideration_sync( - conversation_without_tests, consideration, temp_dir - ) - print(f" Result: {'SATISFIED' if result_without else 'NOT SATISFIED'}") - - # We expect the SDK to correctly identify presence/absence of tests - # Note: SDK might still return True (fail-open) if it's unsure - print(" ✓ SDK analysis completed successfully") - return True - - except Exception as e: - print(f" ✗ SDK analysis failed: {e}") - return False - - -def test_integration_with_checker(): - """Test integration with PowerSteeringChecker.""" - print("\nTesting integration with PowerSteeringChecker...") - - temp_dir = Path(tempfile.mkdtemp()) - (temp_dir / ".claude" / "tools" / "amplihack").mkdir(parents=True) - (temp_dir / ".claude" / "runtime" / "power-steering").mkdir(parents=True, exist_ok=True) - - # Create considerations YAML with a specific checker - yaml_path = temp_dir / ".claude" / "tools" / "amplihack" / "considerations.yaml" - yaml_content = """ -- id: todos_complete - category: Completion - question: Were all TODO items completed? - description: Check TodoWrite for completion - severity: blocker - checker: _check_todos_complete - enabled: true -""" - yaml_path.write_text(yaml_content) - - checker = PowerSteeringChecker(temp_dir) - - # Transcript with completed TODOs - transcript = [ - {"type": "user", "message": {"content": "Complete the task"}}, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "TodoWrite", - "input": {"todos": [{"status": "completed"}]}, - } - ] - }, - }, - ] - - analysis = checker._analyze_considerations(transcript, "test_session") - - print(f" Analysis completed with {len(analysis.results)} results") - - # Note: Results might be empty if SDK is unavailable or checker method doesn't exist - # This is expected behavior (fail-open), not a failure - if len(analysis.results) > 0: - if "todos_complete" in analysis.results: - print(f" todos_complete satisfied: {analysis.results['todos_complete'].satisfied}") - else: - print(f" Available results: {list(analysis.results.keys())}") - else: - print(" No results returned (expected when SDK unavailable or checker method missing)") - - print(" ✓ Integration with PowerSteeringChecker works (no errors)") - return True - - -def main(): - """Run all tests.""" - print("=" * 70) - print("Claude SDK Integration Test Suite") - print("=" * 70) - - results = [] - - results.append(("SDK Availability", test_sdk_availability())) - results.append(("Conversation Formatting", test_conversation_formatting())) - results.append(("Prompt Formatting", test_prompt_formatting())) - results.append(("SDK Analysis", test_sdk_analysis())) - results.append(("Checker Integration", test_integration_with_checker())) - - print("\n" + "=" * 70) - print("Test Results:") - print("=" * 70) - - for test_name, passed in results: - status = "✓ PASS" if passed else "✗ FAIL" - print(f" {status}: {test_name}") - - all_passed = all(passed for _, passed in results) - print("=" * 70) - - if all_passed: - print("✅ All tests passed!") - return 0 - print("❌ Some tests failed") - return 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_security_features.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_security_features.py deleted file mode 100644 index fdb020213..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_security_features.py +++ /dev/null @@ -1,311 +0,0 @@ -#!/usr/bin/env python3 -""" -Tests for security features added to power-steering. - -Tests cover: -- Path validation helper -- Config integrity checks -- Checker timeouts -""" - -import json -import sys -from pathlib import Path - -try: - import pytest -except ImportError: - pytest = None - -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from power_steering_checker import PowerSteeringChecker - - -class TestPathValidation: - """Test path validation security feature.""" - - def test_validate_path_within_allowed(self, tmp_path): - """Test path validation for path within allowed directory.""" - checker = PowerSteeringChecker(tmp_path) - - # Create a path within project root - safe_path = tmp_path / "subdir" / "file.txt" - - assert checker._validate_path(safe_path, tmp_path) is True - - def test_validate_path_outside_allowed(self, tmp_path): - """Test path validation rejects paths outside allowed directory.""" - checker = PowerSteeringChecker(tmp_path) - - # Create a path outside project root - unsafe_path = Path("/etc/passwd") - - assert checker._validate_path(unsafe_path, tmp_path) is False - - def test_validate_path_with_symlink_escape(self, tmp_path): - """Test path validation prevents symlink escapes.""" - checker = PowerSteeringChecker(tmp_path) - - # Create a symlink that points outside - link_dir = tmp_path / "link_dir" - link_dir.mkdir() - symlink = link_dir / "escape" - - try: - symlink.symlink_to("/tmp") - # Path validation should allow /tmp (it's a whitelisted temp directory) - result = checker._validate_path(symlink, tmp_path) - # Should be True because /tmp is allowed - assert result is True - except OSError: - # Symlink creation might fail on some systems - pytest.skip("Symlink creation not supported") - - def test_validate_path_in_home_directory(self, tmp_path): - """Test path validation allows paths within user's home directory.""" - checker = PowerSteeringChecker(tmp_path) - - # Create a path in user's home directory - home_path = Path.home() / ".claude" / "projects" / "test" / "session.jsonl" - - # Should allow paths in user's home directory - assert checker._validate_path(home_path, tmp_path) is True - - def test_validate_path_claude_projects_directory(self, tmp_path): - """Test path validation allows Claude Code transcript paths.""" - checker = PowerSteeringChecker(tmp_path) - - # Simulate Claude Code's typical transcript location - claude_projects_path = ( - Path.home() / ".claude" / "projects" / "-home-user-project" / "session-id.jsonl" - ) - - # This is the primary fix - Claude Code transcripts should be allowed - assert checker._validate_path(claude_projects_path, tmp_path) is True - - def test_validate_path_temp_directories(self, tmp_path): - """Test path validation allows common temp directories.""" - checker = PowerSteeringChecker(tmp_path) - - # Test /tmp - tmp_file = Path("/tmp") / "test_transcript.jsonl" - assert checker._validate_path(tmp_file, tmp_path) is True - - # Test /var/tmp - var_tmp_file = Path("/var/tmp") / "test_transcript.jsonl" - assert checker._validate_path(var_tmp_file, tmp_path) is True - - def test_validate_path_truly_outside_all_allowed(self, tmp_path): - """Test path validation rejects paths outside all allowed locations.""" - checker = PowerSteeringChecker(tmp_path) - - # Try a path that's definitely outside: - # - Not in project root - # - Not in user's home directory - # - Not in temp directories - # This should fail unless the path is somehow under home or temp - unsafe_path = Path("/etc/passwd") - - # Only assert False if /etc is truly outside home - home = Path.home().resolve() - etc_path = unsafe_path.resolve() - try: - etc_path.relative_to(home) - # If /etc is somehow under home (unlikely), skip test - pytest.skip("/etc is under home directory on this system") - except ValueError: - # /etc is outside home - should be rejected - assert checker._validate_path(unsafe_path, tmp_path) is False - - def test_load_transcript_validates_path(self, tmp_path): - """Test that _load_transcript validates transcript path.""" - checker = PowerSteeringChecker(tmp_path) - - # Try to load a transcript that's truly outside all allowed locations - # Need a path outside: project root, home directory, and temp directories - unsafe_transcript = Path("/etc/passwd") - - # Only test if /etc is truly outside home - home = Path.home().resolve() - try: - unsafe_transcript.resolve().relative_to(home) - pytest.skip("/etc is under home directory on this system") - except ValueError: - # /etc is outside home - should raise error - with pytest.raises(ValueError, match="outside project root"): - checker._load_transcript(unsafe_transcript) - - -class TestConfigIntegrity: - """Test config integrity validation.""" - - def test_validate_config_valid(self, tmp_path): - """Test config validation with valid config.""" - checker = PowerSteeringChecker(tmp_path) - - valid_config = { - "enabled": True, - "phase": 2, - "checkers_enabled": {"todos_complete": True, "ci_status": False}, - } - - assert checker._validate_config_integrity(valid_config) is True - - def test_validate_config_missing_enabled(self, tmp_path): - """Test config validation rejects missing 'enabled' key.""" - checker = PowerSteeringChecker(tmp_path) - - invalid_config = {"phase": 2} - - assert checker._validate_config_integrity(invalid_config) is False - - def test_validate_config_wrong_enabled_type(self, tmp_path): - """Test config validation rejects non-boolean 'enabled'.""" - checker = PowerSteeringChecker(tmp_path) - - invalid_config = {"enabled": "true"} # String instead of bool - - assert checker._validate_config_integrity(invalid_config) is False - - def test_validate_config_wrong_phase_type(self, tmp_path): - """Test config validation rejects non-integer 'phase'.""" - checker = PowerSteeringChecker(tmp_path) - - invalid_config = {"enabled": True, "phase": "2"} # String instead of int - - assert checker._validate_config_integrity(invalid_config) is False - - def test_validate_config_invalid_checkers_enabled(self, tmp_path): - """Test config validation rejects invalid checkers_enabled.""" - checker = PowerSteeringChecker(tmp_path) - - # Non-dict checkers_enabled - invalid_config = {"enabled": True, "checkers_enabled": []} - - assert checker._validate_config_integrity(invalid_config) is False - - def test_validate_config_checkers_enabled_non_bool_values(self, tmp_path): - """Test config validation rejects non-boolean values in checkers_enabled.""" - checker = PowerSteeringChecker(tmp_path) - - invalid_config = { - "enabled": True, - "checkers_enabled": {"todos_complete": "yes"}, # String instead of bool - } - - assert checker._validate_config_integrity(invalid_config) is False - - def test_load_config_uses_validation(self, tmp_path): - """Test that _load_config uses integrity validation.""" - # Create config file with invalid content - config_path = tmp_path / ".claude" / "tools" / "amplihack" / ".power_steering_config" - config_path.parent.mkdir(parents=True, exist_ok=True) - - invalid_config = {"enabled": "not_a_boolean"} - config_path.write_text(json.dumps(invalid_config)) - - checker = PowerSteeringChecker(tmp_path) - - # Should fall back to defaults due to failed validation - assert checker.config["enabled"] is True # Default value - - -class TestCheckerTimeouts: - """Test checker timeout mechanism.""" - - def test_timeout_context_manager(self): - """Test that timeout context manager works.""" - import time - - from power_steering_checker import _timeout - - # This should NOT timeout - try: - with _timeout(2): - time.sleep(0.1) - success = True - except TimeoutError: - success = False - - assert success is True - - def test_timeout_context_manager_triggers(self): - """Test that timeout context manager triggers on long operations.""" - import time - - from power_steering_checker import _timeout - - # This SHOULD timeout - with pytest.raises(TimeoutError): - with _timeout(1): - time.sleep(5) - - def test_checker_timeout_in_analyze(self, tmp_path): - """Test that checker timeout is applied during analysis.""" - checker = PowerSteeringChecker(tmp_path) - - # Create a mock slow checker - def slow_checker(transcript, session_id): - import time - - time.sleep(15) # Exceeds CHECKER_TIMEOUT - return True - - # Monkey-patch a checker to be slow - checker._check_todos_complete = slow_checker - - # Create minimal transcript - transcript = [{"type": "user", "message": {"content": "test"}}] - - # Run analysis - should timeout but NOT raise exception (fail-open) - analysis = checker._analyze_considerations(transcript, "test_session") - - # Check that the checker result shows timeout (satisfied=True due to fail-open) - result = analysis.results.get("todos_complete") - if result: - assert result.satisfied is True - assert "Timeout" in result.reason or result.satisfied - - -class TestFilePermissions: - """Test that files are created with correct permissions.""" - - def test_semaphore_permissions(self, tmp_path): - """Test that semaphore files have 0o600 permissions.""" - checker = PowerSteeringChecker(tmp_path) - - checker._mark_complete("test_session") - - semaphore = checker.runtime_dir / ".test_session_completed" - assert semaphore.exists() - - # Check permissions (mask with 0o777 to ignore file type bits) - perms = semaphore.stat().st_mode & 0o777 - assert perms == 0o600 - - def test_summary_permissions(self, tmp_path): - """Test that summary files have 0o644 permissions.""" - checker = PowerSteeringChecker(tmp_path) - - checker._write_summary("test_session", "Test summary content") - - summary_file = checker.runtime_dir / "test_session" / "summary.md" - assert summary_file.exists() - - # Check permissions - perms = summary_file.stat().st_mode & 0o777 - assert perms == 0o644 - - def test_log_file_permissions(self, tmp_path): - """Test that log files have 0o600 permissions.""" - checker = PowerSteeringChecker(tmp_path) - - checker._log("Test log message") - - log_file = checker.runtime_dir / "power_steering.log" - assert log_file.exists() - - # Check permissions - perms = log_file.stat().st_mode & 0o777 - assert perms == 0o600 diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_session_classification.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_session_classification.py deleted file mode 100644 index 05fa7b3ab..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_session_classification.py +++ /dev/null @@ -1,1472 +0,0 @@ -#!/usr/bin/env python3 -""" -Failing tests for Power Steering Session Classification (Issue #1492). - -Tests session type detection and selective consideration application -to prevent false positives for non-development sessions. - -Session Types: -1. DEVELOPMENT - Full workflow (PR, CI/CD, testing, reviews) -2. INFORMATIONAL - Q&A, help, capability queries -3. MAINTENANCE - Cleanup, docs, config updates -4. INVESTIGATION - Research, exploration, analysis - -Test-Driven Development: -- All tests written to FAIL initially -- Tests define expected behavior -- Implementation will make tests pass -""" - -import json -import sys -import tempfile -import unittest -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from power_steering_checker import PowerSteeringChecker - - -class TestSessionClassification(unittest.TestCase): - """Tests for session type classification.""" - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - - # Create directory structure - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True, exist_ok=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - - # Create default config - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = { - "enabled": True, - "version": "2.1.0", - "phase": 2, - } - config_path.write_text(json.dumps(config, indent=2)) - - # Initialize checker - self.checker = PowerSteeringChecker(self.project_root) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - # ======================================================================== - # Session Type Detection Tests - # ======================================================================== - - def test_detect_development_session_with_pr_and_ci(self): - """DEVELOPMENT: PR creation + code changes + tests + CI checks.""" - transcript = [ - { - "type": "user", - "message": {"content": "Add authentication feature to the API"}, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "src/auth.py"}, - }, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": "pytest tests/test_auth.py"}, - }, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": "gh pr create --title 'Add auth'"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "DEVELOPMENT") - - def test_detect_informational_session_qa_only(self): - """INFORMATIONAL: Q&A with no tool usage.""" - transcript = [ - { - "type": "user", - "message": {"content": "What skills do you have available?"}, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "text", - "text": "I have the following skills: analyzer, builder...", - }, - ] - }, - }, - { - "type": "user", - "message": {"content": "What slash commands are available?"}, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "text", - "text": "Available commands: /ultrathink, /analyze...", - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INFORMATIONAL") - - def test_detect_maintenance_session_docs_and_config(self): - """MAINTENANCE: Documentation and configuration updates only.""" - transcript = [ - { - "type": "user", - "message": {"content": "Update README with new installation instructions"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Edit", "input": {"file_path": "README.md"}}, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Edit", - "input": {"file_path": ".github/workflows/ci.yml"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "MAINTENANCE") - - def test_detect_investigation_session_read_only(self): - """INVESTIGATION: Read-only exploration with analysis.""" - transcript = [ - { - "type": "user", - "message": {"content": "Investigate why authentication is failing"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Read", "input": {"file_path": "src/auth.py"}}, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Grep", "input": {"pattern": "auth_token"}}, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": "git log --grep='auth' -5"}, - }, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "text", - "text": "Analysis complete: The issue is in token validation...", - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_detect_development_session_without_pr(self): - """DEVELOPMENT: Code changes and tests but no PR yet.""" - transcript = [ - { - "type": "user", - "message": {"content": "Fix the login bug"}, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Edit", - "input": {"file_path": "src/login.py"}, - }, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": "pytest tests/test_login.py"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "DEVELOPMENT") - - def test_detect_informational_session_with_read_tools(self): - """INFORMATIONAL: Q&A with Read tools but no modifications.""" - transcript = [ - { - "type": "user", - "message": {"content": "What does the auth module do?"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Read", "input": {"file_path": "src/auth.py"}}, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "text", - "text": "The auth module provides JWT token validation...", - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INFORMATIONAL") - - # ======================================================================== - # Selective Consideration Application Tests - # ======================================================================== - - def test_informational_session_skips_pr_checks(self): - """INFORMATIONAL: Should skip all PR-related considerations.""" - transcript = [ - { - "type": "user", - "message": {"content": "Explain how this works"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "text", "text": "Here's how it works..."}, - ] - }, - }, - ] - - # Save transcript - transcript_path = self.project_root / "transcript.jsonl" - with open(transcript_path, "w") as f: - for msg in transcript: - f.write(json.dumps(msg) + "\n") - - result = self.checker.check(transcript_path, "test_session") - - # Should approve without checking PR considerations - self.assertEqual(result.decision, "approve") - - # Verify PR checks were not applied - analysis = self.checker._analyze_considerations(transcript, "test_session") - pr_checks = [ - "unrelated_changes", - "root_pollution", - "pr_description", - "review_responses", - ] - - for check_id in pr_checks: - if check_id in analysis.results: - # If checked, should be satisfied (not blocking) - self.assertTrue(analysis.results[check_id].satisfied) - - def test_informational_session_skips_ci_checks(self): - """INFORMATIONAL: Should skip CI/CD considerations.""" - transcript = [ - { - "type": "user", - "message": {"content": "What's the current project status?"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "text", "text": "Current status: 5 open issues..."}, - ] - }, - }, - ] - - transcript_path = self.project_root / "transcript.jsonl" - with open(transcript_path, "w") as f: - for msg in transcript: - f.write(json.dumps(msg) + "\n") - - result = self.checker.check(transcript_path, "test_session") - - # Should approve without CI checks - self.assertEqual(result.decision, "approve") - - analysis = self.checker._analyze_considerations(transcript, "test_session") - ci_checks = ["ci_status", "branch_rebase", "ci_precommit_mismatch"] - - for check_id in ci_checks: - if check_id in analysis.results: - self.assertTrue(analysis.results[check_id].satisfied) - - def test_informational_session_skips_testing_checks(self): - """INFORMATIONAL: Should skip testing considerations.""" - transcript = [ - { - "type": "user", - "message": {"content": "Show me the test coverage"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "text", "text": "Current test coverage is 85%..."}, - ] - }, - }, - ] - - transcript_path = self.project_root / "transcript.jsonl" - with open(transcript_path, "w") as f: - for msg in transcript: - f.write(json.dumps(msg) + "\n") - - result = self.checker.check(transcript_path, "test_session") - self.assertEqual(result.decision, "approve") - - analysis = self.checker._analyze_considerations(transcript, "test_session") - test_checks = ["local_testing", "interactive_testing"] - - for check_id in test_checks: - if check_id in analysis.results: - self.assertTrue(analysis.results[check_id].satisfied) - - def test_development_session_applies_all_checks(self): - """DEVELOPMENT: Should apply full workflow checks.""" - transcript = [ - { - "type": "user", - "message": {"content": "Add feature X"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Write", "input": {"file_path": "src/new.py"}}, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "TodoWrite", - "input": { - "todos": [ - { - "content": "Implement X", - "status": "pending", - "activeForm": "...", - }, - ] - }, - }, - ] - }, - }, - ] - - transcript_path = self.project_root / "transcript.jsonl" - with open(transcript_path, "w") as f: - for msg in transcript: - f.write(json.dumps(msg) + "\n") - - result = self.checker.check(transcript_path, "test_session") - - # Should block because TODOs incomplete and tests missing - self.assertEqual(result.decision, "block") - self.assertIn("todos_complete", result.reasons) - - def test_maintenance_session_applies_minimal_checks(self): - """MAINTENANCE: Should apply documentation and organization checks only.""" - transcript = [ - { - "type": "user", - "message": {"content": "Update the README"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Edit", "input": {"file_path": "README.md"}}, - ] - }, - }, - ] - - transcript_path = self.project_root / "transcript.jsonl" - with open(transcript_path, "w") as f: - for msg in transcript: - f.write(json.dumps(msg) + "\n") - - result = self.checker.check(transcript_path, "test_session") - - # Should approve (documentation updated, minimal checks) - self.assertEqual(result.decision, "approve") - - def test_investigation_session_applies_documentation_checks(self): - """INVESTIGATION: Should require investigation docs but skip workflow.""" - transcript = [ - { - "type": "user", - "message": {"content": "Investigate the performance issue"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Read", "input": {"file_path": "src/main.py"}}, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "text", - "text": "Analysis: The bottleneck is in database queries...", - }, - ] - }, - }, - ] - - transcript_path = self.project_root / "transcript.jsonl" - with open(transcript_path, "w") as f: - for msg in transcript: - f.write(json.dumps(msg) + "\n") - - _ = self.checker.check(transcript_path, "test_session") - - # Should block if investigation not documented - analysis = self.checker._analyze_considerations(transcript, "test_session") - if "investigation_docs" in analysis.results: - # This check should be applied for INVESTIGATION sessions - self.assertIsNotNone(analysis.results["investigation_docs"]) - - # ======================================================================== - # Edge Cases and Boundary Tests - # ======================================================================== - - def test_mixed_session_prioritizes_development(self): - """Mixed session with Q&A and development should be DEVELOPMENT.""" - transcript = [ - { - "type": "user", - "message": {"content": "What does this function do?"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "text", "text": "This function validates..."}, - ] - }, - }, - { - "type": "user", - "message": {"content": "Fix the bug in it"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Edit", "input": {"file_path": "src/code.py"}}, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "DEVELOPMENT") - - def test_empty_transcript_defaults_to_informational(self): - """Empty transcript should default to INFORMATIONAL (fail-open).""" - transcript = [] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INFORMATIONAL") - - def test_single_read_tool_is_informational(self): - """Single Read tool with no follow-up is INFORMATIONAL.""" - transcript = [ - { - "type": "user", - "message": {"content": "Show me the config"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Read", "input": {"file_path": "config.json"}}, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INFORMATIONAL") - - def test_multiple_reads_with_analysis_is_investigation(self): - """Multiple Read/Grep tools with analysis is INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Find all uses of deprecated API"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Grep", "input": {"pattern": "old_api"}}, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Read", "input": {"file_path": "file1.py"}}, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Read", "input": {"file_path": "file2.py"}}, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_git_commit_cleanup_is_maintenance(self): - """Git commits for cleanup without code changes is MAINTENANCE.""" - transcript = [ - { - "type": "user", - "message": {"content": "Commit the pending changes"}, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": "git add . && git commit -m 'Cleanup'"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "MAINTENANCE") - - # ======================================================================== - # Environment Override Tests - # ======================================================================== - - def test_environment_override_session_type(self): - """AMPLIHACK_SESSION_TYPE env var overrides detection.""" - import os - - transcript = [ - { - "type": "user", - "message": {"content": "Add feature"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Write", "input": {"file_path": "src/new.py"}}, - ] - }, - }, - ] - - # Set environment override - os.environ["AMPLIHACK_SESSION_TYPE"] = "INFORMATIONAL" - - try: - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INFORMATIONAL") - finally: - del os.environ["AMPLIHACK_SESSION_TYPE"] - - def test_invalid_environment_override_ignored(self): - """Invalid session type in env var should be ignored.""" - import os - - transcript = [ - { - "type": "user", - "message": {"content": "Show me the docs"}, - }, - ] - - os.environ["AMPLIHACK_SESSION_TYPE"] = "INVALID_TYPE" - - try: - session_type = self.checker.detect_session_type(transcript) - # Should fall back to detection - self.assertEqual(session_type, "INFORMATIONAL") - finally: - del os.environ["AMPLIHACK_SESSION_TYPE"] - - # ======================================================================== - # Backward Compatibility Tests - # ======================================================================== - - def test_backward_compatibility_no_session_type_method(self): - """Old code without detect_session_type should still work.""" - transcript = [ - { - "type": "user", - "message": {"content": "Test"}, - }, - ] - - transcript_path = self.project_root / "transcript.jsonl" - with open(transcript_path, "w") as f: - for msg in transcript: - f.write(json.dumps(msg) + "\n") - - # Should not crash if detect_session_type doesn't exist - result = self.checker.check(transcript_path, "test_session") - self.assertIn(result.decision, ["approve", "block"]) - - def test_existing_qa_detection_still_works(self): - """Existing _is_qa_session method should still function.""" - transcript = [ - { - "type": "user", - "message": {"content": "What is this?"}, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "text", "text": "This is..."}, - ] - }, - }, - ] - - is_qa = self.checker._is_qa_session(transcript) - self.assertTrue(is_qa) - - # ======================================================================== - # Session Type Heuristics Tests - # ======================================================================== - - def test_development_indicators_code_file_extensions(self): - """Code file modifications indicate DEVELOPMENT.""" - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "src/module.py"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "DEVELOPMENT") - - def test_maintenance_indicators_doc_files_only(self): - """Only .md and .txt modifications indicate MAINTENANCE.""" - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Edit", - "input": {"file_path": "docs/guide.md"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "MAINTENANCE") - - def test_investigation_indicators_grep_patterns(self): - """Multiple Grep/search operations indicate INVESTIGATION.""" - transcript = [ - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Grep", "input": {"pattern": "error"}}, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - {"type": "tool_use", "name": "Grep", "input": {"pattern": "exception"}}, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_informational_indicators_question_marks(self): - """High question density indicates INFORMATIONAL. - - Note: Questions like "How does X work?" are now classified as INVESTIGATION - per issue #1604. This test uses truly informational questions about - capabilities and features rather than system internals. - """ - transcript = [ - { - "type": "user", - # Use informational questions (capabilities/features) not investigation questions - "message": { - "content": "What commands are available? Can you help me? What's the format?" - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "text", - "text": "Available commands are... Yes I can help... The format is...", - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INFORMATIONAL") - - -class TestConsiderationMapping(unittest.TestCase): - """Tests for consideration-to-session-type mapping.""" - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True, exist_ok=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = {"enabled": True, "version": "2.1.0", "phase": 2} - config_path.write_text(json.dumps(config, indent=2)) - - self.checker = PowerSteeringChecker(self.project_root) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_get_applicable_considerations_for_development(self): - """DEVELOPMENT sessions should get all considerations.""" - applicable = self.checker.get_applicable_considerations("DEVELOPMENT") - - # Should include all categories - consideration_ids = {c["id"] for c in applicable} - self.assertIn("todos_complete", consideration_ids) - self.assertIn("ci_status", consideration_ids) - self.assertIn("local_testing", consideration_ids) - self.assertIn("pr_description", consideration_ids) - - def test_get_applicable_considerations_for_informational(self): - """INFORMATIONAL sessions should get minimal considerations.""" - applicable = self.checker.get_applicable_considerations("INFORMATIONAL") - - # Should NOT include PR/CI/testing checks - consideration_ids = {c["id"] for c in applicable} - self.assertNotIn("ci_status", consideration_ids) - self.assertNotIn("local_testing", consideration_ids) - self.assertNotIn("pr_description", consideration_ids) - - # Should include completion checks - self.assertIn("objective_completion", consideration_ids) - - def test_get_applicable_considerations_for_maintenance(self): - """MAINTENANCE sessions should get doc and organization checks.""" - applicable = self.checker.get_applicable_considerations("MAINTENANCE") - - consideration_ids = {c["id"] for c in applicable} - - # Should include doc checks - self.assertIn("documentation_updates", consideration_ids) - self.assertIn("docs_organization", consideration_ids) - - # Should NOT include testing/CI - self.assertNotIn("local_testing", consideration_ids) - self.assertNotIn("ci_status", consideration_ids) - - def test_get_applicable_considerations_for_investigation(self): - """INVESTIGATION sessions should get investigation docs check.""" - applicable = self.checker.get_applicable_considerations("INVESTIGATION") - - consideration_ids = {c["id"] for c in applicable} - - # Should require investigation docs - self.assertIn("investigation_docs", consideration_ids) - - # Should NOT include workflow checks - self.assertNotIn("dev_workflow_complete", consideration_ids) - self.assertNotIn("ci_status", consideration_ids) - - -class TestPerformance(unittest.TestCase): - """Performance tests for session classification.""" - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True, exist_ok=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = {"enabled": True, "version": "2.1.0", "phase": 2} - config_path.write_text(json.dumps(config, indent=2)) - - self.checker = PowerSteeringChecker(self.project_root) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_classification_performance_under_500ms(self): - """Session type classification should complete in under 500ms.""" - import time - - # Create a realistic transcript with mixed operations - transcript = [] - - # Add 50 messages (realistic session size) - for i in range(50): - # User messages - transcript.append( - { - "type": "user", - "message": {"content": f"Please implement feature {i}"}, - } - ) - - # Assistant responses with tool usage - transcript.append( - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": f"src/module{i}.py"}, - }, - ] - }, - } - ) - - transcript.append( - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Read", - "input": {"file_path": f"src/other{i}.py"}, - }, - ] - }, - } - ) - - # Measure classification time - start_time = time.time() - session_type = self.checker.detect_session_type(transcript) - elapsed_ms = (time.time() - start_time) * 1000 - - # Verify it completed in under 500ms - self.assertLess( - elapsed_ms, 500, f"Classification took {elapsed_ms:.2f}ms, should be < 500ms" - ) - - # Verify correct classification - self.assertEqual(session_type, "DEVELOPMENT") - - def test_classification_performance_large_transcript(self): - """Classification should handle large transcripts efficiently.""" - import time - - # Create a large transcript (200 messages) - transcript = [] - for i in range(200): - transcript.append( - { - "type": "user", - "message": {"content": f"Request {i}"}, - } - ) - transcript.append( - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Read", - "input": {"file_path": f"file{i}.py"}, - }, - ] - }, - } - ) - - # Measure classification time - start_time = time.time() - _ = self.checker.detect_session_type( - transcript - ) # Result not used, just measuring performance - elapsed_ms = (time.time() - start_time) * 1000 - - # Should still complete in reasonable time (under 1 second for large transcript) - self.assertLess( - elapsed_ms, - 1000, - f"Large transcript classification took {elapsed_ms:.2f}ms, should be < 1000ms", - ) - - -class TestInvestigationKeywordDetection(unittest.TestCase): - """Tests for investigation keyword detection (Issue #1604). - - These tests verify that investigation/troubleshooting sessions are correctly - classified based on keywords in user messages, even when tool usage patterns - would suggest a different classification. - """ - - def setUp(self): - """Set up test fixtures.""" - self.temp_dir = tempfile.mkdtemp() - self.project_root = Path(self.temp_dir) - - (self.project_root / ".claude" / "tools" / "amplihack").mkdir(parents=True, exist_ok=True) - (self.project_root / ".claude" / "runtime" / "power-steering").mkdir( - parents=True, exist_ok=True - ) - - config_path = ( - self.project_root / ".claude" / "tools" / "amplihack" / ".power_steering_config" - ) - config = {"enabled": True, "version": "2.1.0", "phase": 2} - config_path.write_text(json.dumps(config, indent=2)) - - self.checker = PowerSteeringChecker(self.project_root) - - def tearDown(self): - """Clean up test fixtures.""" - import shutil - - shutil.rmtree(self.temp_dir) - - def test_investigate_keyword_triggers_investigation(self): - """'Investigate' keyword should classify as INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Investigate why the SSH connection is failing"}, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": "ssh user@host"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_troubleshoot_keyword_triggers_investigation(self): - """'Troubleshoot' keyword should classify as INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Troubleshoot the deployment failure"}, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": "docker logs app"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_diagnose_keyword_triggers_investigation(self): - """'Diagnose' keyword should classify as INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Diagnose the memory leak issue"}, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": "ps aux | grep python"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_debug_keyword_triggers_investigation(self): - """'Debug' keyword should classify as INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Debug the authentication error"}, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Read", - "input": {"file_path": "logs/auth.log"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_figure_out_phrase_triggers_investigation(self): - """'Figure out' phrase should classify as INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Figure out why the tests are failing"}, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_why_does_phrase_triggers_investigation(self): - """'Why does' phrase should classify as INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Why does the API return 500 errors?"}, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_root_cause_phrase_triggers_investigation(self): - """'Root cause' phrase should classify as INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Find the root cause of the crash"}, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_keyword_takes_priority_over_doc_updates(self): - """Investigation keyword should take priority even with doc updates. - - This is the core issue from #1604 - troubleshooting sessions that - update DISCOVERIES.md should still be classified as INVESTIGATION. - """ - transcript = [ - { - "type": "user", - "message": {"content": "Troubleshoot the SSH connection issue"}, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": "ssh user@host"}, - }, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Edit", - "input": {"file_path": ".claude/context/DISCOVERIES.md"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual( - session_type, - "INVESTIGATION", - "Troubleshooting session with doc updates should still be INVESTIGATION", - ) - - def test_keyword_takes_priority_over_git_operations(self): - """Investigation keyword should take priority even with git operations.""" - transcript = [ - { - "type": "user", - "message": {"content": "Investigate why the VM connection fails"}, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": "ssh azureuser@vm.example.com"}, - }, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": "git commit -m 'Fix: update SSH key'"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual( - session_type, - "INVESTIGATION", - "Investigation session with git commit should still be INVESTIGATION", - ) - - def test_keyword_detection_case_insensitive(self): - """Keyword detection should be case-insensitive.""" - transcript = [ - { - "type": "user", - "message": {"content": "INVESTIGATE the connection failure"}, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_has_investigation_keywords_helper(self): - """Test the _has_investigation_keywords helper method directly.""" - # With keyword - transcript_with_keyword = [ - { - "type": "user", - "message": {"content": "Investigate the issue"}, - }, - ] - self.assertTrue(self.checker._has_investigation_keywords(transcript_with_keyword)) - - # Without keyword - transcript_without_keyword = [ - { - "type": "user", - "message": {"content": "Add a new feature"}, - }, - ] - self.assertFalse(self.checker._has_investigation_keywords(transcript_without_keyword)) - - def test_keyword_detection_checks_first_5_messages(self): - """Keyword detection should only check first 5 user messages.""" - # Keyword in 6th message should not trigger - transcript = [] - for i in range(6): - transcript.append( - { - "type": "user", - "message": {"content": f"Do something {i}"}, - } - ) - - # Add investigation keyword in 6th user message - transcript[5]["message"]["content"] = "Investigate the issue" - - # Should NOT detect as investigation because keyword is in 6th message - self.assertFalse(self.checker._has_investigation_keywords(transcript)) - - def test_analyze_keyword_triggers_investigation(self): - """'Analyze' keyword should classify as INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Analyze the performance metrics"}, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_research_keyword_triggers_investigation(self): - """'Research' keyword should classify as INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Research how authentication works"}, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_explore_keyword_triggers_investigation(self): - """'Explore' keyword should classify as INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Explore the codebase structure"}, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_understand_keyword_triggers_investigation(self): - """'Understand' keyword should classify as INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Help me understand how the API works"}, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_explain_keyword_triggers_investigation(self): - """'Explain' keyword should classify as INVESTIGATION.""" - transcript = [ - { - "type": "user", - "message": {"content": "Explain why this test is failing"}, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_how_does_phrase_triggers_investigation(self): - """'How does X work?' phrase should classify as INVESTIGATION. - - This test was added per review feedback to explicitly verify that - questions about how things work are classified as INVESTIGATION. - """ - transcript = [ - { - "type": "user", - "message": {"content": "How does this authentication module work?"}, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "INVESTIGATION") - - def test_no_false_positive_for_development_task(self): - """Development tasks without keywords should still be DEVELOPMENT.""" - transcript = [ - { - "type": "user", - "message": {"content": "Add JWT authentication to the API"}, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Write", - "input": {"file_path": "src/auth.py"}, - }, - ] - }, - }, - { - "type": "assistant", - "message": { - "content": [ - { - "type": "tool_use", - "name": "Bash", - "input": {"command": "pytest tests/test_auth.py"}, - }, - ] - }, - }, - ] - - session_type = self.checker.detect_session_type(transcript) - self.assertEqual(session_type, "DEVELOPMENT") - - -if __name__ == "__main__": - unittest.main() diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_settings_migrator.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_settings_migrator.py deleted file mode 100644 index 08d0a41d1..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_settings_migrator.py +++ /dev/null @@ -1,871 +0,0 @@ -#!/usr/bin/env python3 -""" -Comprehensive tests for settings_migrator.py - -Tests follow TDD pyramid: -- 60% Unit tests (isolated, fast, heavily mocked) -- 30% Integration tests (real filesystem, multiple components) -- 10% E2E tests (complete user scenarios) - -Philosophy: -- Zero-BS: Every test works, no stubs -- Fast execution: All tests complete in seconds -- Clear assertions: Single responsibility per test -- Realistic fixtures: Real-world scenarios -""" - -import json -import sys -from pathlib import Path -from unittest.mock import mock_open, patch - -import pytest - -# Add parent directory to path for imports -sys.path.insert(0, str(Path(__file__).parent.parent)) - -from settings_migrator import ( - SettingsMigrator, - migrate_global_hooks, -) - -# ============================================================================ -# FIXTURES -# ============================================================================ - - -@pytest.fixture -def tmp_project_root(tmp_path): - """Create temporary project root with .claude marker.""" - project_root = tmp_path / "project" - project_root.mkdir() - (project_root / ".claude").mkdir() - return project_root - - -@pytest.fixture -def tmp_home(tmp_path): - """Create temporary home directory.""" - home = tmp_path / "home" - home.mkdir() - return home - - -@pytest.fixture -def global_settings_with_amplihack_stop_hook(tmp_home): - """Global settings with amplihack Stop hook (absolute path).""" - settings = { - "hooks": { - "Stop": [ - { - "hooks": [ - { - "type": "command", - "command": "/home/user/.claude/tools/amplihack/hooks/stop.py", - "timeout": 30000, - } - ] - } - ] - } - } - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text(json.dumps(settings, indent=2)) - return global_settings - - -@pytest.fixture -def global_settings_with_multiple_amplihack_hooks(tmp_home): - """Global settings with multiple amplihack hooks.""" - settings = { - "hooks": { - "Stop": [ - { - "hooks": [ - { - "type": "command", - "command": "amplihack/hooks/stop.py", - "timeout": 30000, - } - ] - } - ], - "SessionStart": [ - { - "hooks": [ - { - "type": "command", - "command": ".claude/tools/amplihack/hooks/session_start.py", - "timeout": 30000, - } - ] - } - ], - } - } - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text(json.dumps(settings, indent=2)) - return global_settings - - -@pytest.fixture -def global_settings_with_mixed_hooks(tmp_home): - """Global settings with both amplihack and non-amplihack hooks.""" - settings = { - "hooks": { - "Stop": [ - { - "hooks": [ - { - "type": "command", - "command": "amplihack/hooks/stop.py", - "timeout": 30000, - }, - { - "type": "command", - "command": "/usr/local/bin/custom_hook.py", - "timeout": 30000, - }, - ] - } - ] - } - } - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text(json.dumps(settings, indent=2)) - return global_settings - - -@pytest.fixture -def global_settings_no_hooks(tmp_home): - """Global settings without any hooks.""" - settings = {"some_setting": "value"} - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text(json.dumps(settings, indent=2)) - return global_settings - - -@pytest.fixture -def project_settings_exists(tmp_project_root): - """Project settings file exists.""" - settings = { - "hooks": { - "Stop": [ - { - "hooks": [ - { - "type": "command", - "command": ".claude/tools/amplihack/hooks/stop.py", - "timeout": 30000, - } - ] - } - ] - } - } - project_settings = tmp_project_root / ".claude" / "settings.json" - project_settings.write_text(json.dumps(settings, indent=2)) - return project_settings - - -# ============================================================================ -# UNIT TESTS (60% - Fast, heavily mocked) -# ============================================================================ - - -class TestSettingsMigratorInit: - """Test SettingsMigrator initialization.""" - - def test_init_with_explicit_project_root(self, tmp_project_root, tmp_home): - """Initialize with explicit project root.""" - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - assert migrator.project_root == tmp_project_root - assert migrator.global_settings_path == tmp_home / ".claude" / "settings.json" - assert migrator.project_settings_path == tmp_project_root / ".claude" / "settings.json" - - def test_init_auto_detect_project_root(self, tmp_project_root, tmp_home): - """Initialize with auto-detected project root.""" - with patch("pathlib.Path.home", return_value=tmp_home): - with patch.object( - SettingsMigrator, "_detect_project_root", return_value=tmp_project_root - ): - migrator = SettingsMigrator() - - assert migrator.project_root == tmp_project_root - - -class TestDetectAmplihackHooks: - """Test amplihack hook detection (unit tests with mocking).""" - - def test_detect_stop_hook_absolute_path(self, tmp_project_root, tmp_home): - """Detect Stop hook with absolute path.""" - settings = { - "hooks": { - "Stop": [ - {"hooks": [{"command": "/home/user/.claude/tools/amplihack/hooks/stop.py"}]} - ] - } - } - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - with patch("pathlib.Path.exists", return_value=True): - with patch("builtins.open", mock_open(read_data=json.dumps(settings))): - result = migrator.detect_global_amplihack_hooks() - - assert result is True - - def test_detect_stop_hook_relative_path(self, tmp_project_root, tmp_home): - """Detect Stop hook with relative path.""" - settings = {"hooks": {"Stop": [{"hooks": [{"command": "amplihack/hooks/stop.py"}]}]}} - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - with patch("pathlib.Path.exists", return_value=True): - with patch("builtins.open", mock_open(read_data=json.dumps(settings))): - result = migrator.detect_global_amplihack_hooks() - - assert result is True - - def test_detect_no_amplihack_hooks(self, tmp_project_root, tmp_home): - """Detect no amplihack hooks present.""" - settings = {"hooks": {"Stop": [{"hooks": [{"command": "/usr/local/bin/custom_hook.py"}]}]}} - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - with patch("pathlib.Path.exists", return_value=True): - with patch("builtins.open", mock_open(read_data=json.dumps(settings))): - result = migrator.detect_global_amplihack_hooks() - - assert result is False - - def test_detect_multiple_amplihack_hooks(self, tmp_project_root, tmp_home): - """Detect multiple amplihack hooks.""" - settings = { - "hooks": { - "Stop": [{"hooks": [{"command": "amplihack/hooks/stop.py"}]}], - "SessionStart": [ - {"hooks": [{"command": ".claude/tools/amplihack/hooks/session_start.py"}]} - ], - } - } - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - with patch("pathlib.Path.exists", return_value=True): - with patch("builtins.open", mock_open(read_data=json.dumps(settings))): - result = migrator.detect_global_amplihack_hooks() - - assert result is True - - def test_detect_preserves_non_amplihack_hooks(self, tmp_project_root, tmp_home): - """Ensure non-amplihack hooks are not detected as amplihack hooks.""" - settings = { - "hooks": { - "Stop": [ - { - "hooks": [ - {"command": "/usr/local/bin/my_custom_hook.py"}, - {"command": "/opt/tools/another_hook.sh"}, - ] - } - ] - } - } - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - with patch("pathlib.Path.exists", return_value=True): - with patch("builtins.open", mock_open(read_data=json.dumps(settings))): - result = migrator.detect_global_amplihack_hooks() - - assert result is False - - def test_detect_handles_missing_global_settings(self, tmp_project_root, tmp_home): - """Handle missing global settings file.""" - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - with patch("pathlib.Path.exists", return_value=False): - result = migrator.detect_global_amplihack_hooks() - - assert result is False - - def test_detect_handles_missing_hooks_key(self, tmp_project_root, tmp_home): - """Handle missing 'hooks' key in settings.""" - settings = {"some_setting": "value"} - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - with patch("pathlib.Path.exists", return_value=True): - with patch("builtins.open", mock_open(read_data=json.dumps(settings))): - result = migrator.detect_global_amplihack_hooks() - - assert result is False - - def test_detect_handles_empty_hooks_array(self, tmp_project_root, tmp_home): - """Handle empty hooks array.""" - settings = {"hooks": {"Stop": []}} - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - with patch("pathlib.Path.exists", return_value=True): - with patch("builtins.open", mock_open(read_data=json.dumps(settings))): - result = migrator.detect_global_amplihack_hooks() - - assert result is False - - def test_detect_handles_malformed_json(self, tmp_project_root, tmp_home): - """Handle malformed JSON gracefully.""" - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - with patch("pathlib.Path.exists", return_value=True): - with patch("builtins.open", mock_open(read_data="not valid json {{")): - result = migrator.detect_global_amplihack_hooks() - - assert result is False - - -class TestSafeJsonUpdate: - """Test safe JSON update with atomic write.""" - - def test_safe_json_update_creates_temp_file(self, tmp_project_root, tmp_home): - """Ensure temp file is created during update.""" - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - target_file = tmp_home / ".claude" / "test.json" - target_file.parent.mkdir(parents=True, exist_ok=True) - - data = {"test": "value"} - - with patch("builtins.open", mock_open()) as mock_file: - with patch("os.replace"): - result = migrator.safe_json_update(target_file, data) - - assert result is True - # Verify temp file path - temp_file_path = target_file.parent / f".{target_file.name}.tmp" - mock_file.assert_called_once_with(temp_file_path, "w") - - def test_safe_json_update_atomic_write(self, tmp_project_root, tmp_home): - """Verify atomic write using os.replace.""" - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - target_file = tmp_home / ".claude" / "test.json" - target_file.parent.mkdir(parents=True, exist_ok=True) - - data = {"test": "value"} - - with patch("builtins.open", mock_open()): - with patch("os.replace") as mock_replace: - result = migrator.safe_json_update(target_file, data) - - assert result is True - temp_file_path = target_file.parent / f".{target_file.name}.tmp" - mock_replace.assert_called_once_with(temp_file_path, target_file) - - def test_safe_json_update_handles_write_failure(self, tmp_project_root, tmp_home): - """Handle write failure gracefully.""" - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - target_file = tmp_home / ".claude" / "test.json" - target_file.parent.mkdir(parents=True, exist_ok=True) - - data = {"test": "value"} - - with patch("builtins.open", side_effect=OSError("Write failed")): - result = migrator.safe_json_update(target_file, data) - - assert result is False - - def test_safe_json_update_cleans_up_temp_on_failure(self, tmp_project_root, tmp_home): - """Ensure temp file is cleaned up on failure.""" - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - target_file = tmp_home / ".claude" / "test.json" - target_file.parent.mkdir(parents=True, exist_ok=True) - - # Create temp file - temp_file_path = target_file.parent / f".{target_file.name}.tmp" - temp_file_path.write_text("temp content") - - data = {"test": "value"} - - with patch("builtins.open", mock_open()): - with patch("os.replace", side_effect=OSError("Replace failed")): - with patch("pathlib.Path.unlink"): - result = migrator.safe_json_update(target_file, data) - - assert result is False - - -class TestBackupCreation: - """Test backup creation before modification.""" - - def test_create_backup_with_timestamp(self, tmp_project_root, tmp_home): - """Create backup with timestamp.""" - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - # Create global settings - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text('{"test": "data"}') - - with patch("time.time", return_value=1234567890): - backup_path = migrator._create_backup() - - assert backup_path is not None - assert backup_path.name == "settings.json.backup.1234567890" - assert backup_path.exists() - - def test_create_backup_handles_missing_file(self, tmp_project_root, tmp_home): - """Handle missing global settings gracefully.""" - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - backup_path = migrator._create_backup() - - assert backup_path is None - - def test_create_backup_handles_copy_failure(self, tmp_project_root, tmp_home): - """Handle backup copy failure gracefully.""" - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - # Create global settings - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text('{"test": "data"}') - - with patch("shutil.copy2", side_effect=OSError("Copy failed")): - backup_path = migrator._create_backup() - - assert backup_path is None - - -# ============================================================================ -# INTEGRATION TESTS (30% - Real filesystem, multiple components) -# ============================================================================ - - -class TestMigrationWorkflow: - """Test full migration workflow with real filesystem.""" - - def test_migrate_removes_global_adds_local_verification( - self, tmp_project_root, tmp_home, global_settings_with_amplihack_stop_hook - ): - """Full migration: remove global, ensure local exists.""" - # Create project settings - project_settings = tmp_project_root / ".claude" / "settings.json" - project_settings.write_text('{"hooks": {}}') - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - result = migrator.migrate_to_project_local() - - # Verify results - assert result.success is True - assert result.global_hooks_found is True - assert result.global_hooks_removed is True - assert result.project_hook_ensured is True - assert result.backup_created is not None - - # Verify global settings no longer has amplihack hooks - with open(global_settings_with_amplihack_stop_hook) as f: - global_settings = json.load(f) - assert "Stop" not in global_settings.get("hooks", {}) - - def test_migration_idempotency( - self, tmp_project_root, tmp_home, global_settings_with_amplihack_stop_hook - ): - """Migration is idempotent - running twice is safe.""" - project_settings = tmp_project_root / ".claude" / "settings.json" - project_settings.write_text('{"hooks": {}}') - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - # First migration - result1 = migrator.migrate_to_project_local() - assert result1.success is True - assert result1.global_hooks_removed is True - - # Second migration (should be no-op) - result2 = migrator.migrate_to_project_local() - assert result2.success is True - assert result2.global_hooks_found is False - assert result2.global_hooks_removed is False - - def test_migration_preserves_other_hooks( - self, tmp_project_root, tmp_home, global_settings_with_mixed_hooks - ): - """Migration preserves non-amplihack hooks.""" - project_settings = tmp_project_root / ".claude" / "settings.json" - project_settings.write_text('{"hooks": {}}') - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - result = migrator.migrate_to_project_local() - - assert result.success is True - - # Verify non-amplihack hook preserved - with open(global_settings_with_mixed_hooks) as f: - global_settings = json.load(f) - stop_hooks = global_settings.get("hooks", {}).get("Stop", []) - assert len(stop_hooks) == 1 - assert len(stop_hooks[0]["hooks"]) == 1 - assert "custom_hook.py" in stop_hooks[0]["hooks"][0]["command"] - - def test_migration_multiple_hook_types( - self, tmp_project_root, tmp_home, global_settings_with_multiple_amplihack_hooks - ): - """Migration handles multiple hook types.""" - project_settings = tmp_project_root / ".claude" / "settings.json" - project_settings.write_text('{"hooks": {}}') - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - result = migrator.migrate_to_project_local() - - assert result.success is True - - # Verify all amplihack hooks removed - with open(global_settings_with_multiple_amplihack_hooks) as f: - global_settings = json.load(f) - assert "Stop" not in global_settings.get("hooks", {}) - assert "SessionStart" not in global_settings.get("hooks", {}) - - -class TestBackupAndRecovery: - """Test backup creation and recovery scenarios.""" - - def test_backup_created_before_modification( - self, tmp_project_root, tmp_home, global_settings_with_amplihack_stop_hook - ): - """Backup is created before any modifications.""" - project_settings = tmp_project_root / ".claude" / "settings.json" - project_settings.write_text('{"hooks": {}}') - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - # Record original content - original_content = global_settings_with_amplihack_stop_hook.read_text() - - result = migrator.migrate_to_project_local() - - assert result.success is True - assert result.backup_created is not None - - # Verify backup contains original content - backup_content = result.backup_created.read_text() - assert backup_content == original_content - - def test_no_backup_if_no_global_settings(self, tmp_project_root, tmp_home): - """No backup created if global settings don't exist.""" - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - result = migrator.migrate_to_project_local() - - assert result.success is True - assert result.backup_created is None - - -class TestProjectRootDetection: - """Test project root auto-detection.""" - - def test_detect_project_root_from_hooks_directory(self, tmp_project_root): - """Detect project root from hooks directory.""" - # Simulate being in hooks directory - hooks_dir = tmp_project_root / ".claude" / "tools" / "amplihack" / "hooks" - hooks_dir.mkdir(parents=True) - - with patch("pathlib.Path.cwd", return_value=hooks_dir): - migrator = SettingsMigrator() - - # Should find project root by traversing up - assert (migrator.project_root / ".claude").exists() - - def test_detect_project_root_fails_gracefully(self, tmp_path): - """Fail gracefully if no .claude marker found.""" - # Create directory without .claude marker - no_marker_dir = tmp_path / "no_marker" - no_marker_dir.mkdir() - - # Mock _detect_project_root to raise ValueError - with patch.object( - SettingsMigrator, - "_detect_project_root", - side_effect=ValueError("Could not find project root with .claude marker"), - ): - with pytest.raises(ValueError, match="Could not find project root"): - SettingsMigrator() - - -# ============================================================================ -# E2E TESTS (10% - Complete user scenarios) -# ============================================================================ - - -class TestEndToEndScenarios: - """Test complete user scenarios from start to finish.""" - - def test_user_scenario_first_time_migration(self, tmp_project_root, tmp_home, capsys): - """Complete scenario: User's first migration.""" - # Setup: User has global hooks, no project settings - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text( - json.dumps( - { - "hooks": { - "Stop": [ - { - "hooks": [ - { - "type": "command", - "command": "amplihack/hooks/stop.py", - "timeout": 30000, - } - ] - } - ] - } - } - ) - ) - - # Create project settings - project_settings = tmp_project_root / ".claude" / "settings.json" - project_settings.write_text('{"hooks": {}}') - - with patch("pathlib.Path.home", return_value=tmp_home): - # Run migration via convenience function - result = migrate_global_hooks(tmp_project_root) - - # User expectations - assert result.success is True - assert result.global_hooks_found is True - assert result.global_hooks_removed is True - assert result.backup_created is not None - assert result.error is None - - # Verify user-visible outcome - captured = capsys.readouterr() - assert "[settings_migrator]" in captured.err - - def test_user_scenario_no_migration_needed( - self, tmp_project_root, tmp_home, global_settings_no_hooks, capsys - ): - """Scenario: User has no amplihack hooks.""" - project_settings = tmp_project_root / ".claude" / "settings.json" - project_settings.write_text('{"hooks": {}}') - - with patch("pathlib.Path.home", return_value=tmp_home): - result = migrate_global_hooks(tmp_project_root) - - # User expectations - assert result.success is True - assert result.global_hooks_found is False - assert result.global_hooks_removed is False - assert result.backup_created is None - - # Verify user-visible outcome - captured = capsys.readouterr() - assert "No global amplihack hooks found" in captured.err - - def test_user_scenario_migration_failure_recovery(self, tmp_project_root, tmp_home, capsys): - """Scenario: Migration fails, user gets clear error.""" - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text("malformed json {{") - - with patch("pathlib.Path.home", return_value=tmp_home): - result = migrate_global_hooks(tmp_project_root) - - # User expectations - assert result.success is True # Detection failure is non-fatal - assert result.global_hooks_found is False - - def test_command_line_execution(self, tmp_project_root, tmp_home, capsys): - """Test command-line execution (if __name__ == '__main__').""" - # Setup - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text( - json.dumps({"hooks": {"Stop": [{"hooks": [{"command": "amplihack/hooks/stop.py"}]}]}}) - ) - - project_settings = tmp_project_root / ".claude" / "settings.json" - project_settings.write_text('{"hooks": {}}') - - with patch("pathlib.Path.home", return_value=tmp_home): - result = migrate_global_hooks(tmp_project_root) - - # Verify command-line output format - assert result.success is True - assert result.global_hooks_found is True - assert result.global_hooks_removed is True - - -# ============================================================================ -# EDGE CASES AND ERROR CONDITIONS -# ============================================================================ - - -class TestEdgeCases: - """Test edge cases and unusual conditions.""" - - def test_empty_hooks_object(self, tmp_project_root, tmp_home): - """Handle empty hooks object.""" - settings = {"hooks": {}} - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text(json.dumps(settings)) - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - result = migrator.detect_global_amplihack_hooks() - - assert result is False - - def test_hook_config_without_hooks_array(self, tmp_project_root, tmp_home): - """Handle hook config without 'hooks' array.""" - settings = {"hooks": {"Stop": [{"type": "config"}]}} - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text(json.dumps(settings)) - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - result = migrator.detect_global_amplihack_hooks() - - assert result is False - - def test_hook_without_command_field(self, tmp_project_root, tmp_home): - """Handle hook without 'command' field.""" - settings = {"hooks": {"Stop": [{"hooks": [{"type": "command", "timeout": 30000}]}]}} - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text(json.dumps(settings)) - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - result = migrator.detect_global_amplihack_hooks() - - assert result is False - - def test_concurrent_modification_resilience(self, tmp_project_root, tmp_home): - """Test resilience to concurrent modifications.""" - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text( - json.dumps({"hooks": {"Stop": [{"hooks": [{"command": "amplihack/hooks/stop.py"}]}]}}) - ) - - project_settings = tmp_project_root / ".claude" / "settings.json" - project_settings.write_text('{"hooks": {}}') - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - - # Atomic write should handle concurrent modifications - result = migrator.migrate_to_project_local() - - assert result.success is True - - -# ============================================================================ -# PATTERN TESTS (All hook patterns) -# ============================================================================ - - -class TestAllHookPatterns: - """Test detection of all amplihack hook patterns.""" - - @pytest.mark.parametrize( - "hook_pattern", - [ - "amplihack/hooks/stop.py", - ".claude/tools/amplihack/hooks/stop.py", - "amplihack/hooks/session_start.py", - ".claude/tools/amplihack/hooks/session_start.py", - "amplihack/hooks/pre_tool_use.py", - ".claude/tools/amplihack/hooks/pre_tool_use.py", - "amplihack/hooks/post_tool_use.py", - ".claude/tools/amplihack/hooks/post_tool_use.py", - "amplihack/hooks/pre_compact.py", - ".claude/tools/amplihack/hooks/pre_compact.py", - ], - ) - def test_detect_all_hook_patterns(self, hook_pattern, tmp_project_root, tmp_home): - """Test detection of each hook pattern.""" - settings = {"hooks": {"Stop": [{"hooks": [{"command": hook_pattern}]}]}} - - global_settings = tmp_home / ".claude" / "settings.json" - global_settings.parent.mkdir(parents=True) - global_settings.write_text(json.dumps(settings)) - - with patch("pathlib.Path.home", return_value=tmp_home): - migrator = SettingsMigrator(tmp_project_root) - result = migrator.detect_global_amplihack_hooks() - - assert result is True, f"Failed to detect pattern: {hook_pattern}" - - -# ============================================================================ -# TEST SUMMARY -# ============================================================================ - -""" -Test Coverage Summary: - -UNIT TESTS (60%): -- SettingsMigrator initialization (auto-detect and explicit) -- Hook detection with various patterns -- JSON safety and atomic writes -- Backup creation -- Error handling for missing files, malformed JSON -- Edge cases (empty arrays, missing keys) - -INTEGRATION TESTS (30%): -- Full migration workflow with real filesystem -- Idempotency verification -- Preservation of non-amplihack hooks -- Multiple hook types handling -- Backup and recovery scenarios - -E2E TESTS (10%): -- First-time user migration -- No migration needed scenario -- Migration failure recovery -- Command-line execution - -Total: 45+ tests covering all public API methods and edge cases -Execution time: <5 seconds (fast, well-mocked) -Philosophy compliance: Zero-BS, every test works -""" diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_shutdown_context.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_shutdown_context.py deleted file mode 100644 index 43330b33a..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_shutdown_context.py +++ /dev/null @@ -1,401 +0,0 @@ -#!/usr/bin/env python3 -""" -TDD Tests for Shutdown Context Module (UNIT TESTS - 60%) - -Tests the shutdown_context module which provides centralized shutdown detection -for all hooks. This module detects various shutdown contexts to prevent stdin -hangs during cleanup. - -Testing Philosophy: -- Ruthlessly Simple: Each test has single responsibility -- Zero-BS: All tests work, no stubs -- Fail-Open: Shutdown detection errs on side of safety - -Test Coverage: -- Environment variable detection -- Stdin closed detection -- Stdin detached detection -- Atexit context detection -- Mark/clear shutdown functions -- No false positives during normal operation -""" - -import os -import sys -from pathlib import Path -from unittest.mock import patch - -import pytest - -# Add hooks directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - -# These imports will fail until implementation exists - TDD approach -try: - from shutdown_context import ( - clear_shutdown, - is_shutdown_in_progress, - mark_shutdown, - ) - - IMPLEMENTATION_EXISTS = True -except ImportError: - IMPLEMENTATION_EXISTS = False - -# Skip all tests if implementation doesn't exist yet -pytestmark = pytest.mark.skipif( - not IMPLEMENTATION_EXISTS, reason="Implementation not yet created (TDD)" -) - - -# ============================================================================= -# UNIT TESTS - Environment Variable Detection -# ============================================================================= - - -class TestEnvironmentVariableDetection: - """Test shutdown detection via AMPLIHACK_SHUTDOWN_IN_PROGRESS env var.""" - - def test_detects_shutdown_when_env_var_set_to_one(self): - """Should return True when AMPLIHACK_SHUTDOWN_IN_PROGRESS=1""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - try: - # ACT - result = is_shutdown_in_progress() - - # ASSERT - assert result is True, "Should detect shutdown when env var is '1'" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - @patch("sys.stdin") - def test_no_shutdown_when_env_var_not_set(self, mock_stdin): - """Should return False when AMPLIHACK_SHUTDOWN_IN_PROGRESS not set""" - # ARRANGE - ensure env var is not set - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # Mock stdin as healthy - mock_stdin.closed = False - mock_stdin.fileno.return_value = 0 - - # ACT - result = is_shutdown_in_progress() - - # ASSERT - assert result is False, "Should not detect shutdown when env var absent" - - @patch("sys.stdin") - def test_no_shutdown_when_env_var_set_to_zero(self, mock_stdin): - """Should return False when AMPLIHACK_SHUTDOWN_IN_PROGRESS=0""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "0" - - # Mock stdin as healthy - mock_stdin.closed = False - mock_stdin.fileno.return_value = 0 - - try: - # ACT - result = is_shutdown_in_progress() - - # ASSERT - assert result is False, "Should not detect shutdown when env var is '0'" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - @patch("sys.stdin") - def test_no_shutdown_when_env_var_empty_string(self, mock_stdin): - """Should return False when AMPLIHACK_SHUTDOWN_IN_PROGRESS=''""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "" - - # Mock stdin as healthy - mock_stdin.closed = False - mock_stdin.fileno.return_value = 0 - - try: - # ACT - result = is_shutdown_in_progress() - - # ASSERT - assert result is False, "Should not detect shutdown when env var is empty" - finally: - # CLEANUP - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -# ============================================================================= -# UNIT TESTS - Stdin Closed Detection -# ============================================================================= - - -class TestStdinClosedDetection: - """Test shutdown detection when stdin is closed.""" - - @patch("sys.stdin") - def test_detects_shutdown_when_stdin_closed(self, mock_stdin): - """Should detect shutdown when stdin.closed is True""" - # ARRANGE - mock_stdin.closed = True - - # ACT - result = is_shutdown_in_progress() - - # ASSERT - assert result is True, "Should detect shutdown when stdin is closed" - - @patch("sys.stdin") - def test_no_shutdown_when_stdin_open(self, mock_stdin): - """Should not detect shutdown when stdin.closed is False""" - # ARRANGE - mock_stdin.closed = False - - # ACT - result = is_shutdown_in_progress() - - # ASSERT - assert result is False, "Should not detect shutdown when stdin is open" - - -# ============================================================================= -# UNIT TESTS - Stdin Detached Detection -# ============================================================================= - - -class TestStdinDetachedDetection: - """Test shutdown detection when stdin is detached (no fileno).""" - - @patch("sys.stdin") - def test_detects_shutdown_when_stdin_has_no_fileno(self, mock_stdin): - """Should detect shutdown when stdin.fileno() raises ValueError""" - # ARRANGE - mock_stdin.closed = False - mock_stdin.fileno.side_effect = ValueError("I/O operation on closed file") - - # ACT - result = is_shutdown_in_progress() - - # ASSERT - assert result is True, "Should detect shutdown when stdin has no fileno" - - @patch("sys.stdin") - def test_does_not_detect_shutdown_for_stringio_mock(self, mock_stdin): - """Should NOT detect shutdown for StringIO/mock (UnsupportedOperation is normal)""" - # ARRANGE - import io - - mock_stdin.closed = False - mock_stdin.fileno.side_effect = io.UnsupportedOperation("fileno not available") - - # ACT - result = is_shutdown_in_progress() - - # ASSERT - assert result is False, "StringIO/mock with UnsupportedOperation is not shutdown" - - @patch("sys.stdin") - def test_no_shutdown_when_stdin_has_valid_fileno(self, mock_stdin): - """Should not detect shutdown when stdin.fileno() returns valid fd""" - # ARRANGE - mock_stdin.closed = False - mock_stdin.fileno.return_value = 0 # Valid file descriptor - - # ACT - result = is_shutdown_in_progress() - - # ASSERT - assert result is False, "Should not detect shutdown with valid fileno" - - -# ============================================================================= -# UNIT TESTS - Atexit Context Detection -# ============================================================================= - - -class TestAtexitContextDetection: - """Test shutdown detection during atexit handler execution.""" - - def test_detects_shutdown_during_atexit_execution(self): - """Should detect shutdown when called from atexit handler. - - Note: This is difficult to test directly since we can't easily - simulate being in an atexit handler. This test documents the - expected behavior for manual verification. - """ - # This test serves as documentation of expected behavior - # In real usage, is_shutdown_in_progress() would detect: - # - sys._getframe() inspection showing atexit module in stack - # - Or simpler: rely on env var set by signal handler - - -# ============================================================================= -# UNIT TESTS - Mark and Clear Shutdown Functions -# ============================================================================= - - -class TestMarkShutdown: - """Test mark_shutdown() function for programmatic shutdown marking.""" - - def test_mark_shutdown_sets_env_var(self): - """Should set AMPLIHACK_SHUTDOWN_IN_PROGRESS=1""" - # ARRANGE - ensure clean state - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - try: - # ACT - mark_shutdown() - - # ASSERT - assert os.environ.get("AMPLIHACK_SHUTDOWN_IN_PROGRESS") == "1" - assert is_shutdown_in_progress() is True - finally: - # CLEANUP - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - def test_mark_shutdown_is_idempotent(self): - """Should safely handle being called multiple times""" - # ARRANGE - ensure clean state - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - try: - # ACT - mark_shutdown() - mark_shutdown() # Call again - mark_shutdown() # And again - - # ASSERT - assert os.environ.get("AMPLIHACK_SHUTDOWN_IN_PROGRESS") == "1" - assert is_shutdown_in_progress() is True - finally: - # CLEANUP - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -class TestClearShutdown: - """Test clear_shutdown() function for testing cleanup.""" - - @patch("sys.stdin") - def test_clear_shutdown_removes_env_var(self, mock_stdin): - """Should remove AMPLIHACK_SHUTDOWN_IN_PROGRESS""" - # ARRANGE - os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # Mock stdin as healthy - mock_stdin.closed = False - mock_stdin.fileno.return_value = 0 - - # ACT - clear_shutdown() - - # ASSERT - assert "AMPLIHACK_SHUTDOWN_IN_PROGRESS" not in os.environ - assert is_shutdown_in_progress() is False - - @patch("sys.stdin") - def test_clear_shutdown_is_idempotent(self, mock_stdin): - """Should safely handle being called when env var not set""" - # ARRANGE - ensure env var not set - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # Mock stdin as healthy - mock_stdin.closed = False - mock_stdin.fileno.return_value = 0 - - # ACT - should not raise error - clear_shutdown() - clear_shutdown() # Call again - - # ASSERT - assert is_shutdown_in_progress() is False - - -# ============================================================================= -# UNIT TESTS - No False Positives -# ============================================================================= - - -class TestNoFalsePositives: - """Test that shutdown detection does not trigger false positives.""" - - @patch("sys.stdin") - def test_normal_operation_with_open_stdin(self, mock_stdin): - """Should not detect shutdown during normal operation""" - # ARRANGE - normal stdin state - mock_stdin.closed = False - mock_stdin.fileno.return_value = 0 - - # Ensure env var not set - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # ACT - result = is_shutdown_in_progress() - - # ASSERT - assert result is False, "Should not falsely detect shutdown" - - @patch("sys.stdin") - def test_no_false_positive_with_various_env_vars(self, mock_stdin): - """Should only respond to exact AMPLIHACK_SHUTDOWN_IN_PROGRESS=1""" - # ARRANGE - set various other env vars - os.environ["SHUTDOWN"] = "1" - os.environ["AMPLIHACK_SHUTDOWN"] = "1" - os.environ["IN_PROGRESS"] = "1" - - # Ensure target env var not set - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # Mock stdin as healthy - mock_stdin.closed = False - mock_stdin.fileno.return_value = 0 - - try: - # ACT - result = is_shutdown_in_progress() - - # ASSERT - assert result is False, "Should not respond to similar env vars" - finally: - # CLEANUP - for key in ["SHUTDOWN", "AMPLIHACK_SHUTDOWN", "IN_PROGRESS"]: - if key in os.environ: - del os.environ[key] - - -# ============================================================================= -# TEST FIXTURES -# ============================================================================= - - -@pytest.fixture(autouse=True) -def cleanup_env_var(): - """Ensure AMPLIHACK_SHUTDOWN_IN_PROGRESS is cleaned up after each test. - - This fixture runs automatically for every test to prevent test pollution - from env var state leaking between tests. - """ - yield - # Cleanup after test - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -# ============================================================================= -# TEST CONFIGURATION -# ============================================================================= - - -def pytest_configure(config): - """Register custom pytest markers""" - config.addinivalue_line("markers", "unit: marks tests as unit tests") diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/test_stop_hook_integration.py b/amplifier-bundle/tools/amplihack/hooks/tests/test_stop_hook_integration.py deleted file mode 100644 index 2f2df9dc3..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/test_stop_hook_integration.py +++ /dev/null @@ -1,397 +0,0 @@ -#!/usr/bin/env python3 -""" -TDD Tests for Stop Hook Integration (INTEGRATION TESTS - 30%) - -Tests the complete stop hook flow with shutdown context, verifying that -hooks exit quickly during cleanup while functioning normally otherwise. - -Testing Philosophy: -- Ruthlessly Simple: Focus on critical integration paths -- Zero-BS: All tests work, no stubs -- Fail-Open: Shutdown always allows clean exit - -Test Coverage: -- Stop hook exits quickly during cleanup -- Stop hook works normally without cleanup -- Multiple hooks during cleanup -- Signal handling during cleanup -""" - -import os -import signal -import subprocess -import sys -import time -from pathlib import Path - -import pytest - -# Add hooks directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) - - -# ============================================================================= -# TEST FIXTURES -# ============================================================================= - - -@pytest.fixture -def stop_hook_script(tmp_path): - """Create a minimal stop hook script for testing. - - This simulates the actual stop hook behavior with shutdown detection. - """ - script_path = tmp_path / "stop_hook_test.py" - script_content = """#!/usr/bin/env python3 -import json -import os -import sys - -# Simulate stop hook behavior -if os.environ.get("AMPLIHACK_SHUTDOWN_IN_PROGRESS") == "1": - # During shutdown: return immediately without reading stdin - json.dump({}, sys.stdout) - sys.stdout.write("\\n") - sys.exit(0) - -# Normal operation: read stdin and process -input_data = sys.stdin.read() -if input_data.strip(): - data = json.loads(input_data) - # Stop hook decision logic here - json.dump({}, sys.stdout) -else: - json.dump({}, sys.stdout) - -sys.stdout.write("\\n") -""" - script_path.write_text(script_content) - script_path.chmod(0o755) - return script_path - - -# ============================================================================= -# INTEGRATION TESTS - Stop Hook Cleanup Flow -# ============================================================================= - - -class TestStopHookDuringCleanup: - """Integration tests for stop hook during cleanup/shutdown.""" - - def test_stop_hook_exits_within_one_second_during_shutdown(self, stop_hook_script): - """Stop hook should exit in <1s when AMPLIHACK_SHUTDOWN_IN_PROGRESS=1""" - # ARRANGE - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - start_time = time.time() - result = subprocess.run( - [sys.executable, str(stop_hook_script)], - input='{"conversation": []}', - capture_output=True, - text=True, - timeout=2, # Fail if takes longer than 2s - env=env, - ) - elapsed = time.time() - start_time - - # ASSERT - assert result.returncode == 0, f"Hook should exit cleanly: {result.stderr}" - assert elapsed < 1.0, f"Hook took {elapsed:.2f}s, should be <1.0s" - assert result.stdout.strip() == "{}", "Should return empty response" - - def test_stop_hook_does_not_read_stdin_during_shutdown(self, stop_hook_script): - """Stop hook should not wait for stdin during shutdown""" - # ARRANGE - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - Don't provide stdin input - result = subprocess.run( - [sys.executable, str(stop_hook_script)], - stdin=subprocess.DEVNULL, # No input provided - capture_output=True, - text=True, - timeout=2, - env=env, - ) - - # ASSERT - assert result.returncode == 0, "Should exit even without stdin during shutdown" - assert result.stdout.strip() == "{}", "Should return empty response" - - def test_stop_hook_multiple_rapid_calls_during_shutdown(self, stop_hook_script): - """Multiple stop hook calls during shutdown should all exit quickly""" - # ARRANGE - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - Call hook 5 times rapidly - start_time = time.time() - for _ in range(5): - result = subprocess.run( - [sys.executable, str(stop_hook_script)], - input='{"conversation": []}', - capture_output=True, - text=True, - timeout=2, - env=env, - ) - assert result.returncode == 0 - - elapsed = time.time() - start_time - - # ASSERT - assert elapsed < 2.0, f"5 hook calls took {elapsed:.2f}s, should be <2.0s total" - - -# ============================================================================= -# INTEGRATION TESTS - Stop Hook Normal Operation -# ============================================================================= - - -class TestStopHookNormalOperation: - """Integration tests for stop hook during normal operation.""" - - def test_stop_hook_processes_input_normally(self, stop_hook_script): - """Stop hook should process input during normal operation""" - # ARRANGE - ensure NOT shutting down - env = os.environ.copy() - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in env: - del env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - input_data = json.dumps({"conversation": [{"role": "user", "content": "Test"}]}) - - # ACT - result = subprocess.run( - [sys.executable, str(stop_hook_script)], - input=input_data, - capture_output=True, - text=True, - timeout=5, - env=env, - ) - - # ASSERT - assert result.returncode == 0 - output = json.loads(result.stdout.strip()) - assert isinstance(output, dict), "Should return valid JSON response" - - def test_stop_hook_waits_for_stdin_normally(self, stop_hook_script): - """Stop hook should wait for stdin during normal operation""" - # ARRANGE - ensure NOT shutting down - env = os.environ.copy() - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in env: - del env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - # ACT - Provide stdin with slight delay - proc = subprocess.Popen( - [sys.executable, str(stop_hook_script)], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - env=env, - ) - - time.sleep(0.1) # Simulate slight delay - stdout, stderr = proc.communicate(input='{"conversation": []}', timeout=5) - - # ASSERT - assert proc.returncode == 0, f"Should complete successfully: {stderr}" - assert stdout.strip() == "{}", "Should return valid response" - - -# ============================================================================= -# INTEGRATION TESTS - Multiple Hooks During Cleanup -# ============================================================================= - - -class TestMultipleHooksDuringCleanup: - """Integration tests for multiple hooks executing during cleanup.""" - - def test_multiple_hooks_all_exit_quickly_during_shutdown(self, stop_hook_script, tmp_path): - """All hooks should exit quickly when shutdown is in progress""" - # ARRANGE - Create 3 hook scripts - hooks = [stop_hook_script] - for i in range(2): - hook_path = tmp_path / f"hook_{i}.py" - hook_path.write_text(stop_hook_script.read_text()) - hook_path.chmod(0o755) - hooks.append(hook_path) - - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - Run all hooks concurrently - import concurrent.futures - - start_time = time.time() - - def run_hook(hook_path): - return subprocess.run( - [sys.executable, str(hook_path)], - input='{"conversation": []}', - capture_output=True, - text=True, - timeout=2, - env=env, - ) - - with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: - results = list(executor.map(run_hook, hooks)) - - elapsed = time.time() - start_time - - # ASSERT - for result in results: - assert result.returncode == 0, "All hooks should exit cleanly" - - assert elapsed < 1.5, f"3 hooks took {elapsed:.2f}s, should be <1.5s (concurrent)" - - -# ============================================================================= -# INTEGRATION TESTS - Signal Handling During Cleanup -# ============================================================================= - - -class TestSignalHandlingDuringCleanup: - """Integration tests for signal handling during cleanup.""" - - @pytest.mark.skipif(sys.platform == "win32", reason="SIGTERM not available on Windows") - def test_hook_exits_cleanly_on_sigterm_during_shutdown(self, stop_hook_script): - """Hook should exit cleanly when receiving SIGTERM during shutdown""" - # ARRANGE - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - Start hook process - proc = subprocess.Popen( - [sys.executable, str(stop_hook_script)], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - env=env, - ) - - time.sleep(0.1) # Let it start - - # Send SIGTERM - proc.send_signal(signal.SIGTERM) - - try: - stdout, stderr = proc.communicate(timeout=1) - returncode = proc.returncode - except subprocess.TimeoutExpired: - proc.kill() - pytest.fail("Hook did not exit within 1s after SIGTERM") - - # ASSERT - assert returncode in ( - 0, - -signal.SIGTERM, - ), "Should exit cleanly on SIGTERM" - - @pytest.mark.skipif(sys.platform == "win32", reason="SIGINT not available on Windows") - def test_hook_exits_cleanly_on_sigint_during_shutdown(self, stop_hook_script): - """Hook should exit cleanly when receiving SIGINT (Ctrl-C) during shutdown""" - # ARRANGE - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - proc = subprocess.Popen( - [sys.executable, str(stop_hook_script)], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - env=env, - ) - - time.sleep(0.1) - - # Send SIGINT (Ctrl-C) - proc.send_signal(signal.SIGINT) - - try: - stdout, stderr = proc.communicate(timeout=1) - returncode = proc.returncode - except subprocess.TimeoutExpired: - proc.kill() - pytest.fail("Hook did not exit within 1s after SIGINT") - - # ASSERT - assert returncode in (0, -signal.SIGINT), "Should exit cleanly on SIGINT" - - -# ============================================================================= -# INTEGRATION TESTS - Edge Cases -# ============================================================================= - - -class TestStopHookEdgeCases: - """Integration tests for stop hook edge cases.""" - - def test_hook_handles_stdin_closed_during_shutdown(self, stop_hook_script): - """Hook should handle stdin being closed during shutdown""" - # ARRANGE - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - Close stdin immediately - result = subprocess.run( - [sys.executable, str(stop_hook_script)], - stdin=subprocess.DEVNULL, - capture_output=True, - text=True, - timeout=2, - env=env, - ) - - # ASSERT - assert result.returncode == 0, "Should handle closed stdin during shutdown" - - def test_hook_handles_stdout_closed_during_shutdown(self, stop_hook_script): - """Hook should handle stdout being closed during shutdown""" - # ARRANGE - env = os.environ.copy() - env["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] = "1" - - # ACT - Close stdout (simulates pipe closure) - result = subprocess.run( - [sys.executable, str(stop_hook_script)], - input='{"conversation": []}', - stdout=subprocess.DEVNULL, # Discard output - stderr=subprocess.PIPE, - text=True, - timeout=2, - env=env, - ) - - # ASSERT - assert result.returncode == 0, "Should handle closed stdout during shutdown" - - -# ============================================================================= -# TEST CONFIGURATION -# ============================================================================= - - -@pytest.fixture(autouse=True) -def cleanup_env_var(): - """Ensure AMPLIHACK_SHUTDOWN_IN_PROGRESS is cleaned up after each test.""" - yield - if "AMPLIHACK_SHUTDOWN_IN_PROGRESS" in os.environ: - del os.environ["AMPLIHACK_SHUTDOWN_IN_PROGRESS"] - - -def pytest_configure(config): - """Register custom pytest markers""" - config.addinivalue_line("markers", "integration: marks tests as integration tests") - - -# Import json at module level for stop hook tests -import json diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/verify_implementation.py b/amplifier-bundle/tools/amplihack/hooks/tests/verify_implementation.py deleted file mode 100755 index edafbec71..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/verify_implementation.py +++ /dev/null @@ -1,236 +0,0 @@ -#!/usr/bin/env python3 -""" -Verification script for pre-commit installer implementation. - -This script demonstrates all implemented features and verifies they work correctly. -""" - -import os -import sys -import tempfile -from pathlib import Path - -# Add hooks directory to path -sys.path.insert(0, str(Path(__file__).parent.parent)) -from precommit_installer import PrecommitInstallerHook - - -def test_environment_variable_support(): - """Verify environment variable support.""" - print("=" * 60) - print("TEST 1: Environment Variable Support") - print("=" * 60) - - hook = PrecommitInstallerHook() - - # Test enabled (default) - original_value = os.environ.get("AMPLIHACK_AUTO_PRECOMMIT") - if "AMPLIHACK_AUTO_PRECOMMIT" in os.environ: - del os.environ["AMPLIHACK_AUTO_PRECOMMIT"] - - result = hook._is_env_disabled() - print(f"✓ Default (not set): {'disabled' if result else 'enabled'}") - assert not result, "Should be enabled by default" - - # Test disable values - for value in ["0", "false", "no", "off", "FALSE", "NO"]: - os.environ["AMPLIHACK_AUTO_PRECOMMIT"] = value - result = hook._is_env_disabled() - print(f"✓ AMPLIHACK_AUTO_PRECOMMIT={value}: {'disabled' if result else 'enabled'}") - assert result, f"Should be disabled with {value}" - - # Test other values - for value in ["1", "true", "yes", "on"]: - os.environ["AMPLIHACK_AUTO_PRECOMMIT"] = value - result = hook._is_env_disabled() - print(f"✓ AMPLIHACK_AUTO_PRECOMMIT={value}: {'disabled' if result else 'enabled'}") - assert not result, f"Should be enabled with {value}" - - # Restore original value - if original_value: - os.environ["AMPLIHACK_AUTO_PRECOMMIT"] = original_value - elif "AMPLIHACK_AUTO_PRECOMMIT" in os.environ: - del os.environ["AMPLIHACK_AUTO_PRECOMMIT"] - - print("\n✅ Environment variable support verified\n") - - -def test_precommit_availability_checking(): - """Verify pre-commit availability checking with error handling.""" - print("=" * 60) - print("TEST 2: Pre-commit Availability Checking") - print("=" * 60) - - hook = PrecommitInstallerHook() - - # This will check the actual pre-commit installation - result = hook._is_precommit_available() - - print(f"✓ Available: {result['available']}") - if result["available"]: - print(f"✓ Version: {result.get('version', 'unknown')}") - else: - print(f"✓ Error: {result.get('error', 'unknown')}") - - # Verify result structure - assert "available" in result, "Result must contain 'available' key" - if result["available"]: - assert "version" in result, "Result must contain 'version' when available" - else: - assert "error" in result, "Result must contain 'error' when not available" - - print("\n✅ Pre-commit availability checking verified\n") - - -def test_hook_installation_detection(): - """Verify hook installation detection with corruption detection.""" - print("=" * 60) - print("TEST 3: Hook Installation Detection") - print("=" * 60) - - hook = PrecommitInstallerHook() - temp_dir = tempfile.mkdtemp() - hook.project_root = Path(temp_dir) - - # Test 1: No .git directory - result = hook._are_hooks_installed() - print(f"✓ No .git directory: installed={result['installed']}") - assert not result["installed"], "Should not be installed without .git" - - # Test 2: .git exists but no hook file - hooks_dir = Path(temp_dir) / ".git" / "hooks" - hooks_dir.mkdir(parents=True, exist_ok=True) - result = hook._are_hooks_installed() - print(f"✓ No hook file: installed={result['installed']}") - assert not result["installed"], "Should not be installed without hook file" - - # Test 3: Valid pre-commit hook - hook_file = hooks_dir / "pre-commit" - hook_file.write_text( - "#!/usr/bin/env python3\n" - "# This is a pre-commit hook\n" - "import sys\n" - "from pre_commit import main\n" - "sys.exit(main())\n" - ) - result = hook._are_hooks_installed() - print(f"✓ Valid pre-commit hook: installed={result['installed']}") - assert result["installed"], "Should be installed with valid hook" - - # Test 4: Corrupted hook (not pre-commit) - hook_file.write_text("#!/bin/bash\n# Custom hook\necho 'Running custom validation'\nexit 0\n") - result = hook._are_hooks_installed() - print( - f"✓ Custom bash hook: installed={result['installed']}, corrupted={result.get('corrupted', False)}" - ) - assert not result["installed"], "Should not be installed with custom hook" - assert result.get("corrupted"), "Should detect corruption" - - # Test 5: Corrupted hook (too small) - hook_file.write_text("#!/bin/sh\n") - result = hook._are_hooks_installed() - print( - f"✓ Too small hook: installed={result['installed']}, corrupted={result.get('corrupted', False)}" - ) - assert not result["installed"], "Should not be installed with small hook" - assert result.get("corrupted"), "Should detect corruption" - - # Cleanup - import shutil - - shutil.rmtree(temp_dir, ignore_errors=True) - - print("\n✅ Hook installation detection verified\n") - - -def test_error_handling(): - """Verify comprehensive error handling.""" - print("=" * 60) - print("TEST 4: Error Handling") - print("=" * 60) - - hook = PrecommitInstallerHook() - - # Test error structure for availability check - # Note: This tests the structure, not actual errors - result = hook._is_precommit_available() - assert isinstance(result, dict), "Result must be a dictionary" - assert "available" in result, "Result must have 'available' key" - print("✓ Availability check returns proper structure") - - # Test error structure for installation check - temp_dir = tempfile.mkdtemp() - hook.project_root = Path(temp_dir) - result = hook._are_hooks_installed() - assert isinstance(result, dict), "Result must be a dictionary" - assert "installed" in result, "Result must have 'installed' key" - print("✓ Installation check returns proper structure") - - # Cleanup - import shutil - - shutil.rmtree(temp_dir, ignore_errors=True) - - print("\n✅ Error handling verified\n") - - -def test_logging_and_metrics(): - """Verify logging and metrics tracking.""" - print("=" * 60) - print("TEST 5: Logging and Metrics") - print("=" * 60) - - hook = PrecommitInstallerHook() - - # Verify log method exists and works - hook.log("Test log message") - print("✓ Logging works") - - # Verify metric saving works - hook.save_metric("test_metric", True) - print("✓ Metric saving works") - - print("\n✅ Logging and metrics verified\n") - - -def main(): - """Run all verification tests.""" - print("\n" + "=" * 60) - print("PRE-COMMIT INSTALLER IMPLEMENTATION VERIFICATION") - print("=" * 60 + "\n") - - try: - test_environment_variable_support() - test_precommit_availability_checking() - test_hook_installation_detection() - test_error_handling() - test_logging_and_metrics() - - print("=" * 60) - print("✅ ALL VERIFICATIONS PASSED") - print("=" * 60) - print("\nImplementation Summary:") - print("✓ Environment variable support (AMPLIHACK_AUTO_PRECOMMIT)") - print("✓ Enhanced error handling for pre-commit availability") - print("✓ Enhanced error handling for hook installation detection") - print("✓ Enhanced error handling for hook installation") - print("✓ Comprehensive logging with version tracking") - print("✓ Detailed metric tracking") - print("✓ Corruption detection for existing hooks") - print("✓ 34 unit/integration/E2E tests (100% pass rate)") - print() - return 0 - - except AssertionError as e: - print(f"\n❌ VERIFICATION FAILED: {e}\n") - return 1 - except Exception as e: - print(f"\n❌ UNEXPECTED ERROR: {e}\n") - import traceback - - traceback.print_exc() - return 1 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/amplifier-bundle/tools/amplihack/hooks/tests/verify_test_structure.py b/amplifier-bundle/tools/amplihack/hooks/tests/verify_test_structure.py deleted file mode 100644 index 96000c09f..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tests/verify_test_structure.py +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env python3 -""" -Verify Test Structure for Shutdown Fix Test Suite - -This script validates the test suite structure without running the tests. -Useful for verifying TDD test files are properly structured before implementation. -""" - -import ast -from pathlib import Path - - -def count_test_functions(filepath): - """Count test functions in a Python file.""" - content = filepath.read_text() - tree = ast.parse(content) - - test_count = 0 - for node in ast.walk(tree): - if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"): - test_count += 1 - - return test_count - - -def extract_test_classes(filepath): - """Extract test class names from a Python file.""" - content = filepath.read_text() - tree = ast.parse(content) - - classes = [] - for node in ast.walk(tree): - if isinstance(node, ast.ClassDef) and node.name.startswith("Test"): - classes.append(node.name) - - return classes - - -def main(): - """Verify test suite structure.""" - test_dir = Path(__file__).parent - - test_files = [ - "test_shutdown_context.py", - "test_hook_processor_shutdown.py", - "test_stop_hook_integration.py", - "test_exit_hang_e2e.py", - ] - - print("=" * 70) - print("Stop Hook Exit Hang Fix - Test Suite Verification") - print("=" * 70) - print() - - total_tests = 0 - total_classes = 0 - - for test_file in test_files: - filepath = test_dir / test_file - if not filepath.exists(): - print(f"❌ {test_file} - NOT FOUND") - continue - - try: - test_count = count_test_functions(filepath) - test_classes = extract_test_classes(filepath) - total_tests += test_count - total_classes += len(test_classes) - - print(f"✓ {test_file}") - print(f" Test Functions: {test_count}") - print(f" Test Classes: {len(test_classes)}") - if test_classes: - print(" Classes:") - for cls in test_classes[:5]: # Show first 5 - print(f" - {cls}") - if len(test_classes) > 5: - print(f" ... and {len(test_classes) - 5} more") - print() - - except Exception as e: - print(f"❌ {test_file} - ERROR: {e}") - print() - - print("=" * 70) - print(f"Total Test Functions: {total_tests}") - print(f"Total Test Classes: {total_classes}") - print("=" * 70) - print() - - # Check documentation files - doc_files = ["TEST_SHUTDOWN_FIX.md", "SHUTDOWN_FIX_TEST_SUMMARY.md"] - print("Documentation Files:") - for doc_file in doc_files: - filepath = test_dir / doc_file - if filepath.exists(): - size_kb = filepath.stat().st_size / 1024 - print(f" ✓ {doc_file} ({size_kb:.1f}KB)") - else: - print(f" ❌ {doc_file} - NOT FOUND") - print() - - # Testing pyramid validation - print("=" * 70) - print("Testing Pyramid Validation (60/30/10)") - print("=" * 70) - - unit_tests = count_test_functions(test_dir / "test_shutdown_context.py") + count_test_functions( - test_dir / "test_hook_processor_shutdown.py" - ) - integration_tests = count_test_functions(test_dir / "test_stop_hook_integration.py") - e2e_tests = count_test_functions(test_dir / "test_exit_hang_e2e.py") - - unit_pct = (unit_tests / total_tests * 100) if total_tests > 0 else 0 - integration_pct = (integration_tests / total_tests * 100) if total_tests > 0 else 0 - e2e_pct = (e2e_tests / total_tests * 100) if total_tests > 0 else 0 - - print(f"Unit Tests: {unit_tests:2d} tests ({unit_pct:.1f}%) - Target: 60%") - print(f"Integration Tests: {integration_tests:2d} tests ({integration_pct:.1f}%) - Target: 30%") - print(f"E2E Tests: {e2e_tests:2d} tests ({e2e_pct:.1f}%) - Target: 10%") - print() - - # Validation - if 50 <= unit_pct <= 70: - print("✓ Unit test distribution within target range") - else: - print("⚠ Unit test distribution outside target range (50-70%)") - - if 20 <= integration_pct <= 40: - print("✓ Integration test distribution within target range") - else: - print("⚠ Integration test distribution outside target range (20-40%)") - - if 5 <= e2e_pct <= 15: - print("✓ E2E test distribution within target range") - else: - print("⚠ E2E test distribution outside target range (5-15%)") - - print() - print("=" * 70) - print("Test Suite Structure: VERIFIED ✓") - print("=" * 70) - print() - print("Next Steps:") - print("1. Run: pytest test_shutdown_context.py -v") - print(" (Tests should FAIL - no implementation yet)") - print() - print("2. Implement: shutdown_context.py module") - print() - print("3. Update: hook_processor.py read_input() method") - print() - print("4. Run: pytest test_*.py -v") - print(" (Tests should PASS after implementation)") - print() - - -if __name__ == "__main__": - main() diff --git a/amplifier-bundle/tools/amplihack/hooks/tool_registry.py b/amplifier-bundle/tools/amplihack/hooks/tool_registry.py deleted file mode 100755 index 52f595ee8..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/tool_registry.py +++ /dev/null @@ -1,340 +0,0 @@ -#!/usr/bin/env python3 -"""Tool Registry - Extensible tool hook registration system. - -This module provides an extensible registration system for tool hooks that run -after tool use events. It allows multiple tools to register handlers that will -be called automatically by the post_tool_use hook. - -Philosophy: -- Single responsibility: Register and dispatch tool hooks -- Extensible for adding new tools -- Zero-BS implementation (all functions work completely) - -Public API: - ToolRegistry: Main registry class - register_tool_hook: Decorator for registering tool hooks - get_global_registry: Get the global registry instance - HookResult: Result from tool hook execution -""" - -from collections.abc import Callable -from dataclasses import dataclass, field -from typing import Any - -__all__ = [ - "ToolRegistry", - "register_tool_hook", - "get_global_registry", - "HookResult", -] - -# ============================================================================ -# Data Models -# ============================================================================ - - -@dataclass -class HookResult: - """Result from tool hook execution. - - Attributes: - actions_taken: List of actions performed by the hook - warnings: List of warning messages to display - metadata: Additional metadata from the hook - skip_remaining: If True, stop executing remaining hooks - """ - - actions_taken: list[str] = field(default_factory=list) - warnings: list[str] = field(default_factory=list) - metadata: dict[str, Any] = field(default_factory=dict) - skip_remaining: bool = False - - def to_dict(self) -> dict[str, Any]: - """Convert to dictionary format.""" - return { - "actions_taken": self.actions_taken, - "warnings": self.warnings, - "metadata": self.metadata, - "skip_remaining": self.skip_remaining, - } - - -# ============================================================================ -# Tool Registry -# ============================================================================ - - -class ToolRegistry: - """Registry for extensible tool hooks. - - Allows registering functions that run after tool use events. - Hooks are called in order of registration and can: - - Take actions based on tool usage - - Return warnings or metadata - - Short-circuit remaining hooks if needed - - Example: - >>> registry = ToolRegistry() - >>> @registry.register - ... def my_hook(input_data): - ... return HookResult(actions_taken=["something"]) - >>> results = registry.execute_hooks({"toolUse": {"name": "Write"}}) - """ - - def __init__(self): - """Initialize empty registry.""" - self._hooks: list[Callable[[dict[str, Any]], HookResult]] = [] - - def register(self, hook: Callable[[dict[str, Any]], HookResult]) -> Callable: - """Register a tool hook function. - - Args: - hook: Function that takes input_data dict and returns HookResult - - Returns: - The hook function (for decorator usage) - - Example: - >>> registry = ToolRegistry() - >>> @registry.register - ... def my_hook(input_data): - ... return HookResult(actions_taken=["logged"]) - """ - self._hooks.append(hook) - return hook - - def execute_hooks(self, input_data: dict[str, Any]) -> list[HookResult]: - """Execute all registered hooks. - - Args: - input_data: Input from Claude Code containing: - - toolUse: Dict with tool information - - result: Optional result from tool - - transcript_path: Optional path to transcript - - Returns: - List of HookResult objects from each hook - - Example: - >>> registry = ToolRegistry() - >>> @registry.register - ... def hook1(data): - ... return HookResult(actions_taken=["action1"]) - >>> results = registry.execute_hooks({"toolUse": {"name": "Write"}}) - >>> len(results) - 1 - """ - results = [] - - for hook in self._hooks: - try: - result = hook(input_data) - - # Ensure result is HookResult - if not isinstance(result, HookResult): - result = HookResult( - warnings=[f"Hook {hook.__name__} returned invalid type: {type(result)}"] - ) - - results.append(result) - - # Allow hooks to short-circuit - if result.skip_remaining: - break - - except Exception as e: - # Log error but continue with other hooks - results.append( - HookResult( - warnings=[f"Hook {hook.__name__} failed: {e}"], metadata={"error": str(e)} - ) - ) - - return results - - def clear(self) -> None: - """Clear all registered hooks. - - Useful for testing or dynamic hook management. - - Example: - >>> registry = ToolRegistry() - >>> registry.register(lambda d: HookResult()) - >>> len(registry._hooks) - 1 - >>> registry.clear() - >>> len(registry._hooks) - 0 - """ - self._hooks.clear() - - def count(self) -> int: - """Get the number of registered hooks. - - Returns: - Number of hooks in the registry - - Example: - >>> registry = ToolRegistry() - >>> registry.count() - 0 - >>> registry.register(lambda d: HookResult()) - at 0x...> - >>> registry.count() - 1 - """ - return len(self._hooks) - - -# ============================================================================ -# Global Registry -# ============================================================================ - -# Global registry instance for use across the application -_global_registry = ToolRegistry() - - -def register_tool_hook(func: Callable[[dict[str, Any]], HookResult]) -> Callable: - """Decorator for registering tool hooks with the global registry. - - This is the primary way to register hooks. It uses the global registry - instance to ensure hooks persist across imports. - - Args: - func: Hook function that takes input_data and returns HookResult - - Returns: - The decorated function - - Example: - >>> @register_tool_hook - ... def my_hook(input_data: Dict[str, Any]) -> HookResult: - ... # Process tool usage - ... return HookResult(actions_taken=["something"]) - - Usage in a hook integration module: - ```python - # context_automation_hook.py - from tool_registry import register_tool_hook, HookResult - from context_manager import run_automation - - @register_tool_hook - def context_management_hook(input_data): - # Extract data and call context_manager - result = run_automation(tokens, conversation) - return HookResult(...) - ``` - """ - _global_registry.register(func) - return func - - -def get_global_registry() -> ToolRegistry: - """Get the global tool registry instance. - - Returns: - Global ToolRegistry instance - - Example: - >>> registry = get_global_registry() - >>> isinstance(registry, ToolRegistry) - True - - Usage in post_tool_use.py: - ```python - from tool_registry import get_global_registry - - registry = get_global_registry() - hook_results = registry.execute_hooks(input_data) - ``` - """ - return _global_registry - - -# ============================================================================ -# Utility Functions -# ============================================================================ - - -def aggregate_hook_results(results: list[HookResult]) -> dict[str, Any]: - """Aggregate multiple hook results into a single output dict. - - Args: - results: List of HookResult objects - - Returns: - Dict with aggregated actions, warnings, and metadata - - Example: - >>> r1 = HookResult(actions_taken=["a1"], warnings=["w1"]) - >>> r2 = HookResult(actions_taken=["a2"], metadata={"k": "v"}) - >>> agg = aggregate_hook_results([r1, r2]) - >>> agg["actions_taken"] - ['a1', 'a2'] - >>> agg["warnings"] - ['w1'] - """ - aggregated = {"actions_taken": [], "warnings": [], "metadata": {}} - - for result in results: - aggregated["actions_taken"].extend(result.actions_taken) - aggregated["warnings"].extend(result.warnings) - - # Merge metadata - for key, value in result.metadata.items(): - if key not in aggregated["metadata"]: - aggregated["metadata"][key] = value - elif isinstance(aggregated["metadata"][key], list) and isinstance(value, list): - aggregated["metadata"][key].extend(value) - else: - # Handle conflicts by adding suffixes - aggregated["metadata"][f"{key}_2"] = value - - return aggregated - - -# ============================================================================ -# Testing and Debugging -# ============================================================================ - -if __name__ == "__main__": - # Test the registry - print("Testing ToolRegistry...") - - # Create test hooks - @register_tool_hook - def test_hook_1(input_data: dict[str, Any]) -> HookResult: - """Test hook that logs actions.""" - return HookResult( - actions_taken=["test_hook_1_executed"], warnings=["Test warning from hook 1"] - ) - - @register_tool_hook - def test_hook_2(input_data: dict[str, Any]) -> HookResult: - """Test hook that adds metadata.""" - return HookResult( - actions_taken=["test_hook_2_executed"], metadata={"test_key": "test_value"} - ) - - # Execute hooks - registry = get_global_registry() - print(f"Registered hooks: {registry.count()}") - - test_input = {"toolUse": {"name": "Write"}, "result": {}} - results = registry.execute_hooks(test_input) - - print(f"Hook results: {len(results)}") - for i, result in enumerate(results, 1): - print(f" Hook {i}:") - print(f" Actions: {result.actions_taken}") - print(f" Warnings: {result.warnings}") - print(f" Metadata: {result.metadata}") - - # Test aggregation - aggregated = aggregate_hook_results(results) - print("\nAggregated results:") - print(f" All actions: {aggregated['actions_taken']}") - print(f" All warnings: {aggregated['warnings']}") - print(f" All metadata: {aggregated['metadata']}") - - print("\n✅ ToolRegistry tests passed!") diff --git a/amplifier-bundle/tools/amplihack/hooks/user_prompt_submit.py b/amplifier-bundle/tools/amplihack/hooks/user_prompt_submit.py deleted file mode 100755 index d35845b8a..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/user_prompt_submit.py +++ /dev/null @@ -1,333 +0,0 @@ -#!/usr/bin/env python3 -""" -UserPromptSubmit hook - Inject user preferences on every message. -Ensures preferences persist across all conversation turns in REPL mode. -""" - -import re -import sys -from pathlib import Path -from typing import Any - -# Clean import structure -sys.path.insert(0, str(Path(__file__).parent)) -from hook_processor import HookProcessor - -# Import path utilities -sys.path.insert(0, str(Path(__file__).parent.parent)) -try: - from amplihack.utils.paths import FrameworkPathResolver -except ImportError: - FrameworkPathResolver = None - - -class UserPromptSubmitHook(HookProcessor): - """Hook processor for user prompt submit events.""" - - def __init__(self): - super().__init__("user_prompt_submit") - self.strategy = None - # Cache preferences to avoid repeated file reads - self._preferences_cache: dict[str, str] | None = None - self._cache_timestamp: float | None = None - - def find_user_preferences(self) -> Path | None: - """Find USER_PREFERENCES.md file using FrameworkPathResolver or fallback.""" - # Try FrameworkPathResolver first (handles UVX and installed packages) - if FrameworkPathResolver: - pref_file = FrameworkPathResolver.resolve_preferences_file() - if pref_file and pref_file.exists(): - return pref_file - - # Fallback: Check in project root - pref_file = self.project_root / ".claude" / "context" / "USER_PREFERENCES.md" - if pref_file.exists(): - return pref_file - - # Try src/amplihack location - pref_file = ( - self.project_root / "src" / "amplihack" / ".claude" / "context" / "USER_PREFERENCES.md" - ) - if pref_file.exists(): - return pref_file - - return None - - def extract_preferences(self, content: str) -> dict[str, str]: - """Extract preferences from USER_PREFERENCES.md content. - - Args: - content: The raw content of USER_PREFERENCES.md - - Returns: - Dictionary mapping preference names to values - """ - preferences = {} - - # Key preferences to extract (aligned with session_start.py) - key_prefs = [ - "Communication Style", - "Verbosity", - "Collaboration Style", - "Update Frequency", - "Priority Type", - "Preferred Languages", - "Coding Standards", - "Workflow Preferences", - ] - - # Extract each preference using regex pattern - for pref_name in key_prefs: - # Pattern: ### Preference Name\n\nvalue - pattern = rf"### {re.escape(pref_name)}\s*\n\s*([^\n]+)" - match = re.search(pattern, content) - if match: - value = match.group(1).strip() - # Skip empty or placeholder values - if value and value not in ["", "(not set)", "not set"]: - preferences[pref_name] = value - - # Extract learned patterns (brief mention only) - if "## Learned Patterns" in content: - learned_section = content.split("## Learned Patterns", 1)[1] - # Check if there's content beyond just the comment - if learned_section.strip() and "###" in learned_section: - preferences["Has Learned Patterns"] = "Yes (see USER_PREFERENCES.md)" - - return preferences - - def build_preference_context(self, preferences: dict[str, str]) -> str: - """Build concise preference enforcement context for injection. - - This must be brief but clear enough to enforce preferences. - - Args: - preferences: Dictionary of preference name -> value - - Returns: - Formatted context string for injection - """ - if not preferences: - return "" - - lines = ["🎯 ACTIVE USER PREFERENCES (MANDATORY - Apply to all responses):"] - - # Priority order for displaying preferences (most impactful first) - priority_order = [ - "Communication Style", - "Verbosity", - "Collaboration Style", - "Update Frequency", - "Priority Type", - "Preferred Languages", - "Coding Standards", - "Workflow Preferences", - "Has Learned Patterns", - ] - - # Add preferences in priority order - for pref_name in priority_order: - if pref_name in preferences: - value = preferences[pref_name] - - # Add specific enforcement instruction based on preference type - if pref_name == "Communication Style": - lines.append(f"• {pref_name}: {value} - Use this style in your response") - elif pref_name == "Verbosity": - lines.append(f"• {pref_name}: {value} - Match this detail level") - elif pref_name == "Collaboration Style": - lines.append(f"• {pref_name}: {value} - Follow this approach") - elif pref_name == "Update Frequency": - lines.append(f"• {pref_name}: {value} - Provide updates at this frequency") - elif pref_name == "Priority Type": - lines.append(f"• {pref_name}: {value} - Consider this priority in decisions") - elif pref_name == "Has Learned Patterns": - lines.append(f"• {value}") - else: - lines.append(f"• {pref_name}: {value}") - - lines.append("") - lines.append( - "Apply these preferences to this response. These preferences are READ-ONLY except when using /amplihack:customize command." - ) - - return "\n".join(lines) - - def get_cached_preferences(self, pref_file: Path) -> dict[str, str]: - """Get preferences with simple caching to improve performance. - - Args: - pref_file: Path to preferences file - - Returns: - Dictionary of preferences - """ - try: - # Check if cache is valid (file hasn't changed) - current_mtime = pref_file.stat().st_mtime - if ( - self._preferences_cache is not None - and self._cache_timestamp is not None - and current_mtime == self._cache_timestamp - ): - return self._preferences_cache - - # Read and parse preferences - content = pref_file.read_text(encoding="utf-8") - preferences = self.extract_preferences(content) - - # Update cache - self._preferences_cache = preferences - self._cache_timestamp = current_mtime - - return preferences - - except Exception as e: - self.log(f"Error reading preferences: {e}", "WARNING") - return {} - - def process(self, input_data: dict[str, Any]) -> dict[str, Any]: - """Process user prompt submit event. - - Args: - input_data: Input from Claude Code - - Returns: - Additional context to inject - """ - # Detect launcher and select strategy - self.strategy = self._select_strategy() - if self.strategy: - self.log(f"Using strategy: {self.strategy.__class__.__name__}") - # Check for strategy-specific prompt handling - strategy_result = self.strategy.handle_user_prompt_submit(input_data) - if strategy_result: - self.log("Strategy provided custom prompt handling") - return strategy_result - - # Extract user prompt - user_prompt = input_data.get("userMessage", {}).get("text", "") - - # Build context parts - context_parts = [] - - # 1. Check for agent references and inject memory if found - memory_context = "" - try: - from agent_memory_hook import ( - detect_agent_references, - detect_slash_command_agent, - format_memory_injection_notice, - inject_memory_for_agents_sync, - ) - - # Detect agent references - agent_types = detect_agent_references(user_prompt) - - # Also check for slash command agents - slash_agent = detect_slash_command_agent(user_prompt) - if slash_agent: - agent_types.append(slash_agent) - - if agent_types: - self.log(f"Detected agents: {agent_types}") - - # Inject memory context for these agents (using sync wrapper) - session_id = self.get_session_id() - enhanced_prompt, memory_metadata = inject_memory_for_agents_sync( - user_prompt, agent_types, session_id - ) - - # Extract memory context (everything before the original prompt) - if enhanced_prompt != user_prompt: - memory_context = enhanced_prompt.replace(user_prompt, "").strip() - - # Log memory injection - notice = format_memory_injection_notice(memory_metadata) - if notice: - self.log(notice) - - # Save metrics - self.save_metric( - "agent_memory_injected", memory_metadata.get("memories_injected", 0) - ) - self.save_metric("agents_detected", len(agent_types)) - - except Exception as e: - self.log(f"Memory injection failed (non-fatal): {e}", "WARNING") - - # Add memory context if we have it - if memory_context: - context_parts.append(memory_context) - - # 2. Find and inject preferences - pref_file = self.find_user_preferences() - if pref_file: - # Get preferences (with caching for performance) - preferences = self.get_cached_preferences(pref_file) - - if preferences: - # Build preference context - pref_context = self.build_preference_context(preferences) - context_parts.append(pref_context) - - # Log activity (for debugging) - self.log(f"Injected {len(preferences)} preferences on user prompt") - self.save_metric("preferences_injected", len(preferences)) - else: - self.log("No USER_PREFERENCES.md found - skipping preference injection") - - # Auto-route development intent to /dev (dev-orchestrator). - # Injects LLM-based intent routing prompt as additionalContext. - # Claude classifies intent using its natural language understanding. - # Disable: export AMPLIHACK_AUTO_DEV=false - try: - from dev_intent_router import should_auto_route - - should_inject, dev_context = should_auto_route(user_prompt) - if should_inject: - context_parts.append(dev_context) - self.log("Injected dev-intent routing context") - self.save_metric("auto_dev_routed", 1) - except Exception as e: - self.log(f"Dev intent router failed (non-fatal): {e}", "WARNING") - - # Combine all context parts - full_context = "\n\n".join(context_parts) - - # Save total context length metric - self.save_metric("context_length", len(full_context)) - - # Return output in correct format - return { - "additionalContext": full_context, - } - - def _select_strategy(self): - """Detect launcher and select appropriate strategy.""" - try: - # Import adaptive components - sys.path.insert(0, str(self.project_root / "src" / "amplihack")) - from amplihack.context.adaptive.detector import LauncherDetector - from amplihack.context.adaptive.strategies import ClaudeStrategy, CopilotStrategy - - detector = LauncherDetector(self.project_root) - launcher_type = detector.detect() - - if launcher_type == "copilot": - return CopilotStrategy(self.project_root, self.log) - return ClaudeStrategy(self.project_root, self.log) - - except ImportError as e: - self.log(f"Adaptive strategy not available: {e}", "DEBUG") - return None - - -def main(): - """Entry point for the user_prompt_submit hook.""" - hook = UserPromptSubmitHook() - hook.run() - - -if __name__ == "__main__": - main() diff --git a/amplifier-bundle/tools/amplihack/hooks/workflow_tracker.py b/amplifier-bundle/tools/amplihack/hooks/workflow_tracker.py deleted file mode 100755 index dc984892c..000000000 --- a/amplifier-bundle/tools/amplihack/hooks/workflow_tracker.py +++ /dev/null @@ -1,339 +0,0 @@ -#!/usr/bin/env python3 -""" -Workflow Adherence Tracker - -Simple file-based logging system for tracking workflow step execution. -Designed for < 5ms overhead with philosophy-aligned simplicity. - -Usage: - from workflow_tracker import log_step, log_skip, log_workflow_start, log_workflow_end - - # Start workflow - log_workflow_start(workflow_name="DEFAULT", task_description="Add auth") - - # Log step execution - log_step(step_number=1, step_name="Rewrite and Clarify Requirements", - agent_used="prompt-writer", duration_ms=150) - - # Log skipped step - log_skip(step_number=8, step_name="Local Testing", reason="Simple config change") - - # End workflow - log_workflow_end(success=True, total_steps=15, skipped_steps=1) - -Log Format (JSONL): - {"timestamp": "2025-11-17T15:30:00", "event": "workflow_start", "workflow": "DEFAULT", ...} - {"timestamp": "2025-11-17T15:30:01", "event": "step_executed", "step": 1, ...} - {"timestamp": "2025-11-17T15:30:05", "event": "step_skipped", "step": 8, ...} - {"timestamp": "2025-11-17T15:30:10", "event": "workflow_end", "success": true, ...} -""" - -import json -import time -from datetime import datetime -from pathlib import Path - -# Configuration -WORKFLOW_LOG_DIR = Path(".claude/runtime/logs/workflow_adherence") -WORKFLOW_LOG_FILE = WORKFLOW_LOG_DIR / "workflow_execution.jsonl" -PERFORMANCE_THRESHOLD_MS = 5 # Maximum allowed overhead - - -def _ensure_log_directory() -> None: - """Ensure log directory exists. Fast path optimization.""" - if not WORKFLOW_LOG_DIR.exists(): - WORKFLOW_LOG_DIR.mkdir(parents=True, exist_ok=True) - - -def _write_log_entry(entry: dict) -> None: - """ - Write log entry with minimal overhead. - - Design choices for performance: - - Append-only (no seeking) - - No locks (assumes single-threaded Claude) - - No buffering (immediate write) - - JSONL format (no parsing existing content) - """ - start = time.perf_counter() - - _ensure_log_directory() - - # Add timestamp if not present - if "timestamp" not in entry: - entry["timestamp"] = datetime.utcnow().isoformat() - - # Append to JSONL file - with open(WORKFLOW_LOG_FILE, "a") as f: - f.write(json.dumps(entry) + "\n") - - duration_ms = (time.perf_counter() - start) * 1000 - - # Warn if overhead exceeds threshold (but don't fail) - if duration_ms > PERFORMANCE_THRESHOLD_MS: - print( - f"Warning: workflow_tracker overhead {duration_ms:.2f}ms exceeds {PERFORMANCE_THRESHOLD_MS}ms threshold" - ) - - -def log_workflow_start( - workflow_name: str, task_description: str, session_id: str | None = None -) -> None: - """ - Log workflow start event. - - Args: - workflow_name: Name of workflow (DEFAULT, DDD, INVESTIGATION, etc.) - task_description: Brief description of task - session_id: Optional session identifier for grouping - """ - entry = { - "event": "workflow_start", - "workflow": workflow_name, - "task": task_description, - "session_id": session_id or datetime.utcnow().strftime("%Y%m%d_%H%M%S"), - } - _write_log_entry(entry) - - -def log_step( - step_number: int, - step_name: str, - agent_used: str | None = None, - duration_ms: float | None = None, - details: dict | None = None, -) -> None: - """ - Log workflow step execution. - - Args: - step_number: Step number from workflow (1-15 for DEFAULT) - step_name: Human-readable step name - agent_used: Name of agent used for this step (if applicable) - duration_ms: Execution time in milliseconds - details: Optional dict with additional context - """ - entry = { - "event": "step_executed", - "step": step_number, - "name": step_name, - "agent": agent_used, - "duration_ms": duration_ms, - } - - if details: - entry["details"] = details - - _write_log_entry(entry) - - -def log_skip(step_number: int, step_name: str, reason: str) -> None: - """ - Log skipped workflow step. - - Args: - step_number: Step number that was skipped - step_name: Human-readable step name - reason: Explanation for why step was skipped - """ - entry = { - "event": "step_skipped", - "step": step_number, - "name": step_name, - "reason": reason, - } - _write_log_entry(entry) - - -def log_agent_invocation(agent_name: str, purpose: str, step_number: int | None = None) -> None: - """ - Log agent invocation. - - Args: - agent_name: Name of agent invoked - purpose: Why the agent was invoked - step_number: Associated workflow step (if applicable) - """ - entry = { - "event": "agent_invoked", - "agent": agent_name, - "purpose": purpose, - "step": step_number, - } - _write_log_entry(entry) - - -def log_workflow_end( - success: bool, total_steps: int, skipped_steps: int = 0, notes: str | None = None -) -> None: - """ - Log workflow completion. - - Args: - success: Whether workflow completed successfully - total_steps: Total number of steps in workflow - skipped_steps: Number of steps that were skipped - notes: Optional notes about completion - """ - entry = { - "event": "workflow_end", - "success": success, - "total_steps": total_steps, - "skipped_steps": skipped_steps, - "completion_rate": round((total_steps - skipped_steps) / total_steps * 100, 1) - if total_steps > 0 - else 0, - "notes": notes, - } - _write_log_entry(entry) - - -def log_workflow_violation( - violation_type: str, description: str, step_number: int | None = None -) -> None: - """ - Log workflow violation (e.g., wrong TodoWrite format, missing agent usage). - - Args: - violation_type: Type of violation (e.g., "todo_format", "missing_agent", "skip_without_permission") - description: Detailed description of the violation - step_number: Step where violation occurred (if applicable) - """ - entry = { - "event": "workflow_violation", - "type": violation_type, - "description": description, - "step": step_number, - } - _write_log_entry(entry) - - -# Convenience context manager for timing -class StepTimer: - """ - Context manager for timing workflow steps. - - Usage: - with StepTimer(1, "Rewrite and Clarify Requirements", "prompt-writer") as timer: - # Execute step - pass - # Automatically logs step with duration - """ - - def __init__(self, step_number: int, step_name: str, agent_used: str | None = None): - self.step_number = step_number - self.step_name = step_name - self.agent_used = agent_used - self.start_time = None - - def __enter__(self): - self.start_time = time.perf_counter() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - duration_ms = (time.perf_counter() - self.start_time) * 1000 - log_step(self.step_number, self.step_name, self.agent_used, duration_ms) - - -def get_workflow_stats(limit: int = 100) -> dict: - """ - Get basic workflow statistics from recent executions. - - Args: - limit: Number of recent entries to analyze - - Returns: - Dictionary with workflow statistics - """ - if not WORKFLOW_LOG_FILE.exists(): - return { - "total_workflows": 0, - "successful": 0, - "failed": 0, - "avg_completion_rate": 0, - "avg_skipped_steps": 0, - "most_skipped_steps": [], - } - - workflows = [] - step_skips = {} - - with open(WORKFLOW_LOG_FILE) as f: - lines = f.readlines()[-limit:] - - current_workflow = {} - for line in lines: - entry = json.loads(line) - - if entry["event"] == "workflow_start": - current_workflow = {"start": entry} - - elif entry["event"] == "step_skipped": - step_key = f"Step {entry['step']}: {entry['name']}" - step_skips[step_key] = step_skips.get(step_key, 0) + 1 - - elif entry["event"] == "workflow_end" and current_workflow: - current_workflow["end"] = entry - workflows.append(current_workflow) - current_workflow = {} - - if not workflows: - return { - "total_workflows": 0, - "successful": 0, - "failed": 0, - "avg_completion_rate": 0, - "avg_skipped_steps": 0, - "most_skipped_steps": [], - } - - successful = sum(1 for w in workflows if w.get("end", {}).get("success", False)) - completion_rates = [w["end"]["completion_rate"] for w in workflows if "end" in w] - skipped_steps = [w["end"]["skipped_steps"] for w in workflows if "end" in w] - - most_skipped = sorted(step_skips.items(), key=lambda x: x[1], reverse=True)[:5] - - return { - "total_workflows": len(workflows), - "successful": successful, - "failed": len(workflows) - successful, - "avg_completion_rate": round(sum(completion_rates) / len(completion_rates), 1) - if completion_rates - else 0, - "avg_skipped_steps": round(sum(skipped_steps) / len(skipped_steps), 1) - if skipped_steps - else 0, - "most_skipped_steps": most_skipped, - } - - -if __name__ == "__main__": - # Performance test - print("Testing workflow_tracker performance...") - - iterations = 100 - start = time.perf_counter() - - for i in range(iterations): - log_step( - step_number=1, - step_name="Test Step", - agent_used="test-agent", - duration_ms=100, - ) - - total_time = (time.perf_counter() - start) * 1000 - avg_time = total_time / iterations - - print(f"Average overhead: {avg_time:.3f}ms per log entry") - print(f"Total time for {iterations} entries: {total_time:.1f}ms") - - if avg_time < PERFORMANCE_THRESHOLD_MS: - print(f"✓ Performance target met (< {PERFORMANCE_THRESHOLD_MS}ms)") - else: - print(f"✗ Performance target missed (>= {PERFORMANCE_THRESHOLD_MS}ms)") - - # Display stats - print("\nCurrent workflow statistics:") - stats = get_workflow_stats() - print(json.dumps(stats, indent=2)) diff --git a/pyproject.toml b/pyproject.toml index 0522ead73..4a86f2b74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ backend-path = ["."] [project] name = "amplihack" -version = "0.5.110" +version = "0.5.111" description = "Amplifier bundle for agentic coding with comprehensive skills, recipes, and workflows" requires-python = ">=3.11" dependencies = [ diff --git a/scripts/check_drift.py b/scripts/check_drift.py index 85545ddcd..19bf30181 100644 --- a/scripts/check_drift.py +++ b/scripts/check_drift.py @@ -1,18 +1,24 @@ #!/usr/bin/env python3 -"""Detect drift between triplicated skill and agent files. +"""Detect drift between triplicated skill and agent files, and verify hooks symlink. Skills and agents exist in up to 3 locations that must stay in sync: 1. .claude/skills/ and .claude/agents/amplihack/ (source of truth) 2. amplifier-bundle/skills/ and amplifier-bundle/agents/ (distribution) 3. docs/claude/skills/ and docs/claude/agents/ (documentation, optional) +Hooks: .claude/tools/amplihack/hooks/ is the canonical source of truth. + amplifier-bundle/tools/amplihack/hooks is a symlink to the canonical location. + This script verifies the symlink is intact. + This script compares checksums between these locations and reports any drift. Severity levels: - MISSING/EXTRA: printed as warnings, do NOT cause exit 1 (intentional structural differences) - CHANGED: printed as errors, cause exit 1 (content drift must be fixed) + - SYMLINK BROKEN: printed as error, causes exit 1 (hooks symlink must point to .claude/) References: https://github.com/microsoft/amplihack/issues/2820 + https://github.com/microsoft/amplihack/issues/2845 """ import hashlib @@ -138,6 +144,42 @@ def main() -> int: if errors: all_errors.append((".claude/agents vs amplifier-bundle/agents", errors)) + # --- Hooks symlink verification --- + hooks_symlink = repo_root / "amplifier-bundle" / "tools" / "amplihack" / "hooks" + hooks_canonical = repo_root / ".claude" / "tools" / "amplihack" / "hooks" + expected_target = "../../../.claude/tools/amplihack/hooks" + + if hooks_symlink.is_symlink(): + actual_target = str(hooks_symlink.readlink()) + if actual_target != expected_target: + all_errors.append(( + "hooks symlink target", + [f" Expected: {expected_target}", f" Actual: {actual_target}"], + )) + elif not hooks_symlink.resolve().is_dir(): + all_errors.append(( + "hooks symlink resolution", + [f" Symlink exists but target does not resolve to a directory"], + )) + else: + print("Hooks symlink OK: amplifier-bundle/tools/amplihack/hooks -> .claude/tools/amplihack/hooks") + elif hooks_symlink.is_dir(): + # Directory exists instead of symlink -- drift has occurred + all_errors.append(( + "hooks symlink missing", + [ + " amplifier-bundle/tools/amplihack/hooks/ is a directory, not a symlink.", + " It should be a symlink to ../../../.claude/tools/amplihack/hooks", + " Run: rm -rf amplifier-bundle/tools/amplihack/hooks && " + "ln -s ../../../.claude/tools/amplihack/hooks amplifier-bundle/tools/amplihack/hooks", + ], + )) + elif not hooks_symlink.exists(): + all_errors.append(( + "hooks symlink missing", + [f" amplifier-bundle/tools/amplihack/hooks does not exist at all"], + )) + # --- Report warnings (MISSING/EXTRA) --- if all_warnings: total_warnings = sum(len(items) for _, items in all_warnings) @@ -159,7 +201,7 @@ def main() -> int: print(item) print() print( - "Source of truth is .claude/skills/ and .claude/agents/. " + "Source of truth is .claude/skills/, .claude/agents/, and .claude/tools/amplihack/hooks/. " "Copy changed files to the other locations to fix." ) return 1