diff --git a/.claude/tools/amplihack/considerations.yaml b/.claude/tools/amplihack/considerations.yaml index de5b8ddbc..65b273084 100644 --- a/.claude/tools/amplihack/considerations.yaml +++ b/.claude/tools/amplihack/considerations.yaml @@ -115,7 +115,7 @@ severity: warning checker: _check_documentation_updates enabled: true - applicable_session_types: ["DEVELOPMENT", "MAINTENANCE", "INVESTIGATION"] + applicable_session_types: ["DEVELOPMENT", "MAINTENANCE"] guidance: | This check should only fire when PUBLIC-FACING code was changed. @@ -170,7 +170,7 @@ severity: blocker checker: _check_next_steps enabled: true - applicable_session_types: ["DEVELOPMENT", "INVESTIGATION"] + applicable_session_types: ["DEVELOPMENT"] guidance: | This check is about distinguishing COMPLETION SUMMARIES from REMAINING WORK. @@ -222,7 +222,30 @@ severity: blocker checker: _check_workflow_invocation enabled: true - applicable_session_types: ["DEVELOPMENT", "INVESTIGATION"] + applicable_session_types: ["DEVELOPMENT"] + +- id: skill_invocation + category: Workflow Process Adherence + question: If the session was started with a skill command (e.g. /pm-architect, /dev), did the agent actually invoke that skill using the Skill tool? + description: Detects when a user requests a skill via slash command but the agent bypasses it and responds directly without invoking the skill (Issue #2914) + severity: blocker + checker: _check_skill_invocation + enabled: true + applicable_session_types: ["DEVELOPMENT", "INVESTIGATION", "MAINTENANCE"] + guidance: | + Check if a <command-name> tag appears in the transcript (indicating the user + ran a slash command that maps to a skill). If so, check if the Skill tool + was called for that skill name.
def _check_skill_invocation(self, transcript: list[dict], session_id: str) -> bool:
    """Check that a skill requested via slash command was actually invoked.

    A skill request is recognised only when a user message contains a
    ``<command-name>`` tag (for example ``<command-name>/dev</command-name>``).
    The first such tag determines the requested skill. The check then looks
    for an assistant ``tool_use`` block named ``Skill`` whose ``input["skill"]``
    equals the requested name (a leading ``/`` on either side is ignored).
    (Issue #2914)

    NOTE(review): the consideration guidance also treats skills that are
    injected directly by the command system as satisfied; that case is not
    detected here and would be reported as not invoked.

    Args:
        transcript: List of message dictionaries
        session_id: Session identifier (not used by this check)

    Returns:
        True if no skill was requested, if the requested skill was invoked,
        or if the transcript could not be analysed (fail-open); False if a
        skill was requested but no matching Skill tool call was found.
    """
    try:
        # Find the first <command-name> tag in user messages. The pattern is
        # anchored on the tag: without it, any word in any user message would
        # be mistaken for a requested skill.
        requested_skill = None
        for msg in transcript:
            if not isinstance(msg, dict) or msg.get("type") != "user":
                continue
            message = msg.get("message")
            # "message" may be present but None or a non-dict value.
            content = message.get("content", "") if isinstance(message, dict) else ""
            # str() lets the same search work on plain strings and on
            # lists of content blocks (the tag survives in the repr).
            match = re.search(
                r"<command-name>\s*/?([\w:.-]+)\s*</command-name>", str(content)
            )
            if match:
                requested_skill = match.group(1)
                break

        if not requested_skill:
            return True  # No skill requested — check not applicable

        # Check if the Skill tool was called for this skill
        for msg in transcript:
            if not isinstance(msg, dict) or msg.get("type") != "assistant":
                continue
            message = msg.get("message")
            content = message.get("content", []) if isinstance(message, dict) else []
            if not isinstance(content, list):
                continue
            for block in content:
                if not isinstance(block, dict) or block.get("type") != "tool_use":
                    continue
                if block.get("name") != "Skill":
                    continue
                tool_input = block.get("input")
                invoked = tool_input.get("skill", "") if isinstance(tool_input, dict) else ""
                if isinstance(invoked, str) and invoked.removeprefix("/") == requested_skill:
                    self._log(f"Skill '{requested_skill}' was invoked", "DEBUG")
                    return True

        self._log(f"Skill '{requested_skill}' was requested but not invoked", "WARNING")
        return False
    except Exception as e:
        # Fail open, matching _check_workflow_invocation: a checker error
        # must not turn into a blocker for the session.
        self._log(f"Error in _check_skill_invocation: {e}", "WARNING", exc_info=True)
        return True
@@ -170,7 +170,7 @@ severity: blocker checker: _check_next_steps enabled: true - applicable_session_types: ["DEVELOPMENT", "INVESTIGATION"] + applicable_session_types: ["DEVELOPMENT"] guidance: | This check is about distinguishing COMPLETION SUMMARIES from REMAINING WORK. @@ -222,7 +222,30 @@ severity: blocker checker: _check_workflow_invocation enabled: true - applicable_session_types: ["DEVELOPMENT", "INVESTIGATION"] + applicable_session_types: ["DEVELOPMENT"] + +- id: skill_invocation + category: Workflow Process Adherence + question: If the session was started with a skill command (e.g. /pm-architect, /dev), did the agent actually invoke that skill using the Skill tool? + description: Detects when a user requests a skill via slash command but the agent bypasses it and responds directly without invoking the skill (Issue #2914) + severity: blocker + checker: _check_skill_invocation + enabled: true + applicable_session_types: ["DEVELOPMENT", "INVESTIGATION", "MAINTENANCE"] + guidance: | + Check if a <command-name> tag appears in the transcript (indicating the user + ran a slash command that maps to a skill). If so, check if the Skill tool + was called for that skill name.
def _check_skill_invocation(self, transcript: list[dict], session_id: str) -> bool:
    """Check that a skill requested via slash command was actually invoked.

    A skill request is recognised only when a user message contains a
    ``<command-name>`` tag (for example ``<command-name>/dev</command-name>``).
    The first such tag determines the requested skill. The check then looks
    for an assistant ``tool_use`` block named ``Skill`` whose ``input["skill"]``
    equals the requested name (a leading ``/`` on either side is ignored).
    (Issue #2914)

    NOTE(review): the consideration guidance also treats skills that are
    injected directly by the command system as satisfied; that case is not
    detected here and would be reported as not invoked.

    Args:
        transcript: List of message dictionaries
        session_id: Session identifier (not used by this check)

    Returns:
        True if no skill was requested, if the requested skill was invoked,
        or if the transcript could not be analysed (fail-open); False if a
        skill was requested but no matching Skill tool call was found.
    """
    try:
        # Find the first <command-name> tag in user messages. The pattern is
        # anchored on the tag: without it, any word in any user message would
        # be mistaken for a requested skill.
        requested_skill = None
        for msg in transcript:
            if not isinstance(msg, dict) or msg.get("type") != "user":
                continue
            message = msg.get("message")
            # "message" may be present but None or a non-dict value.
            content = message.get("content", "") if isinstance(message, dict) else ""
            # str() lets the same search work on plain strings and on
            # lists of content blocks (the tag survives in the repr).
            match = re.search(
                r"<command-name>\s*/?([\w:.-]+)\s*</command-name>", str(content)
            )
            if match:
                requested_skill = match.group(1)
                break

        if not requested_skill:
            return True  # No skill requested — check not applicable

        # Check if the Skill tool was called for this skill
        for msg in transcript:
            if not isinstance(msg, dict) or msg.get("type") != "assistant":
                continue
            message = msg.get("message")
            content = message.get("content", []) if isinstance(message, dict) else []
            if not isinstance(content, list):
                continue
            for block in content:
                if not isinstance(block, dict) or block.get("type") != "tool_use":
                    continue
                if block.get("name") != "Skill":
                    continue
                tool_input = block.get("input")
                invoked = tool_input.get("skill", "") if isinstance(tool_input, dict) else ""
                if isinstance(invoked, str) and invoked.removeprefix("/") == requested_skill:
                    self._log(f"Skill '{requested_skill}' was invoked", "DEBUG")
                    return True

        self._log(f"Skill '{requested_skill}' was requested but not invoked", "WARNING")
        return False
    except Exception as e:
        # Fail open, matching _check_workflow_invocation: a checker error
        # must not turn into a blocker for the session.
        self._log(f"Error in _check_skill_invocation: {e}", "WARNING", exc_info=True)
        return True