From 10d1914090bf06ec2547701e0c39ddcdc78cd33e Mon Sep 17 00:00:00 2001 From: "Michael J. Jabbour" Date: Sun, 18 Jan 2026 00:27:24 -0500 Subject: [PATCH] fix: prevent orphaned tool results during interrupted tool execution The _check_tool_pair_removable() method was incorrectly marking tool pairs as removable even when not all tool_results had been received yet. This caused the assistant message with tool_calls to be removed prematurely, leaving orphaned tool_result messages that triggered API errors. Root cause: - Method only checked if tool_results existed in the block range - Did not verify that EVERY tool_call had a corresponding tool_result - Returned all_removable=True after finding just one result Bug trigger scenario: 1. Assistant makes tool_calls [A, B, C] 2. Result A arrives 3. User interrupts with 'continue' message 4. Compaction runs, finds only result A 5. BUG: Returns all_removable=True anyway 6. Removes assistant message containing all tool_calls 7. Results B and C arrive later as orphans 8. API error: 'tool_use ids were found without tool_result blocks' The fix: - Added 'found' flag to track each individual tool_call - Only set all_removable=True if ALL tool_calls have results - Added detailed docstring explaining the critical validation - Added regression tests to prevent reintroduction of this bug Impact: - Fixes session crashes when users interrupt tool execution - Prevents data loss from dropped tool results - No API changes, backward compatible --- amplifier_module_context_simple/__init__.py | 18 +++- tests/test_tool_pair_compaction.py | 108 ++++++++++++++++++++ 2 files changed, 125 insertions(+), 1 deletion(-) diff --git a/amplifier_module_context_simple/__init__.py b/amplifier_module_context_simple/__init__.py index e064e0d..fc0ffdb 100644 --- a/amplifier_module_context_simple/__init__.py +++ b/amplifier_module_context_simple/__init__.py @@ -912,19 +912,35 @@ def _check_tool_pair_removable( assistant_msg: dict[str, Any], protected_indices: set[int], ) -> tuple[bool, list[int]]: - """Check if all tool results for an assistant can be removed. Returns (all_removable, result_indices).""" + """Check if all tool results for an assistant can be removed. Returns (all_removable, result_indices). + + CRITICAL FIX: This function now verifies that ALL tool_calls have corresponding tool_results. + If any tool_call is missing its result, we must NOT remove the assistant, because: + 1. The tool result might be added later (e.g., user types "continue" during execution) + 2. Removing the assistant would orphan that future tool result + 3. This causes: "tool_use ids were found without tool_result blocks" error + """ all_removable = True tool_result_indices = [] for tc in assistant_msg.get("tool_calls", []): tc_id = tc.get("id") or tc.get("tool_call_id") if tc_id: + found = False # Track if we found this specific result for k, m in enumerate(messages): if m.get("tool_call_id") == tc_id: + found = True # Mark as found if k in protected_indices: all_removable = False else: tool_result_indices.append(k) + + # CRITICAL: If we didn't find a result for this tool_call, + # we CANNOT remove the assistant safely. The tool result might + # be added later, and removing the assistant now would create + # an orphaned tool_result with no matching tool_use. + if not found: + all_removable = False return all_removable, tool_result_indices diff --git a/tests/test_tool_pair_compaction.py b/tests/test_tool_pair_compaction.py index 2c2155d..9cf6787 100644 --- a/tests/test_tool_pair_compaction.py +++ b/tests/test_tool_pair_compaction.py @@ -281,3 +281,111 @@ async def test_compact_with_multiple_tool_calls_in_one_message(): f"Expected 6 tool results after assistant with 6 tool_calls, " f"but found only {tool_result_count}. This is the bug!" ) + + +@pytest.mark.asyncio +async def test_incomplete_tool_results_not_removed(): + """Test that assistant with incomplete tool results is NOT removed during compaction. + + Regression test for bug where compaction would remove assistant messages + even when not all tool_results had been added yet, causing orphaned + tool_results and API errors. + """ + context = SimpleContextManager( + max_tokens=1000, + compact_threshold=0.01, # Force compaction + target_usage=0.5, + ) + + # Scenario: Assistant makes 2 tool calls, but only 1 result added so far + await context.add_message({ + "role": "assistant", + "content": "Let me check both", + "tool_calls": [ + {"id": "call_A", "type": "function", "function": {"name": "tool_a", "arguments": "{}"}}, + {"id": "call_B", "type": "function", "function": {"name": "tool_b", "arguments": "{}"}}, + ] + }) + + # Only result A added + await context.add_message({ + "role": "tool", + "tool_call_id": "call_A", + "content": "Result from tool A" + }) + + # User interrupts (like typing "continue") + await context.add_message({ + "role": "user", + "content": "continue" + }) + + # Result B hasn't been added yet! + + # Force compaction + compacted = await context._compact_ephemeral(budget=1000, source_messages=context.messages) + + # CRITICAL: Assistant message should still be present + # because call_B is missing its result + assistant_messages = [m for m in compacted if m.get("role") == "assistant" and m.get("tool_calls")] + + assert len(assistant_messages) == 1, "Assistant with incomplete tool results should NOT be removed" + + # Verify the assistant message has both tool_calls + assert len(assistant_messages[0]["tool_calls"]) == 2 + + +@pytest.mark.asyncio +async def test_complete_tool_results_can_be_removed(): + """Test that assistant with ALL tool results CAN be removed during compaction.""" + context = SimpleContextManager( + max_tokens=1000, + compact_threshold=0.01, # Force compaction + target_usage=0.5, + ) + + # Scenario: Assistant makes 2 tool calls, BOTH results added + await context.add_message({ + "role": "assistant", + "content": "Let me check both", + "tool_calls": [ + {"id": "call_A", "type": "function", "function": {"name": "tool_a", "arguments": "{}"}}, + {"id": "call_B", "type": "function", "function": {"name": "tool_b", "arguments": "{}"}}, + ] + }) + + # Both results added + await context.add_message({ + "role": "tool", + "tool_call_id": "call_A", + "content": "Result A" + }) + await context.add_message({ + "role": "tool", + "tool_call_id": "call_B", + "content": "Result B" + }) + + # Add more messages to trigger compaction + for i in range(20): + await context.add_message({"role": "user", "content": f"Message {i}" * 100}) + await context.add_message({"role": "assistant", "content": f"Response {i}" * 100}) + + # Force compaction - should be able to remove the old tool pair + compacted = await context._compact_ephemeral(budget=1000, source_messages=context.messages) + + # The old assistant with tool_calls might be removed (that's OK now) + # We just verify no orphaned tool_results exist + tool_use_ids = set() + for msg in compacted: + if msg.get("tool_calls"): + for tc in msg["tool_calls"]: + tc_id = tc.get("id") or tc.get("tool_call_id") + if tc_id: + tool_use_ids.add(tc_id) + + # Check all tool results have matching tool_uses + for msg in compacted: + if msg.get("role") == "tool": + tc_id = msg.get("tool_call_id") + assert tc_id in tool_use_ids, f"Orphaned tool_result with tool_call_id: {tc_id}"