From 10d1914090bf06ec2547701e0c39ddcdc78cd33e Mon Sep 17 00:00:00 2001
From: "Michael J. Jabbour" <michael.jabbour@gmail.com>
Date: Sun, 18 Jan 2026 00:27:24 -0500
Subject: [PATCH] fix: prevent orphaned tool results during interrupted tool
 execution

The _check_tool_pair_removable() method was incorrectly marking tool pairs
as removable even when not all tool_results had been received yet. This
caused the assistant message with tool_calls to be removed prematurely,
leaving orphaned tool_result messages that triggered API errors.

Root cause:
- Method only checked if tool_results existed in the block range
- Did not verify that EVERY tool_call had a corresponding tool_result
- Returned all_removable=True after finding just one result

Bug trigger scenario:
1. Assistant makes tool_calls [A, B, C]
2. Result A arrives
3. User interrupts with 'continue' message
4. Compaction runs, finds only result A
5. BUG: Returns all_removable=True anyway
6. Removes assistant message containing all tool_calls
7. Results B and C arrive later as orphans
8. API error: 'tool_use ids were found without tool_result blocks'

The fix:
- Added 'found' flag to track each individual tool_call
- Only set all_removable=True if ALL tool_calls have results
- Added detailed docstring explaining the critical validation
- Added regression tests to prevent reintroduction of this bug

Impact:
- Fixes session crashes when users interrupt tool execution
- Prevents data loss from dropped tool results
- No API changes, backward compatible
---
 amplifier_module_context_simple/__init__.py |  18 +++-
 tests/test_tool_pair_compaction.py          | 108 ++++++++++++++++++++
 2 files changed, 125 insertions(+), 1 deletion(-)

diff --git a/amplifier_module_context_simple/__init__.py b/amplifier_module_context_simple/__init__.py
index e064e0d..fc0ffdb 100644
--- a/amplifier_module_context_simple/__init__.py
+++ b/amplifier_module_context_simple/__init__.py
@@ -912,19 +912,35 @@ def _check_tool_pair_removable(
         assistant_msg: dict[str, Any],
         protected_indices: set[int],
     ) -> tuple[bool, list[int]]:
-        """Check if all tool results for an assistant can be removed. Returns (all_removable, result_indices)."""
+        """Check if all tool results for an assistant can be removed. Returns (all_removable, result_indices).
+        
+        CRITICAL FIX: This function now verifies that ALL tool_calls have corresponding tool_results.
+        If any tool_call is missing its result, we must NOT remove the assistant, because:
+        1. The tool result might be added later (e.g., user types "continue" during execution)
+        2. Removing the assistant would orphan that future tool result
+        3. This causes: "tool_use ids were found without tool_result blocks" error
+        """
         all_removable = True
         tool_result_indices = []
 
         for tc in assistant_msg.get("tool_calls", []):
             tc_id = tc.get("id") or tc.get("tool_call_id")
             if tc_id:
+                found = False  # Track if we found this specific result
                 for k, m in enumerate(messages):
                     if m.get("tool_call_id") == tc_id:
+                        found = True  # Mark as found
                         if k in protected_indices:
                             all_removable = False
                         else:
                             tool_result_indices.append(k)
+                
+                # CRITICAL: If we didn't find a result for this tool_call,
+                # we CANNOT remove the assistant safely. The tool result might
+                # be added later, and removing the assistant now would create
+                # an orphaned tool_result with no matching tool_use.
+                if not found:
+                    all_removable = False
 
         return all_removable, tool_result_indices
 
diff --git a/tests/test_tool_pair_compaction.py b/tests/test_tool_pair_compaction.py
index 2c2155d..9cf6787 100644
--- a/tests/test_tool_pair_compaction.py
+++ b/tests/test_tool_pair_compaction.py
@@ -281,3 +281,111 @@ async def test_compact_with_multiple_tool_calls_in_one_message():
         f"Expected 6 tool results after assistant with 6 tool_calls, "
         f"but found only {tool_result_count}. This is the bug!"
     )
+
+
+@pytest.mark.asyncio
+async def test_incomplete_tool_results_not_removed():
+    """Test that assistant with incomplete tool results is NOT removed during compaction.
+    
+    Regression test for bug where compaction would remove assistant messages
+    even when not all tool_results had been added yet, causing orphaned
+    tool_results and API errors.
+    """
+    context = SimpleContextManager(
+        max_tokens=1000,
+        compact_threshold=0.01,  # Force compaction
+        target_usage=0.5,
+    )
+    
+    # Scenario: Assistant makes 2 tool calls, but only 1 result added so far
+    await context.add_message({
+        "role": "assistant",
+        "content": "Let me check both",
+        "tool_calls": [
+            {"id": "call_A", "type": "function", "function": {"name": "tool_a", "arguments": "{}"}},
+            {"id": "call_B", "type": "function", "function": {"name": "tool_b", "arguments": "{}"}},
+        ]
+    })
+    
+    # Only result A added
+    await context.add_message({
+        "role": "tool",
+        "tool_call_id": "call_A",
+        "content": "Result from tool A"
+    })
+    
+    # User interrupts (like typing "continue")
+    await context.add_message({
+        "role": "user",
+        "content": "continue"
+    })
+    
+    # Result B hasn't been added yet!
+    
+    # Force compaction
+    compacted = await context._compact_ephemeral(budget=1000, source_messages=context.messages)
+    
+    # CRITICAL: Assistant message should still be present
+    # because call_B is missing its result
+    assistant_messages = [m for m in compacted if m.get("role") == "assistant" and m.get("tool_calls")]
+    
+    assert len(assistant_messages) == 1, "Assistant with incomplete tool results should NOT be removed"
+    
+    # Verify the assistant message has both tool_calls
+    assert len(assistant_messages[0]["tool_calls"]) == 2
+
+
+@pytest.mark.asyncio  
+async def test_complete_tool_results_can_be_removed():
+    """Test that assistant with ALL tool results CAN be removed during compaction."""
+    context = SimpleContextManager(
+        max_tokens=1000,
+        compact_threshold=0.01,  # Force compaction
+        target_usage=0.5,
+    )
+    
+    # Scenario: Assistant makes 2 tool calls, BOTH results added
+    await context.add_message({
+        "role": "assistant",
+        "content": "Let me check both",
+        "tool_calls": [
+            {"id": "call_A", "type": "function", "function": {"name": "tool_a", "arguments": "{}"}},
+            {"id": "call_B", "type": "function", "function": {"name": "tool_b", "arguments": "{}"}},
+        ]
+    })
+    
+    # Both results added
+    await context.add_message({
+        "role": "tool",
+        "tool_call_id": "call_A",
+        "content": "Result A"
+    })
+    await context.add_message({
+        "role": "tool",
+        "tool_call_id": "call_B",
+        "content": "Result B"
+    })
+    
+    # Add more messages to trigger compaction
+    for i in range(20):
+        await context.add_message({"role": "user", "content": f"Message {i}" * 100})
+        await context.add_message({"role": "assistant", "content": f"Response {i}" * 100})
+    
+    # Force compaction - should be able to remove the old tool pair
+    compacted = await context._compact_ephemeral(budget=1000, source_messages=context.messages)
+    
+    # The old assistant with tool_calls might be removed (that's OK now)
+    # We just verify no orphaned tool_results exist
+    tool_use_ids = set()
+    for msg in compacted:
+        if msg.get("tool_calls"):
+            for tc in msg["tool_calls"]:
+                tc_id = tc.get("id") or tc.get("tool_call_id")
+                if tc_id:
+                    tool_use_ids.add(tc_id)
+    
+    # Check all tool results have matching tool_uses
+    for msg in compacted:
+        if msg.get("role") == "tool":
+            tc_id = msg.get("tool_call_id")
+            assert tc_id in tool_use_ids, f"Orphaned tool_result with tool_call_id: {tc_id}"