Skip to content

Commit d6b03de

Browse files
Bot777claude committed
feat: add 7 Claude Code-inspired compaction features
- LLM Summarizer: API-based conversation summarization with deterministic fallback - Plan/Task Re-injection: track and re-inject active plans/tasks after compaction - Skill Schema Re-injection: track and re-inject recently used tool schemas - Plugin Hooks: PreCompact/PostCompact/PreSummarize/PostSummarize/PreBudget/PostBudget - Content Stripper: strip base64 images, markdown/HTML images, document blocks - CLAW_AUTOCOMPACT_PCT_OVERRIDE: env var to override compaction trigger threshold - Cache Prefix Manager: prompt cache prefix computation and reuse with annotations Integration: all features wired into compact() with backward-compatible API. Tests: 86 new tests (unit + integration + edge cases), 1783 total passed. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 8d26315 commit d6b03de

File tree

9 files changed

+2816
-17
lines changed

9 files changed

+2816
-17
lines changed

scripts/lib/fusion/__init__.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,21 @@
5656
"compact",
5757
"determine_level",
5858
]
59+
60+
# v8.1: Additional Claude Code-inspired compaction features, re-exported
# here so callers can import them directly from the fusion package.
from claw_compactor.fusion.llm_summarizer import LLMSummarizer
from claw_compactor.fusion.plan_reinjection import PlanTaskTracker
from claw_compactor.fusion.skill_reinjection import SkillSchemaTracker
from claw_compactor.fusion.compact_hooks import HookRegistry, HookPhase
from claw_compactor.fusion.content_stripper import strip_images_and_docs
from claw_compactor.fusion.cache_prefix import CachePrefixManager

# Extend the package's declared public API with the v8.1 names so
# `from ... import *` and API docs pick them up.
__all__ += [
    "LLMSummarizer",
    "PlanTaskTracker",
    "SkillSchemaTracker",
    "HookRegistry",
    "HookPhase",
    "strip_images_and_docs",
    "CachePrefixManager",
]

scripts/lib/fusion/cache_prefix.py

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
"""CachePrefix — prompt cache prefix management for compaction.
2+
3+
Inspired by Claude Code's cache prefix reuse: the compaction loop and the
4+
main conversation loop share a common prompt cache prefix. This avoids
5+
re-processing the system prompt and early conversation turns that remain
6+
unchanged after compaction.
7+
8+
The cache prefix is the longest common prefix of system messages and
9+
early conversation turns that hasn't changed between compaction rounds.
10+
11+
Usage::
12+
13+
from claw_compactor.fusion.cache_prefix import CachePrefixManager
14+
15+
manager = CachePrefixManager()
16+
prefix_info = manager.compute_prefix(messages)
17+
# Use prefix_info['prefix_hash'] and prefix_info['prefix_tokens']
18+
# to enable API-level prompt caching.
19+
20+
Part of claw-compactor v8. License: MIT.
21+
"""
22+
from __future__ import annotations
23+
24+
import hashlib
25+
import json
26+
from typing import Any, Optional
27+
28+
from claw_compactor.tokens import estimate_tokens
29+
30+
31+
# Maximum prefix length in tokens.
MAX_PREFIX_TOKENS = 50_000


class CachePrefixManager:
    """Manages prompt cache prefix computation and reuse.

    Tracks the stable prefix of a conversation — the leading run of
    system messages that doesn't change between compaction rounds — so
    that API-level prompt caching can skip re-processing it.
    """

    def __init__(self) -> None:
        # Hash of the prefix from the previous compute_prefix() call;
        # None until the first computation.
        self._last_prefix_hash: Optional[str] = None
        self._last_prefix_length: int = 0
        self._cache_hits: int = 0
        self._cache_misses: int = 0

    def compute_prefix(
        self,
        messages: list[dict[str, Any]],
        max_tokens: int = MAX_PREFIX_TOKENS,
    ) -> dict[str, Any]:
        """Compute the cacheable prefix of a message list.

        The prefix is the leading run of ``system`` messages, truncated
        to the longest contiguous run that fits within *max_tokens*.
        Iteration stops at the first non-system message (user, assistant
        or tool), so the prefix always starts at message 0 and contains
        only system messages.

        Parameters
        ----------
        messages:
            OpenAI-format message list.
        max_tokens:
            Maximum tokens for the prefix.

        Returns
        -------
        dict with:
            prefix_messages — the messages that form the prefix
            prefix_tokens — total estimated tokens in the prefix
            prefix_hash — 16-hex-char SHA-256 digest of the prefix (cache key)
            prefix_length — number of messages in the prefix
            cache_hit — whether the prefix matches the previous computation
        """
        prefix_messages: list[dict[str, Any]] = []
        total_tokens = 0

        for msg in messages:
            role = msg.get("role", "")
            content = msg.get("content", "")

            # Only leading system messages are treated as stable prefix.
            if role == "system":
                # Non-string content (e.g. multipart blocks) is stringified
                # for the token estimate only; the message is kept as-is.
                msg_tokens = estimate_tokens(
                    content if isinstance(content, str) else str(content)
                )
                if total_tokens + msg_tokens > max_tokens:
                    # The prefix must stay contiguous: once a message does
                    # not fit, later (smaller) messages cannot be skipped to.
                    break
                prefix_messages.append(msg)
                total_tokens += msg_tokens
            else:
                # Stop at first non-system message (user/assistant/tool).
                break

        # Hash only role + content so incidental metadata on the message
        # dicts doesn't perturb the cache key.
        prefix_content = json.dumps(
            [
                {"role": m.get("role"), "content": m.get("content")}
                for m in prefix_messages
            ],
            sort_keys=True,
            ensure_ascii=False,
        )
        prefix_hash = hashlib.sha256(prefix_content.encode("utf-8")).hexdigest()[:16]

        # A hit means the prefix is identical to the previous round, so an
        # API-level prompt cache entry keyed on it would be reused.
        cache_hit = prefix_hash == self._last_prefix_hash
        if cache_hit:
            self._cache_hits += 1
        else:
            self._cache_misses += 1

        self._last_prefix_hash = prefix_hash
        self._last_prefix_length = len(prefix_messages)

        return {
            "prefix_messages": prefix_messages,
            "prefix_tokens": total_tokens,
            "prefix_hash": prefix_hash,
            "prefix_length": len(prefix_messages),
            "cache_hit": cache_hit,
        }

    def annotate_messages_for_caching(
        self,
        messages: list[dict[str, Any]],
        max_tokens: int = MAX_PREFIX_TOKENS,
    ) -> list[dict[str, Any]]:
        """Annotate messages with cache_control markers for API caching.

        Adds ``cache_control: {"type": "ephemeral"}`` to the last message
        in the stable prefix, following the Anthropic prompt caching
        format. The input list is never mutated: the returned list is a
        new list, and the annotated message is a shallow copy.

        Parameters
        ----------
        messages:
            OpenAI-format message list.
        max_tokens:
            Maximum tokens for the prefix.

        Returns
        -------
        New message list with cache_control annotations; an unannotated
        copy of *messages* when there is no cacheable prefix.
        """
        prefix_info = self.compute_prefix(messages, max_tokens)
        prefix_length = prefix_info["prefix_length"]

        if prefix_length == 0:
            return list(messages)

        result = list(messages)
        # Mark the last prefix message: caching APIs cache everything up
        # to (and including) the annotated block.
        last_prefix_idx = prefix_length - 1
        msg = dict(result[last_prefix_idx])

        content = msg.get("content")
        if isinstance(content, str):
            # Promote plain-string content to a single annotated text block.
            msg["content"] = [
                {
                    "type": "text",
                    "text": content,
                    "cache_control": {"type": "ephemeral"},
                }
            ]
        elif isinstance(content, list):
            # Multipart content: annotate only the last text block, if any.
            new_content = list(content)
            for i in range(len(new_content) - 1, -1, -1):
                if isinstance(new_content[i], dict) and new_content[i].get("type") == "text":
                    new_content[i] = {
                        **new_content[i],
                        "cache_control": {"type": "ephemeral"},
                    }
                    break
            msg["content"] = new_content
        # Content of any other type (e.g. None) is left unannotated.

        result[last_prefix_idx] = msg
        return result

    @property
    def stats(self) -> dict[str, Any]:
        """Return cache statistics: hits, misses, hit rate (3 decimals),
        and the hash/length of the most recently computed prefix."""
        total = self._cache_hits + self._cache_misses
        return {
            "cache_hits": self._cache_hits,
            "cache_misses": self._cache_misses,
            "hit_rate": round(self._cache_hits / total, 3) if total > 0 else 0.0,
            "last_prefix_hash": self._last_prefix_hash,
            "last_prefix_length": self._last_prefix_length,
        }

    def reset(self) -> None:
        """Reset all cache state (hash, length, hit/miss counters)."""
        self._last_prefix_hash = None
        self._last_prefix_length = 0
        self._cache_hits = 0
        self._cache_misses = 0

0 commit comments

Comments
 (0)