Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,104 @@
* system → messages.
*
* @author Mark Pollack
* @author Soby Chacko
* @since 1.1.0
*/
public enum AnthropicCacheStrategy {

/**
* No caching (default behavior).
* No caching (default behavior). All content is processed fresh on each request.
* <p>
* Use this when:
* <ul>
* <li>Requests are one-off or highly variable</li>
* <li>Content doesn't meet minimum token requirements (1024+ tokens)</li>
* <li>You want to avoid caching overhead</li>
* </ul>
*/
NONE,

/**
* Cache tool definitions only. Places a cache breakpoint on the last tool, while
* system messages and conversation history remain uncached and are processed fresh on
* each request.
* <p>
* Use this when:
* <ul>
* <li>Tool definitions are large and stable (5000+ tokens)</li>
* <li>System prompts change frequently or are small (&lt;500 tokens)</li>
* <li>You want to share cached tools across different system contexts (e.g.,
* multi-tenant applications, A/B testing system prompts)</li>
* <li>Tool definitions rarely change</li>
* </ul>
* <p>
* <strong>Important:</strong> Changing any tool definition will invalidate this cache
* entry. Due to Anthropic's cascade invalidation, tool changes will also invalidate
* any downstream cache breakpoints (system, messages) if used in combination with
* other strategies.
*/
TOOLS_ONLY,

/**
* Cache system instructions only. Places a cache breakpoint on the system message
* content.
* content. Tools are cached implicitly via Anthropic's automatic ~20-block lookback
* mechanism (content before the cache breakpoint is included in the cache).
* <p>
* Use this when:
* <ul>
* <li>System prompts are large and stable (1024+ tokens)</li>
* <li>Tool definitions are relatively small (&lt;20 tools)</li>
* <li>You want simple, single-breakpoint caching</li>
* </ul>
* <p>
* <strong>Note:</strong> Changing tools will invalidate the cache since tools are
* part of the cache prefix (they appear before system in the request hierarchy).
*/
SYSTEM_ONLY,

/**
* Cache system instructions and tool definitions. Places cache breakpoints on the
* last tool and system message content.
* last tool (breakpoint 1) and system message content (breakpoint 2).
* <p>
* Use this when:
* <ul>
* <li>Both tools and system prompts are large and stable</li>
* <li>You have many tools (20+ tools, beyond the automatic lookback window)</li>
* <li>You want deterministic, explicit caching of both components</li>
* <li>System prompts may change independently of tools</li>
* </ul>
* <p>
* <strong>Behavior:</strong>
* <ul>
* <li>If only tools change: Both caches invalidated (tools + system)</li>
* <li>If only system changes: Tools cache remains valid, system cache
* invalidated</li>
* </ul>
* This allows efficient reuse of tool cache when only system prompts are updated.
*/
SYSTEM_AND_TOOLS,

/**
* Cache the entire conversation history up to (but not including) the current user
* question. This is ideal for multi-turn conversations where you want to reuse the
* conversation context while asking new questions.
* question. Places a cache breakpoint on the last user message in the conversation
* history, enabling incremental caching as the conversation grows.
* <p>
* Use this when:
* <ul>
* <li>Building multi-turn conversational applications (chatbots, assistants)</li>
* <li>Conversation history is large and grows over time</li>
* <li>You want to reuse conversation context while asking new questions</li>
* <li>Using chat memory advisors or conversation persistence</li>
* </ul>
* <p>
* <strong>Behavior:</strong> Each turn builds on the previous cached prefix. The
* cache grows incrementally: Request 1 caches [Message1], Request 2 caches [Message1
* + Message2], etc. This provides significant cost savings (90%+) and performance
* improvements for long conversations.
* <p>
* <strong>Important:</strong> Changing tools or system prompts will invalidate the
* entire conversation cache due to cascade invalidation. Tool and system stability is
* critical for this strategy.
*/
CONVERSATION_HISTORY

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
* definition messages.
*
* @author Austin Dase
* @author Soby Chacko
* @since 1.1.0
**/
public class CacheEligibilityResolver {
Expand Down Expand Up @@ -84,6 +85,7 @@ private static Set<MessageType> extractEligibleMessageTypes(AnthropicCacheStrate
return switch (anthropicCacheStrategy) {
case NONE -> Set.of();
case SYSTEM_ONLY, SYSTEM_AND_TOOLS -> Set.of(MessageType.SYSTEM);
case TOOLS_ONLY -> Set.of(); // No message types cached, only tool definitions
case CONVERSATION_HISTORY -> Set.of(MessageType.values());
};
}
Expand All @@ -108,11 +110,17 @@ public AnthropicApi.ChatCompletionRequest.CacheControl resolve(MessageType messa
}

public AnthropicApi.ChatCompletionRequest.CacheControl resolveToolCacheControl() {
// Tool definitions are only cache-eligible when caching is enabled and
// the strategy includes SYSTEM messages (SYSTEM_ONLY, SYSTEM_AND_TOOLS, or
// CONVERSATION_HISTORY). When NONE, tools must not be cached.
if (!isCachingEnabled() || !this.cacheEligibleMessageTypes.contains(TOOL_DEFINITION_MESSAGE_TYPE)
|| this.cacheBreakpointTracker.allBreakpointsAreUsed()) {
// Tool definitions are cache-eligible for TOOLS_ONLY, SYSTEM_AND_TOOLS, and
// CONVERSATION_HISTORY strategies. SYSTEM_ONLY caches only system messages,
// relying on Anthropic's cache hierarchy to implicitly cache tools.
if (this.cacheStrategy != AnthropicCacheStrategy.TOOLS_ONLY
&& this.cacheStrategy != AnthropicCacheStrategy.SYSTEM_AND_TOOLS
&& this.cacheStrategy != AnthropicCacheStrategy.CONVERSATION_HISTORY) {
logger.debug("Caching not enabled for tool definition, cacheStrategy={}", this.cacheStrategy);
return null;
}

if (this.cacheBreakpointTracker.allBreakpointsAreUsed()) {
logger.debug("Caching not enabled for tool definition, usedBreakpoints={}",
this.cacheBreakpointTracker.getCount());
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,17 @@ void testSystemOnlyCacheStrategy() throws Exception {
this.mockWebServer
.enqueue(new MockResponse().setBody(mockResponse).setHeader("Content-Type", "application/json"));

// Create tool callback to test that tools are NOT cached with SYSTEM_ONLY
var toolMethod = ReflectionUtils.findMethod(TestTools.class, "getWeather", String.class);
MethodToolCallback toolCallback = MethodToolCallback.builder()
.toolDefinition(ToolDefinitions.builder(toolMethod).description("Get weather for a location").build())
.toolMethod(toolMethod)
.build();

// Test with SYSTEM_ONLY cache strategy
AnthropicChatOptions options = AnthropicChatOptions.builder()
.cacheOptions(AnthropicCacheOptions.builder().strategy(AnthropicCacheStrategy.SYSTEM_ONLY).build())
.toolCallbacks(List.of(toolCallback))
.build();

Prompt prompt = new Prompt(
Expand All @@ -130,6 +138,18 @@ void testSystemOnlyCacheStrategy() throws Exception {
assertThat(lastSystemBlock.get("cache_control").get("type").asText()).isEqualTo("ephemeral");
}

// If the request contains tools, verify that none of them has cache_control (key
// difference from SYSTEM_AND_TOOLS). This check is skipped when no tools are sent.
if (requestBody.has("tools")) {
JsonNode toolsArray = requestBody.get("tools");
assertThat(toolsArray.isArray()).isTrue();
// Verify NO tool has cache_control
for (int i = 0; i < toolsArray.size(); i++) {
JsonNode tool = toolsArray.get(i);
assertThat(tool.has("cache_control")).isFalse();
}
}

// Verify response
assertThat(response).isNotNull();
assertThat(response.getResult().getOutput().getText()).contains("Hello!");
Expand Down
Loading