Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions context_scribe/evaluator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@
"copilot": CopilotEvaluator,
}

# The Anthropic evaluator is optional: it is added to the registry only when
# importing its module succeeds (i.e. no ImportError is raised).
try:
    from .anthropic_llm import AnthropicEvaluator
except ImportError:
    # Import failed; leave the registry without an "anthropic" entry.
    pass
else:
    EVALUATOR_REGISTRY["anthropic"] = AnthropicEvaluator


def get_evaluator(name: str) -> BaseEvaluator:
"""Return an evaluator instance by name. Raises ValueError for unknown names."""
Expand Down
40 changes: 40 additions & 0 deletions context_scribe/evaluator/anthropic_llm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import logging
import os

import anthropic

from context_scribe.evaluator.base_evaluator import BaseEvaluator

logger = logging.getLogger(__name__)


class AnthropicEvaluator(BaseEvaluator):
    """Rule-extraction evaluator that talks to the Anthropic SDK directly.

    The API key is read from the ANTHROPIC_API_KEY environment variable at
    construction time. A claude-haiku model is the default for cost
    efficiency.
    """

    def __init__(self, model: str = "claude-haiku-4-5-20251001"):
        """Build the SDK client used for all requests.

        Args:
            model: Model identifier passed to every Messages API call.

        Raises:
            ValueError: If ANTHROPIC_API_KEY is unset or empty.
        """
        super().__init__()

        key = os.getenv("ANTHROPIC_API_KEY")
        if key is None or key == "":
            raise ValueError(
                "ANTHROPIC_API_KEY environment variable is required for AnthropicEvaluator."
            )

        self._model = model
        # Requests made through this client use a 120 second timeout.
        self._client = anthropic.Anthropic(api_key=key, timeout=120.0)

    def _execute_cli(self, prompt: str) -> str:
        """Send *prompt* to the Anthropic API rather than a CLI subprocess.

        Args:
            prompt: Text sent as a single user message.

        Returns:
            The concatenated text of every "text" content block in the
            response; an empty string if there are none.
        """
        response = self._client.messages.create(
            model=self._model,
            max_tokens=4096,
            messages=[{"role": "user", "content": prompt}],
        )

        # Only blocks whose type is "text" contribute to the result.
        pieces = []
        for block in response.content:
            if block.type == "text":
                pieces.append(block.text)
        return "".join(pieces)
10 changes: 7 additions & 3 deletions context_scribe/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,14 @@ def _detect_evaluator(preferred_tool: Optional[str] = None) -> str:
if shutil.which(cli_cmd):
return cli_to_evaluator[cli_cmd]

# 3. Fail fast if no evaluator is found
# 3. Fall back to Anthropic SDK if API key is set
if os.environ.get("ANTHROPIC_API_KEY") and "anthropic" in EVALUATOR_REGISTRY:
return "anthropic"

# 4. Fail fast if no evaluator is found
raise click.ClickException(
"No supported evaluator CLI found (claude, copilot, or gemini). "
"Please install one of these tools to use context-scribe."
"No supported evaluator found. Install a CLI tool (claude, copilot, or gemini) "
"or set ANTHROPIC_API_KEY to use the Anthropic SDK evaluator."
)


Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ dependencies = [
]

[project.optional-dependencies]
anthropic = [
"anthropic>=0.40.0"
]
test = [
"pytest",
"pytest-asyncio",
Expand Down
88 changes: 88 additions & 0 deletions tests/test_anthropic_evaluator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import json
import sys
from unittest.mock import MagicMock, patch

import pytest

from context_scribe.models.interaction import Interaction
from datetime import datetime


def _make_interaction(content="I prefer tabs over spaces"):
    """Return a user-role Interaction for "test-project" carrying *content*."""
    fields = {
        "timestamp": datetime.now(),
        "role": "user",
        "content": content,
        "project_name": "test-project",
        "metadata": {},
    }
    return Interaction(**fields)


@pytest.fixture
def mock_anthropic():
    """Yield ``(evaluator, client)`` built against a mocked ``anthropic`` module.

    The SDK is replaced in ``sys.modules`` and a test API key is placed in the
    environment before ``AnthropicEvaluator`` is imported and instantiated.
    """
    fake_sdk = MagicMock()
    fake_client = MagicMock()
    fake_sdk.Anthropic.return_value = fake_client

    with patch.dict(sys.modules, {"anthropic": fake_sdk}), patch.dict(
        "os.environ", {"ANTHROPIC_API_KEY": "test-key"}
    ):
        # Drop any cached copy so the import below binds to the mocked SDK.
        sys.modules.pop("context_scribe.evaluator.anthropic_llm", None)
        from context_scribe.evaluator.anthropic_llm import AnthropicEvaluator

        yield AnthropicEvaluator(), fake_client


def test_anthropic_evaluator_extracts_rule(mock_anthropic):
    """A JSON rule payload in the API reply is turned into a GLOBAL rule."""
    evaluator, client = mock_anthropic

    payload = {
        "scope": "GLOBAL",
        "description": "Indentation preference",
        "rules": ["- Use tabs for indentation"],
    }
    reply = MagicMock()
    reply.content = [MagicMock(type="text", text=json.dumps(payload))]
    client.messages.create.return_value = reply

    outcome = evaluator.evaluate_interaction(_make_interaction(), "", "")

    assert outcome is not None
    assert outcome.scope == "GLOBAL"
    assert "tabs" in outcome.content


def test_anthropic_evaluator_returns_none_for_no_rule(mock_anthropic):
    """A "NO_RULE" reply from the API yields no extracted rule."""
    evaluator, client = mock_anthropic

    reply = MagicMock()
    reply.content = [MagicMock(type="text", text="NO_RULE")]
    client.messages.create.return_value = reply

    outcome = evaluator.evaluate_interaction(_make_interaction("hello"), "", "")

    assert outcome is None


def test_anthropic_evaluator_passes_correct_model(mock_anthropic):
    """The default model name and token limit are forwarded to the API call."""
    evaluator, client = mock_anthropic

    reply = MagicMock()
    reply.content = [MagicMock(type="text", text="NO_RULE")]
    client.messages.create.return_value = reply

    evaluator.evaluate_interaction(_make_interaction(), "", "")

    # call_args unpacks as (positional args, keyword args).
    _, sent_kwargs = client.messages.create.call_args
    assert sent_kwargs["model"] == "claude-haiku-4-5-20251001"
    assert sent_kwargs["max_tokens"] == 4096


def test_anthropic_evaluator_missing_api_key():
    """Constructing the evaluator with an empty environment raises ValueError."""
    fake_sdk = MagicMock()

    with patch.dict(sys.modules, {"anthropic": fake_sdk}), patch.dict(
        "os.environ", {}, clear=True
    ):
        # Drop any cached copy so the import below binds to the mocked SDK.
        sys.modules.pop("context_scribe.evaluator.anthropic_llm", None)
        from context_scribe.evaluator.anthropic_llm import AnthropicEvaluator

        with pytest.raises(ValueError, match="ANTHROPIC_API_KEY"):
            AnthropicEvaluator()
12 changes: 6 additions & 6 deletions tests/test_daemons.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@
from context_scribe.main import run_daemon

@pytest.mark.asyncio
@pytest.mark.parametrize("tool, provider_class, evaluator_class, bootstrap_func, evaluator_name", [
("gemini-cli", "GeminiCliProvider", "GeminiCliEvaluator", "bootstrap_global_config", "gemini"),
("copilot", "CopilotProvider", "CopilotEvaluator", "bootstrap_copilot_config", "copilot"),
("claude", "ClaudeProvider", "ClaudeEvaluator", "bootstrap_claude_config", "claude"),
@pytest.mark.parametrize("tool, provider_class, bootstrap_func, evaluator_name", [
("gemini-cli", "GeminiCliProvider", "bootstrap_global_config", "gemini"),
("copilot", "CopilotProvider", "bootstrap_copilot_config", "copilot"),
("claude", "ClaudeProvider", "bootstrap_claude_config", "claude"),
])
async def test_run_daemon_tools(tool, provider_class, evaluator_class, bootstrap_func, evaluator_name, daemon_mocks):
async def test_run_daemon_tools(tool, provider_class, bootstrap_func, evaluator_name, daemon_mocks):
"""Test the daemon run loop for all supported tools."""

with patch(f"context_scribe.main.{provider_class}", return_value=daemon_mocks.provider):
with patch(f"context_scribe.main.{evaluator_class}", return_value=daemon_mocks.evaluator):
with patch("context_scribe.main.get_evaluator", return_value=daemon_mocks.evaluator):
with patch("context_scribe.main.MemoryBankClient", return_value=daemon_mocks.mcp):
with patch(f"context_scribe.main.{bootstrap_func}"):
# Mock Live to avoid rich rendering logic completely
Expand Down
16 changes: 13 additions & 3 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,19 @@ def test_detect_evaluator_fails_if_none_found():
"""Test that it raises ClickException if no tools are found."""
with patch("shutil.which") as mock_which:
mock_which.return_value = None
with pytest.raises(click.ClickException) as excinfo:
_detect_evaluator()
assert "No supported evaluator CLI found" in str(excinfo.value)
with patch.dict("os.environ", {}, clear=True):
with pytest.raises(click.ClickException) as excinfo:
_detect_evaluator()
assert "No supported evaluator found" in str(excinfo.value)

def test_detect_evaluator_falls_back_to_anthropic_sdk():
    """With no CLI tool on PATH but an API key set, 'anthropic' is selected."""
    with patch("shutil.which", return_value=None), patch.dict(
        "os.environ", {"ANTHROPIC_API_KEY": "test-key"}
    ), patch.dict("context_scribe.main.EVALUATOR_REGISTRY", {"anthropic": object}):
        detected = _detect_evaluator()

    assert detected == "anthropic"

def test_bootstrap_global_config_creates_file(tmp_path):
# Mock home directory to our temp path
Expand Down