Add an optional Anthropic SDK evaluator and use it as a fallback.

- context_scribe/evaluator/__init__.py registers "anthropic" in
  EVALUATOR_REGISTRY only when the anthropic_llm module imports cleanly.
- context_scribe/evaluator/anthropic_llm.py adds AnthropicEvaluator, which
  reads ANTHROPIC_API_KEY and sends the prompt through messages.create.
- context_scribe/main.py makes _detect_evaluator return "anthropic" when no
  CLI is found, the key is set, and the evaluator is registered.
- pyproject.toml adds an "anthropic" optional-dependency group.
- Tests cover the new evaluator and the fallback; test_daemons.py now patches
  get_evaluator instead of the per-tool evaluator classes.

NOTE(review): the main.py hunk uses os and EVALUATOR_REGISTRY but adds no
imports; presumably both are already imported there -- confirm.
NOTE(review): indentation and blank context lines were rebuilt from the hunk
line counts, and the index blob ids predate the edits to the added files;
regenerate the patch from a working tree before relying on either.

diff --git a/context_scribe/evaluator/__init__.py b/context_scribe/evaluator/__init__.py
index 7685c4c..2bbd7f8 100644
--- a/context_scribe/evaluator/__init__.py
+++ b/context_scribe/evaluator/__init__.py
@@ -12,6 +12,20 @@
     "copilot": CopilotEvaluator,
 }
 
+# Register AnthropicEvaluator only if the optional Anthropic SDK is importable.
+try:
+    from .anthropic_llm import AnthropicEvaluator
+except ImportError as exc:
+    # Optional dependency: leave the registry untouched, but record the reason
+    # so a broken install can be diagnosed instead of failing silently.
+    import logging
+
+    logging.getLogger(__name__).debug(
+        "AnthropicEvaluator not registered (import failed): %s", exc
+    )
+else:
+    EVALUATOR_REGISTRY["anthropic"] = AnthropicEvaluator
+
 
 def get_evaluator(name: str) -> BaseEvaluator:
     """Return an evaluator instance by name. Raises ValueError for unknown names."""
diff --git a/context_scribe/evaluator/anthropic_llm.py b/context_scribe/evaluator/anthropic_llm.py
new file mode 100644
index 0000000..41f8e35
--- /dev/null
+++ b/context_scribe/evaluator/anthropic_llm.py
@@ -0,0 +1,74 @@
+"""Evaluator backend that calls the Anthropic Messages API through the SDK."""
+
+import logging
+import os
+
+import anthropic
+
+from context_scribe.evaluator.base_evaluator import BaseEvaluator
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_MODEL = "claude-haiku-4-5-20251001"
+DEFAULT_MAX_TOKENS = 4096
+DEFAULT_TIMEOUT_SECONDS = 120.0
+
+
+class AnthropicEvaluator(BaseEvaluator):
+    """Evaluator that uses the Anthropic SDK directly for rule extraction.
+
+    Requires the ANTHROPIC_API_KEY environment variable to be set.
+    Uses a Claude Haiku model by default for cost efficiency.
+    """
+
+    def __init__(
+        self,
+        model: str = DEFAULT_MODEL,
+        *,
+        max_tokens: int = DEFAULT_MAX_TOKENS,
+        timeout: float = DEFAULT_TIMEOUT_SECONDS,
+    ):
+        """Read the API key from the environment and build the SDK client.
+
+        Args:
+            model: Model identifier sent with every request.
+            max_tokens: Value passed as ``max_tokens`` on every request.
+            timeout: Timeout value handed to the SDK client constructor.
+
+        Raises:
+            ValueError: If ANTHROPIC_API_KEY is unset or empty.
+        """
+        super().__init__()
+
+        api_key = os.environ.get("ANTHROPIC_API_KEY")
+        if not api_key:
+            raise ValueError(
+                "ANTHROPIC_API_KEY environment variable is required for AnthropicEvaluator."
+            )
+
+        self._client = anthropic.Anthropic(api_key=api_key, timeout=timeout)
+        self._model = model
+        self._max_tokens = max_tokens
+
+    def _execute_cli(self, prompt: str) -> str:
+        """Call the Anthropic API instead of a CLI subprocess.
+
+        Returns the concatenated text of every text block in the response;
+        the result is an empty string when there are none.
+        """
+        message = self._client.messages.create(
+            model=self._model,
+            max_tokens=self._max_tokens,
+            messages=[{"role": "user", "content": prompt}],
+        )
+        if message.stop_reason == "max_tokens":
+            # A reply cut off at the token limit is likely incomplete, so
+            # surface the cause rather than letting it pass unnoticed.
+            logger.warning(
+                "Anthropic response hit the max_tokens limit (%s); output may be truncated.",
+                self._max_tokens,
+            )
+        # Keep only text blocks; other block types are skipped.
+        return "".join(
+            block.text for block in message.content if block.type == "text"
+        )
diff --git a/context_scribe/main.py b/context_scribe/main.py
index 801c829..85560bb 100644
--- a/context_scribe/main.py
+++ b/context_scribe/main.py
@@ -190,10 +190,14 @@ def _detect_evaluator(preferred_tool: Optional[str] = None) -> str:
         if shutil.which(cli_cmd):
             return cli_to_evaluator[cli_cmd]
 
-    # 3. Fail fast if no evaluator is found
+    # 3. Fall back to Anthropic SDK if API key is set
+    if os.environ.get("ANTHROPIC_API_KEY") and "anthropic" in EVALUATOR_REGISTRY:
+        return "anthropic"
+
+    # 4. Fail fast if no evaluator is found
     raise click.ClickException(
-        "No supported evaluator CLI found (claude, copilot, or gemini). "
-        "Please install one of these tools to use context-scribe."
+        "No supported evaluator found. Install a CLI tool (claude, copilot, or gemini) "
+        "or set ANTHROPIC_API_KEY to use the Anthropic SDK evaluator."
     )
 
 
diff --git a/pyproject.toml b/pyproject.toml
index 6320661..260b91f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -16,6 +16,9 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+anthropic = [
+    "anthropic>=0.40.0"
+]
 test = [
     "pytest",
     "pytest-asyncio",
diff --git a/tests/test_anthropic_evaluator.py b/tests/test_anthropic_evaluator.py
new file mode 100644
index 0000000..780b335
--- /dev/null
+++ b/tests/test_anthropic_evaluator.py
@@ -0,0 +1,98 @@
+import json
+import sys
+from datetime import datetime
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from context_scribe.models.interaction import Interaction
+
+
+def _make_interaction(content="I prefer tabs over spaces"):
+    return Interaction(
+        timestamp=datetime.now(),
+        role="user",
+        content=content,
+        project_name="test-project",
+        metadata={},
+    )
+
+
+def _text_response(text):
+    """Build a fake Messages API response holding a single text block."""
+    text_block = MagicMock()
+    text_block.type = "text"
+    text_block.text = text
+    return MagicMock(content=[text_block], stop_reason="end_turn")
+
+
+@pytest.fixture
+def mock_anthropic():
+    """Mock the anthropic SDK at sys.modules level, then import AnthropicEvaluator."""
+    mock_module = MagicMock()
+    mock_client = MagicMock()
+    mock_module.Anthropic.return_value = mock_client
+
+    with patch.dict(sys.modules, {"anthropic": mock_module}):
+        with patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}):
+            # Force re-import so the module picks up our mock
+            if "context_scribe.evaluator.anthropic_llm" in sys.modules:
+                del sys.modules["context_scribe.evaluator.anthropic_llm"]
+            from context_scribe.evaluator.anthropic_llm import AnthropicEvaluator
+            evaluator = AnthropicEvaluator()
+            yield evaluator, mock_client
+
+
+def test_anthropic_evaluator_extracts_rule(mock_anthropic):
+    evaluator, mock_client = mock_anthropic
+    rule_json = json.dumps({
+        "scope": "GLOBAL",
+        "description": "Indentation preference",
+        "rules": ["- Use tabs for indentation"],
+    })
+    mock_client.messages.create.return_value = _text_response(rule_json)
+
+    result = evaluator.evaluate_interaction(_make_interaction(), "", "")
+    assert result is not None
+    assert result.scope == "GLOBAL"
+    assert "tabs" in result.content
+
+
+def test_anthropic_evaluator_returns_none_for_no_rule(mock_anthropic):
+    evaluator, mock_client = mock_anthropic
+    mock_client.messages.create.return_value = _text_response("NO_RULE")
+
+    result = evaluator.evaluate_interaction(_make_interaction("hello"), "", "")
+    assert result is None
+
+
+def test_anthropic_evaluator_passes_correct_model(mock_anthropic):
+    evaluator, mock_client = mock_anthropic
+    mock_client.messages.create.return_value = _text_response("NO_RULE")
+
+    evaluator.evaluate_interaction(_make_interaction(), "", "")
+    call_kwargs = mock_client.messages.create.call_args.kwargs
+    assert call_kwargs["model"] == "claude-haiku-4-5-20251001"
+    assert call_kwargs["max_tokens"] == 4096
+
+
+def test_anthropic_evaluator_warns_on_truncated_response(mock_anthropic, caplog):
+    evaluator, mock_client = mock_anthropic
+    response = _text_response("partial")
+    response.stop_reason = "max_tokens"
+    mock_client.messages.create.return_value = response
+
+    with caplog.at_level("WARNING"):
+        assert evaluator._execute_cli("prompt") == "partial"
+    assert "max_tokens" in caplog.text
+
+
+def test_anthropic_evaluator_missing_api_key():
+    mock_module = MagicMock()
+    with patch.dict(sys.modules, {"anthropic": mock_module}):
+        with patch.dict("os.environ", {}, clear=True):
+            if "context_scribe.evaluator.anthropic_llm" in sys.modules:
+                del sys.modules["context_scribe.evaluator.anthropic_llm"]
+            from context_scribe.evaluator.anthropic_llm import AnthropicEvaluator
+            with pytest.raises(ValueError, match="ANTHROPIC_API_KEY"):
+                AnthropicEvaluator()
diff --git a/tests/test_daemons.py b/tests/test_daemons.py
index c42de2e..06b0542 100644
--- a/tests/test_daemons.py
+++ b/tests/test_daemons.py
@@ -4,16 +4,16 @@ from context_scribe.main import run_daemon
 
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("tool, provider_class, evaluator_class, bootstrap_func, evaluator_name", [
-    ("gemini-cli", "GeminiCliProvider", "GeminiCliEvaluator", "bootstrap_global_config", "gemini"),
-    ("copilot", "CopilotProvider", "CopilotEvaluator", "bootstrap_copilot_config", "copilot"),
-    ("claude", "ClaudeProvider", "ClaudeEvaluator", "bootstrap_claude_config", "claude"),
+@pytest.mark.parametrize("tool, provider_class, bootstrap_func, evaluator_name", [
+    ("gemini-cli", "GeminiCliProvider", "bootstrap_global_config", "gemini"),
+    ("copilot", "CopilotProvider", "bootstrap_copilot_config", "copilot"),
+    ("claude", "ClaudeProvider", "bootstrap_claude_config", "claude"),
 ])
-async def test_run_daemon_tools(tool, provider_class, evaluator_class, bootstrap_func, evaluator_name, daemon_mocks):
+async def test_run_daemon_tools(tool, provider_class, bootstrap_func, evaluator_name, daemon_mocks):
     """Test the daemon run loop for all supported tools."""
 
     with patch(f"context_scribe.main.{provider_class}", return_value=daemon_mocks.provider):
-        with patch(f"context_scribe.main.{evaluator_class}", return_value=daemon_mocks.evaluator):
+        with patch("context_scribe.main.get_evaluator", return_value=daemon_mocks.evaluator):
             with patch("context_scribe.main.MemoryBankClient", return_value=daemon_mocks.mcp):
                 with patch(f"context_scribe.main.{bootstrap_func}"):
                     # Mock Live to avoid rich rendering logic completely
diff --git a/tests/test_main.py b/tests/test_main.py
index 9a85465..ee6abf6 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -28,9 +28,19 @@ def test_detect_evaluator_fails_if_none_found():
     """Test that it raises ClickException if no tools are found."""
     with patch("shutil.which") as mock_which:
         mock_which.return_value = None
-        with pytest.raises(click.ClickException) as excinfo:
-            _detect_evaluator()
-        assert "No supported evaluator CLI found" in str(excinfo.value)
+        with patch.dict("os.environ", {}, clear=True):
+            with pytest.raises(click.ClickException) as excinfo:
+                _detect_evaluator()
+            assert "No supported evaluator found" in str(excinfo.value)
+
+def test_detect_evaluator_falls_back_to_anthropic_sdk():
+    """Test that _detect_evaluator returns 'anthropic' when no CLI tools found but API key is set."""
+    with patch("shutil.which") as mock_which:
+        mock_which.return_value = None
+        with patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}):
+            with patch.dict("context_scribe.main.EVALUATOR_REGISTRY", {"anthropic": object}):
+                result = _detect_evaluator()
+                assert result == "anthropic"
 
 def test_bootstrap_global_config_creates_file(tmp_path):
     # Mock home directory to our temp path