Joaolfelicio · don-petry · Apr 4, 2026 · Apr 4, 2026 · Apr 5, 2026 · Apr 5, 2026
diff --git a/context_scribe/evaluator/__init__.py b/context_scribe/evaluator/__init__.py
@@ -12,6 +12,13 @@
     "copilot": CopilotEvaluator,
 }
 
+# Optional backend: register AnthropicEvaluator only when its module imports cleanly.
+try:
+    from .anthropic_llm import AnthropicEvaluator
+    EVALUATOR_REGISTRY["anthropic"] = AnthropicEvaluator
+except ImportError:
+    pass  # import failed (e.g. SDK not installed): leave "anthropic" unregistered
+
 
 def get_evaluator(name: str) -> BaseEvaluator:
     """Return an evaluator instance by name. Raises ValueError for unknown names."""

diff --git a/context_scribe/evaluator/anthropic_llm.py b/context_scribe/evaluator/anthropic_llm.py
@@ -0,0 +1,40 @@
+import logging
+import os
+
+import anthropic
+
+from context_scribe.evaluator.base_evaluator import BaseEvaluator
+
+logger = logging.getLogger(__name__)
+
+
+class AnthropicEvaluator(BaseEvaluator):
+    """Evaluator that calls the Anthropic Messages API through the SDK.
+
+    Requires the ANTHROPIC_API_KEY environment variable (ValueError if unset or empty).
+    Defaults to a claude-haiku model for cost efficiency; pass `model` to override.
+    """
+
+    def __init__(self, model: str = "claude-haiku-4-5-20251001"):
+        super().__init__()
+
+        api_key = os.environ.get("ANTHROPIC_API_KEY")
+        if not api_key:  # rejects a missing key and an empty-string key alike
+            raise ValueError(
+                "ANTHROPIC_API_KEY environment variable is required for AnthropicEvaluator."
+            )
+
+        self._client = anthropic.Anthropic(api_key=api_key, timeout=120.0)
+        self._model = model
+
+    def _execute_cli(self, prompt: str) -> str:
+        """Send `prompt` to the Anthropic API (no CLI subprocess) and return the reply text."""
+        message = self._client.messages.create(
+            model=self._model,
+            max_tokens=4096,
+            messages=[{"role": "user", "content": prompt}],
+        )
+        # Concatenate only the "text" content blocks; other block types are skipped
+        return "".join(
+            block.text for block in message.content if block.type == "text"
+        )
diff --git a/context_scribe/main.py b/context_scribe/main.py
@@ -190,10 +190,14 @@ def _detect_evaluator(preferred_tool: Optional[str] = None) -> str:
         if shutil.which(cli_cmd):
             return cli_to_evaluator[cli_cmd]
 
-    # 3. Fail fast if no evaluator is found
+    # 3. Fall back to the Anthropic SDK if its API key is set and it is registered
+    if os.environ.get("ANTHROPIC_API_KEY") and "anthropic" in EVALUATOR_REGISTRY:
+        return "anthropic"
+
+    # 4. Fail fast if no evaluator is found
     raise click.ClickException(
-        "No supported evaluator CLI found (claude, copilot, or gemini). "
-        "Please install one of these tools to use context-scribe."
+        "No supported evaluator found. Install a CLI tool (claude, copilot, or gemini) "
+        "or set ANTHROPIC_API_KEY to use the Anthropic SDK evaluator."
     )
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -16,6 +16,9 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+anthropic = [
+    "anthropic>=0.40.0"
+]
 test = [
     "pytest",
     "pytest-asyncio",

diff --git a/tests/test_anthropic_evaluator.py b/tests/test_anthropic_evaluator.py
@@ -0,0 +1,88 @@
+import json
+import sys
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from context_scribe.models.interaction import Interaction
+from datetime import datetime
+
+
+def _make_interaction(content="I prefer tabs over spaces"):  # test helper: user-role Interaction
+    return Interaction(
+        timestamp=datetime.now(),  # naive local time; no test in this file asserts on it
+        role="user",
+        content=content,
+        project_name="test-project",
+        metadata={},
+    )
+
+
+@pytest.fixture
+def mock_anthropic():
+    """Yield (evaluator, mock_client) with `anthropic` mocked in sys.modules."""
+    mock_module = MagicMock()
+    mock_client = MagicMock()
+    mock_module.Anthropic.return_value = mock_client
+
+    with patch.dict(sys.modules, {"anthropic": mock_module}):
+        with patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}):
+            # Drop any cached copy so the import below re-executes against the mock
+            if "context_scribe.evaluator.anthropic_llm" in sys.modules:
+                del sys.modules["context_scribe.evaluator.anthropic_llm"]
+            from context_scribe.evaluator.anthropic_llm import AnthropicEvaluator
+            evaluator = AnthropicEvaluator()
+            yield evaluator, mock_client
+
+
+def test_anthropic_evaluator_extracts_rule(mock_anthropic):
+    evaluator, mock_client = mock_anthropic
+    rule_json = json.dumps({
+        "scope": "GLOBAL",
+        "description": "Indentation preference",
+        "rules": ["- Use tabs for indentation"],
+    })
+    text_block = MagicMock()  # stands in for one SDK content block of type "text"
+    text_block.type = "text"
+    text_block.text = rule_json
+    mock_client.messages.create.return_value = MagicMock(content=[text_block])  # canned API reply
+
+    result = evaluator.evaluate_interaction(_make_interaction(), "", "")
+    assert result is not None
+    assert result.scope == "GLOBAL"
+    assert "tabs" in result.content
+
+
+def test_anthropic_evaluator_returns_none_for_no_rule(mock_anthropic):
+    evaluator, mock_client = mock_anthropic
+    text_block = MagicMock()
+    text_block.type = "text"
+    text_block.text = "NO_RULE"  # reply for which evaluate_interaction must return None
+    mock_client.messages.create.return_value = MagicMock(content=[text_block])
+
+    result = evaluator.evaluate_interaction(_make_interaction("hello"), "", "")
+    assert result is None
+
+
+def test_anthropic_evaluator_passes_correct_model(mock_anthropic):
+    evaluator, mock_client = mock_anthropic
+    text_block = MagicMock()
+    text_block.type = "text"
+    text_block.text = "NO_RULE"  # reply content is irrelevant here; only the call arguments are checked
+    mock_client.messages.create.return_value = MagicMock(content=[text_block])
+
+    evaluator.evaluate_interaction(_make_interaction(), "", "")
+    call_kwargs = mock_client.messages.create.call_args[1]  # [1] = keyword args of the last call
+    assert call_kwargs["model"] == "claude-haiku-4-5-20251001"
+    assert call_kwargs["max_tokens"] == 4096
+
+
+def test_anthropic_evaluator_missing_api_key():
+    mock_module = MagicMock()
+    with patch.dict(sys.modules, {"anthropic": mock_module}):
+        with patch.dict("os.environ", {}, clear=True):  # empty environment: no API key present
+            # Drop any cached copy so the import below re-executes against the mock
+            if "context_scribe.evaluator.anthropic_llm" in sys.modules:
+                del sys.modules["context_scribe.evaluator.anthropic_llm"]
+            from context_scribe.evaluator.anthropic_llm import AnthropicEvaluator
+            with pytest.raises(ValueError, match="ANTHROPIC_API_KEY"):
+                AnthropicEvaluator()
diff --git a/tests/test_daemons.py b/tests/test_daemons.py
@@ -4,16 +4,16 @@
 from context_scribe.main import run_daemon
 
 @pytest.mark.asyncio
-@pytest.mark.parametrize("tool, provider_class, evaluator_class, bootstrap_func, evaluator_name", [
-    ("gemini-cli", "GeminiCliProvider", "GeminiCliEvaluator", "bootstrap_global_config", "gemini"),
-    ("copilot", "CopilotProvider", "CopilotEvaluator", "bootstrap_copilot_config", "copilot"),
-    ("claude", "ClaudeProvider", "ClaudeEvaluator", "bootstrap_claude_config", "claude"),
+@pytest.mark.parametrize("tool, provider_class, bootstrap_func, evaluator_name", [
+    ("gemini-cli", "GeminiCliProvider", "bootstrap_global_config", "gemini"),
+    ("copilot", "CopilotProvider", "bootstrap_copilot_config", "copilot"),
+    ("claude", "ClaudeProvider", "bootstrap_claude_config", "claude"),
 ])
-async def test_run_daemon_tools(tool, provider_class, evaluator_class, bootstrap_func, evaluator_name, daemon_mocks):
+async def test_run_daemon_tools(tool, provider_class, bootstrap_func, evaluator_name, daemon_mocks):
     """Test the daemon run loop for all supported tools."""
 
     with patch(f"context_scribe.main.{provider_class}", return_value=daemon_mocks.provider):
-        with patch(f"context_scribe.main.{evaluator_class}", return_value=daemon_mocks.evaluator):
+        with patch("context_scribe.main.get_evaluator", return_value=daemon_mocks.evaluator):
             with patch("context_scribe.main.MemoryBankClient", return_value=daemon_mocks.mcp):
                 with patch(f"context_scribe.main.{bootstrap_func}"):
                     # Mock Live to avoid rich rendering logic completely

diff --git a/tests/test_main.py b/tests/test_main.py
@@ -28,9 +28,19 @@ def test_detect_evaluator_fails_if_none_found():
     """Test that it raises ClickException if no tools are found."""
     with patch("shutil.which") as mock_which:
         mock_which.return_value = None
-        with pytest.raises(click.ClickException) as excinfo:
-            _detect_evaluator()
-        assert "No supported evaluator CLI found" in str(excinfo.value)
+        with patch.dict("os.environ", {}, clear=True):
+            with pytest.raises(click.ClickException) as excinfo:
+                _detect_evaluator()
+            assert "No supported evaluator found" in str(excinfo.value)
+
+def test_detect_evaluator_falls_back_to_anthropic_sdk():
+    """Test that _detect_evaluator returns 'anthropic' when no CLI is found but the API key is set."""
+    with patch("shutil.which") as mock_which:
+        mock_which.return_value = None  # no CLI is found on PATH
+        with patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}):
+            # Ensure "anthropic" is registered even when the real SDK is not installed
+            with patch.dict("context_scribe.main.EVALUATOR_REGISTRY", {"anthropic": object}):
+                result = _detect_evaluator()
+                assert result == "anthropic"
 
 def test_bootstrap_global_config_creates_file(tmp_path):
     # Mock home directory to our temp path