Anantys-oss · Koan-Bot · Mar 1, 2026 · Mar 9, 2026 · Mar 25, 2026 · Mar 25, 2026
diff --git a/docs/providers/local.md b/docs/providers/local.md
@@ -118,6 +118,44 @@ curl http://localhost:11434/v1/chat/completions \
   -d '{"model": "qwen2.5-coder:14b", "messages": [{"role": "user", "content": "Hello"}]}'
 ```
 
+## Ollama Launch Provider (Recommended)
+
+If you have Ollama v0.15.0+, the `ollama-launch` provider is the simplest
+way to run Koan with local models. Ollama handles server lifecycle and
+environment setup automatically:
+
+```yaml
+# config.yaml
+cli_provider: "ollama-launch"
+
+ollama_launch:
+  model: "qwen3-coder"
+```
+
+Or via environment variables:
+
+```bash
+KOAN_CLI_PROVIDER=ollama-launch
+KOAN_OLLAMA_LAUNCH_MODEL=qwen3-coder
+```
+
+Then start Koan normally with `make start` — no need for `make ollama`.
+
+**Advantages over the `local` provider:**
+- No manual `ollama serve` — Ollama auto-starts the server
+- No env-var setup (ANTHROPIC_BASE_URL etc.)
+- `OLLAMA_NO_CLOUD=1` set by default for privacy
+- Version validation on startup
+
+**Model management via Telegram:**
+
+```
+/ollama list       — List locally available models
+/ollama pull NAME  — Download a new model
+/ollama remove NAME — Delete a local model
+/ollama status     — Server health check
+```
+
 ## Per-Project Configuration
 
 Use local LLM for specific projects (e.g., small libraries) while
@@ -160,11 +198,11 @@ that work best with Koan's agentic loop:
 
 | Model | Size | Tool Use | Notes |
 |-------|------|----------|-------|
-| `qwen2.5-coder:14b` | 14B | Good | Best balance of size and capability |
-| `qwen2.5-coder:7b` | 7B | Fair | Lighter, faster, less reliable tool use |
+| `qwen3-coder` | 14B+ | Excellent | Best choice for agentic coding workflows |
+| `qwen2.5-coder:14b` | 14B | Good | Solid balance of size and capability |
+| `glm-4.7` | 30B | Good | Lightweight deployment, strong reasoning |
 | `deepseek-coder-v2:16b` | 16B | Good | Strong coding, good function calling |
-| `codellama:34b` | 34B | Fair | Needs more RAM, variable tool use |
-| `mistral:7b` | 7B | Basic | Fast but limited tool use |
+| `qwen2.5-coder:7b` | 7B | Fair | Lighter, faster, less reliable tool use |
 
 **Hardware requirements vary by model size:**
 

diff --git a/koan/app/cli_exec.py b/koan/app/cli_exec.py
@@ -99,6 +99,27 @@ def _cleanup_prompt_file(path: Optional[str]) -> None:
             os.unlink(path)
 
 
+def _merge_provider_env(kwargs: dict) -> None:
+    """Merge provider-specific env vars into subprocess kwargs (in place).
+
+    Providers declare extra env vars via ``get_env()`` (e.g.
+    ``OLLAMA_NO_CLOUD=1``). They are layered over ``os.environ`` and stored
+    in ``kwargs["env"]``. A caller-supplied ``env`` mapping is left alone;
+    an explicit ``env=None`` (inherit) counts as "not supplied".
+    """
+    if kwargs.get("env") is not None:
+        return
+    try:
+        from app.provider import get_provider
+        extra = get_provider().get_env()
+        if extra:
+            kwargs["env"] = {**os.environ, **extra}
+    except Exception as exc:  # best-effort: never block the CLI call
+        import logging
+        logging.getLogger(__name__).warning(
+            "provider env vars not applied: %s", exc)
+
+
 def run_cli(cmd, **kwargs) -> subprocess.CompletedProcess:
     """Run a CLI command with the prompt passed via temp-file stdin.
 
@@ -107,6 +128,7 @@ def run_cli(cmd, **kwargs) -> subprocess.CompletedProcess:
     the caller does not provide one, preventing indefinite hangs.
     """
     kwargs.setdefault("timeout", DEFAULT_TIMEOUT)
+    _merge_provider_env(kwargs)
     cmd, prompt_path = prepare_prompt_file(cmd)
     if prompt_path:
         try:
@@ -129,6 +151,7 @@ def popen_cli(
     Returns ``(proc, cleanup)`` where *cleanup()* **must** be called after
     the process exits to close the file handle and delete the temp file.
     """
+    _merge_provider_env(kwargs)
     cmd, prompt_path = prepare_prompt_file(cmd)
     if prompt_path:
         stdin_file = open(prompt_path)  # noqa: SIM115

diff --git a/koan/app/ollama_client.py b/koan/app/ollama_client.py
@@ -0,0 +1,163 @@
+"""Ollama REST API client.
+
+Lightweight wrapper around Ollama's HTTP API for health checks, model
+listing, pulling, and removal.  Uses only ``urllib`` (no third-party
+deps) to keep Kōan's dependency footprint minimal.
+
+Ollama API docs: https://github.com/ollama/ollama/blob/main/docs/api.md
+"""
+
+import json
+import socket
+import urllib.request
+import urllib.error
+from typing import Any, Optional, Tuple
+
+DEFAULT_HOST = "http://localhost:11434"
+
+
+def _api_request(
+    path: str, method: str = "GET", body: Optional[dict] = None,
+    host: str = DEFAULT_HOST, timeout: int = 10,
+) -> Tuple[bool, Any]:
+    """Send one HTTP request to the Ollama API and decode the JSON reply.
+
+    ``body`` (any non-None dict, even empty) is sent as JSON; a trailing
+    slash on ``host`` is tolerated.  Returns ``(True, parsed_json)`` —
+    ``{}`` for an empty reply — while HTTP, connection, timeout and
+    decode errors come back as ``(False, error_message)``.
+    """
+    url = f"{host.rstrip('/')}{path}"
+    # ``is None`` rather than truthiness: an empty dict is still a payload.
+    data = None if body is None else json.dumps(body).encode()
+    headers = {} if body is None else {"Content-Type": "application/json"}
+
+    req = urllib.request.Request(url, data=data, headers=headers, method=method)
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            raw = resp.read().decode()
+            return True, (json.loads(raw) if raw else {})
+    except urllib.error.HTTPError as e:
+        # Prefer the "error" field of a JSON-object body; anything else
+        # (non-JSON, or JSON that is not an object) falls back to str(e).
+        try:
+            detail = json.loads(e.read().decode()).get("error", str(e))
+        except (ValueError, UnicodeDecodeError, AttributeError):
+            detail = str(e)
+        return False, detail
+    except urllib.error.URLError as e:
+        return False, f"Connection failed: {e.reason}"
+    except socket.timeout:
+        return False, "Request timed out"
+    except (json.JSONDecodeError, UnicodeDecodeError) as e:
+        return False, f"Invalid response: {e}"
+    except OSError as e:
+        return False, str(e)
+
+
+def is_server_running(host: str = DEFAULT_HOST, timeout: int = 3) -> bool:
+    """Return True when a GET on the server root (``/``) succeeds."""
+    reachable, _payload = _api_request("/", host=host, timeout=timeout)
+    return reachable
+
+
+def get_version(host: str = DEFAULT_HOST, timeout: int = 3) -> Optional[str]:
+    """Return the ``version`` field of ``/api/version``, or None on failure."""
+    succeeded, payload = _api_request("/api/version", host=host, timeout=timeout)
+    if not succeeded or not isinstance(payload, dict):
+        return None
+    return payload.get("version")
+
+
+def list_models(host: str = DEFAULT_HOST, timeout: int = 10) -> Tuple[bool, Any]:
+    """Fetch the locally installed models from ``/api/tags``.
+
+    Returns ``(True, models)`` (``[]`` if the reply has no "models" key)
+    for a dict reply; otherwise the request's ``(success, data)`` pair.
+    """
+    succeeded, payload = _api_request("/api/tags", host=host, timeout=timeout)
+    if not (succeeded and isinstance(payload, dict)):
+        return succeeded, payload
+    return True, payload.get("models", [])
+
+
+def show_model(name: str, host: str = DEFAULT_HOST, timeout: int = 10) -> Tuple[bool, Any]:
+    """Look up one model via ``POST /api/show``.
+
+    Raises ValueError for an invalid ``name``; otherwise returns the
+    ``(success, data)`` pair from the API request unchanged.
+    """
+    _validate_model_name(name)
+    payload = {"name": name}
+    return _api_request("/api/show", method="POST", body=payload, host=host, timeout=timeout)
+
+
+def pull_model(name: str, host: str = DEFAULT_HOST, timeout: int = 1800) -> Tuple[bool, str]:
+    """Download a model through ``POST /api/pull`` with ``stream`` off.
+
+    Because streaming is disabled, the call blocks until the server
+    answers or ``timeout`` seconds elapse (default 1800, i.e. 30
+    minutes) and yields no progress updates; callers should warn the
+    user that large models can take a long time.
+
+    Raises ValueError for an invalid ``name``.  Returns
+    ``(True, status)`` — the reply's ``"status"`` field, ``"success"``
+    when missing — or ``(False, error_message)``.
+    """
+    _validate_model_name(name)
+    payload = {"name": name, "stream": False}
+    succeeded, reply = _api_request(
+        "/api/pull", method="POST", body=payload, host=host, timeout=timeout,
+    )
+    if not succeeded:
+        return False, str(reply)
+    if isinstance(reply, dict):
+        return True, reply.get("status", "success")
+    return True, "success"
+
+
+def delete_model(name: str, host: str = DEFAULT_HOST, timeout: int = 30) -> Tuple[bool, str]:
+    """Remove a local model through ``DELETE /api/delete``.
+
+    Raises ValueError for an invalid ``name``.  Returns
+    ``(True, "deleted")`` or ``(False, error_message)``.
+    """
+    _validate_model_name(name)
+    succeeded, reply = _api_request(
+        "/api/delete", method="DELETE", body={"name": name},
+        host=host, timeout=timeout,
+    )
+    if not succeeded:
+        return False, str(reply)
+    return True, "deleted"
+
+
+def list_running(host: str = DEFAULT_HOST, timeout: int = 5) -> Tuple[bool, Any]:
+    """Query ``/api/ps`` for the models reported as running.
+
+    Returns ``(True, models)`` or the raw ``(success, data)`` pair.
+    """
+    succeeded, payload = _api_request("/api/ps", host=host, timeout=timeout)
+    if not (succeeded and isinstance(payload, dict)):
+        return succeeded, payload
+    return True, payload.get("models", [])
+
+
+def _validate_model_name(name: str) -> None:
+    """Raise ValueError for empty, blank, or newline/CR/tab-bearing names."""
+    if not (name and name.strip()):
+        raise ValueError("Model name must not be empty")
+    if not {"\n", "\r", "\t"}.isdisjoint(name):
+        raise ValueError(f"Model name contains invalid characters: {name!r}")
+
+
+def format_model_size(size_bytes: int) -> str:
+    """Render a byte count as a short size string (e.g. '4.7 GB').
+
+    Uses SI (1000-based) units to match ``ollama list``: one-decimal GB,
+    whole MB, and the plain byte count below one million.
+    """
+    for unit, factor, digits in (("GB", 10**9, 1), ("MB", 10**6, 0)):
+        if size_bytes >= factor:
+            return f"{size_bytes / factor:.{digits}f} {unit}"
+    return f"{size_bytes} B"
diff --git a/koan/app/pid_manager.py b/koan/app/pid_manager.py
@@ -378,14 +378,19 @@ def start_ollama(koan_root: Path, verify_timeout: float = OLLAMA_VERIFY_TIMEOUT)
     # Write PID file — ollama serve is an external binary (no flock)
     acquire_pid(koan_root, "ollama", proc.pid)
 
-    # Wait briefly for ollama to start listening
+    # Wait for ollama to start listening on HTTP
     deadline = time.monotonic() + verify_timeout
     while time.monotonic() < deadline:
-        if _is_process_alive(proc.pid):
+        if not _is_process_alive(proc.pid):
+            release_pid(koan_root, "ollama")
+            return False, "ollama launched but exited immediately — check ollama logs"
+        if _ollama_http_ready():
             return True, f"ollama serve started (PID {proc.pid})"
         time.sleep(0.3)
 
-    # Clean up stale PID file — process is dead, don't leave phantom PIDs
+    # Process is alive but HTTP not ready yet — still report success
+    if _is_process_alive(proc.pid):
+        return True, f"ollama serve started (PID {proc.pid}), still warming up"
     release_pid(koan_root, "ollama")
     return False, "ollama launched but exited immediately — check ollama logs"
 
@@ -571,9 +576,24 @@ def _detect_provider(koan_root: Path) -> str:
         return "claude"
 
 
+def _ollama_http_ready() -> bool:
+    """Probe the Ollama HTTP endpoint once, using a 2-second timeout."""
+    from app import ollama_client
+    return ollama_client.is_server_running(timeout=2)
+
+
 def _needs_ollama(provider: str) -> bool:
-    """Return True if the provider requires ollama serve."""
-    return provider in ("local", "ollama")
+    """Return True if the provider requires a manually started ollama serve.
+
+    ollama-launch runs its own server (False); others use is_ollama_provider.
+    """
+    if provider == "ollama-launch":
+        return False
+    try:
+        from app.provider import is_ollama_provider
+        return is_ollama_provider(provider)
+    except (ImportError, KeyError):  # fall back to the hard-coded names
+        return provider in ("local", "ollama")
 
 
 def _show_startup_banner(koan_root: Path, provider: str) -> None:

diff --git a/koan/app/provider/__init__.py b/koan/app/provider/__init__.py
@@ -134,6 +134,21 @@ def get_provider_by_name(name: str) -> CLIProvider:
     return _PROVIDERS[provider_name]()
 
 
+def is_ollama_provider(name: str = "") -> bool:
+    """Tell whether a provider uses Ollama.
+
+    An empty ``name`` means the configured provider.  Registered
+    providers answer through their class-level ``uses_ollama`` flag;
+    names missing from the registry (e.g. legacy "ollama") are matched
+    by a case-insensitive "ollama" substring test instead.
+    """
+    provider_name = name or get_provider_name()
+    # Unregistered names have no class to ask; judge them by spelling.
+    if provider_name not in _PROVIDERS:
+        return "ollama" in provider_name.lower()
+    return _PROVIDERS[provider_name].uses_ollama
+
+
 def get_cli_binary() -> str:
     """Get the CLI binary command for the configured provider.
 

diff --git a/koan/app/provider/base.py b/koan/app/provider/base.py
@@ -1,7 +1,7 @@
 """Base class and constants for CLI provider abstraction."""
 
 import shutil
-from typing import List, Optional, Tuple
+from typing import Dict, List, Optional, Tuple
 
 
 # ---------------------------------------------------------------------------
@@ -37,6 +37,7 @@ class CLIProvider:
     """
 
     name: str = ""
+    uses_ollama: bool = False
 
     def binary(self) -> str:
         """Return the CLI binary name or path."""
@@ -257,6 +258,14 @@ def build_command(
         cmd.extend(self.build_effort_args(effort))
         return cmd
 
+    def get_env(self) -> Dict[str, str]:
+        """Return extra environment variables for subprocess invocation.
+
+        The base implementation returns an empty dict; subclasses override
+        this to inject variables (e.g. OLLAMA_NO_CLOUD).
+        """
+        return {}
+
     def check_quota_available(self, project_path: str, timeout: int = 15) -> Tuple[bool, str]:
         """Probe real API quota with a minimal CLI call.
 

diff --git a/koan/app/provider/local.py b/koan/app/provider/local.py
@@ -29,6 +29,7 @@ class LocalLLMProvider(CLIProvider):
     """
 
     name = "local"
+    uses_ollama = True
 
     def _get_config(self) -> dict:
         """Get local_llm config section from config.yaml."""