From 1a62973eaab839659b1b20d640eb40ff11466227 Mon Sep 17 00:00:00 2001
From: martin
Date: Wed, 5 Nov 2025 18:56:58 +0000
Subject: [PATCH] added local model support, and minor fixes to work on ubuntu

---
 configs/evolution/large_budget.yaml | 11 +---
 shinka/core/runner.py               | 38 ++++++++++---
 shinka/llm/client.py                |  8 +++
 shinka/llm/embedding.py             | 36 +++++++++++-
 shinka/llm/models/__init__.py       |  2 +
 shinka/llm/models/local.py          | 88 +++++++++++++++++++++++++++++
 shinka/llm/models/pricing.py        |  7 +++
 shinka/llm/query.py                 |  4 ++
 shinka/webui/visualization.py       | 24 +++++---
 9 files changed, 191 insertions(+), 27 deletions(-)
 create mode 100644 shinka/llm/models/local.py

diff --git a/configs/evolution/large_budget.yaml b/configs/evolution/large_budget.yaml
index fb22bbe8..0d1a737d 100644
--- a/configs/evolution/large_budget.yaml
+++ b/configs/evolution/large_budget.yaml
@@ -13,12 +13,7 @@ evo_config:
   max_patch_resamples: 3
   max_patch_attempts: 3
   llm_models:
-    - "gpt-4.1"
-    - "gpt-4.1-mini"
-    - "gpt-4.1-nano"
-    - "bedrock/us.anthropic.claude-sonnet-4-20250514-v1:0"
-    - "o4-mini"
-  llm_dynamic_selection: ucb
+    - "qwen3"
   llm_kwargs:
     temperatures:
       - 0.0
@@ -27,10 +22,10 @@ evo_config:
     max_tokens: 16384
   meta_rec_interval: 10
   meta_llm_models:
-    - "gpt-4.1"
+    - "qwen3"
   meta_llm_kwargs:
     temperatures:
       - 0.0
-  embedding_model: "text-embedding-3-small"
+  embedding_model: "local-qwen3"
   results_dir: ${output_dir}
\ No newline at end of file
diff --git a/shinka/core/runner.py b/shinka/core/runner.py
index f1b5e947..c54f2baf 100644
--- a/shinka/core/runner.py
+++ b/shinka/core/runner.py
@@ -7,7 +7,7 @@
 from rich.table import Table
 from rich.console import Console
 import rich.box
-from typing import List, Optional, Union, cast
+from typing import List, Optional, Union, cast, Any
 from datetime import datetime
 from pathlib import Path
 from dataclasses import dataclass, field, asdict
@@ -275,19 +275,39 @@ def _save_experiment_config(
         db_config: DatabaseConfig,
     ) -> None:
         """Save experiment configuration to a YAML file."""
-        config_data = {
-            "evolution_config": asdict(evo_config),
-            "job_config": asdict(job_config),
-            "database_config": asdict(db_config),
-            "timestamp": datetime.now().isoformat(),
-            "results_directory": str(self.results_dir),
-        }
+        try:
+            # Use OmegaConf to convert to a YAML-safe structure
+            from omegaconf import OmegaConf
+
+            config_data = {
+                "evolution_config": asdict(evo_config),
+                "job_config": asdict(job_config),
+                "database_config": asdict(db_config),
+                "timestamp": datetime.now().isoformat(),
+                "results_directory": str(self.results_dir),
+            }
+
+            # Convert to OmegaConf and then to YAML-safe container
+            omega_conf = OmegaConf.create(config_data)
+            yaml_safe_dict = OmegaConf.to_container(omega_conf, resolve=True)
+
+        except Exception as e:
+            # Fallback: use simple dict conversion, filtering out None values
+            logger.warning(f"Failed to use OmegaConf for config serialization: {e}, using fallback")
+            config_data = {
+                "evolution_config": {k: v for k, v in asdict(evo_config).items() if v is not None},
+                "job_config": {k: v for k, v in asdict(job_config).items() if v is not None},
+                "database_config": {k: v for k, v in asdict(db_config).items() if v is not None},
+                "timestamp": datetime.now().isoformat(),
+                "results_directory": str(self.results_dir),
+            }
+            yaml_safe_dict = config_data

         config_path = Path(self.results_dir) / "experiment_config.yaml"
         config_path.parent.mkdir(parents=True, exist_ok=True)
         with config_path.open("w", encoding="utf-8") as f:
-            yaml.dump(config_data, f, default_flow_style=False, indent=2)
+            yaml.dump(yaml_safe_dict, f, default_flow_style=False, indent=2, allow_unicode=True)

         logger.info(f"Experiment configuration saved to {config_path}")

diff --git a/shinka/llm/client.py b/shinka/llm/client.py
index eaef6123..0b223b57 100644
--- a/shinka/llm/client.py
+++ b/shinka/llm/client.py
@@ -11,6 +11,7 @@
     OPENAI_MODELS,
     DEEPSEEK_MODELS,
     GEMINI_MODELS,
+    LOCAL_MODELS,
 )

 env_path = Path(__file__).parent.parent.parent / ".env"
@@ -78,6 +79,13 @@ def get_client_llm(model_name: str, structured_output: bool = False) -> Tuple[An
             client,
             mode=instructor.Mode.GEMINI_JSON,
         )
+    elif model_name in LOCAL_MODELS.keys():
+        client = openai.OpenAI(
+            api_key="not-needed",  # Local models don't need API key
+            base_url="http://localhost:8000/v1",
+        )
+        if structured_output:
+            raise NotImplementedError("Structured output not supported for local models.")
     else:
         raise ValueError(f"Model {model_name} not supported.")

diff --git a/shinka/llm/embedding.py b/shinka/llm/embedding.py
index 4082ad58..dc6dcb03 100644
--- a/shinka/llm/embedding.py
+++ b/shinka/llm/embedding.py
@@ -26,6 +26,10 @@
     "gemini-embedding-001",
 ]

+LOCAL_EMBEDDING_MODELS = [
+    "local-qwen3",  # Local embedding model name
+]
+
 OPENAI_EMBEDDING_COSTS = {
     "text-embedding-3-small": 0.02 / M,
     "text-embedding-3-large": 0.13 / M,
@@ -37,6 +41,11 @@
     "gemini-embedding-001": 0.0 / M,  # Check current pricing
 }

+# Local embedding costs (free)
+LOCAL_EMBEDDING_COSTS = {
+    "qwen3": 0.0 / M,  # Free local model
+}
+
 def get_client_model(model_name: str) -> tuple[Union[openai.OpenAI, str], str]:
     if model_name in OPENAI_EMBEDDING_MODELS:
         client = openai.OpenAI()
@@ -57,6 +66,13 @@ def get_client_model(model_name: str) -> tuple[Union[openai.OpenAI, str], str]:
         genai.configure(api_key=api_key)
         client = "gemini"  # Use string identifier for Gemini
         model_to_use = model_name
+    elif model_name in LOCAL_EMBEDDING_MODELS:
+        # Local OpenAI-compatible embedding model
+        client = openai.OpenAI(
+            api_key="not-needed",  # Local models don't need API key
+            base_url="http://localhost:8000/v1",
+        )
+        model_to_use = "qwen3"  # Use the actual model name for the API
     else:
         raise ValueError(f"Invalid embedding model: {model_name}")

@@ -128,8 +144,26 @@ def get_embedding(
         response = self.client.embeddings.create(
             model=self.model, input=code, encoding_format="float"
         )
-        cost = response.usage.total_tokens * OPENAI_EMBEDDING_COSTS[self.model]
+        # Get cost - use local cost if it's a local model, otherwise use OpenAI cost
+        # Handle cases where usage might be None (local models)
+        if response.usage and hasattr(response.usage, 'total_tokens'):
+            total_tokens = response.usage.total_tokens
+        else:
+            # Estimate tokens for local models (rough approximation)
+            total_tokens = sum(len(text.split()) for text in code)
+
+        if self.model_name in LOCAL_EMBEDDING_MODELS:
+            cost = total_tokens * LOCAL_EMBEDDING_COSTS.get(self.model, 0.0)
+        else:
+            cost = total_tokens * OPENAI_EMBEDDING_COSTS.get(self.model, 0.0)
         # Extract embedding from response
+        if response.data is None:
+            logger.error("Embedding response data is None")
+            if single_code:
+                return [], cost
+            else:
+                return [[]], cost
+
         if single_code:
             return response.data[0].embedding, cost
         else:
diff --git a/shinka/llm/models/__init__.py b/shinka/llm/models/__init__.py
index af5c3787..59029c07 100644
--- a/shinka/llm/models/__init__.py
+++ b/shinka/llm/models/__init__.py
@@ -2,6 +2,7 @@
 from .openai import query_openai
 from .deepseek import query_deepseek
 from .gemini import query_gemini
+from .local import query_local
 from .result import QueryResult

 __all__ = [
@@ -9,5 +10,6 @@
     "query_openai",
     "query_deepseek",
     "query_gemini",
+    "query_local",
     "QueryResult",
 ]
diff --git a/shinka/llm/models/local.py b/shinka/llm/models/local.py
new file mode 100644
index 00000000..0b81dd31
--- /dev/null
+++ b/shinka/llm/models/local.py
@@ -0,0 +1,88 @@
+import backoff
+import openai
+from .pricing import LOCAL_MODELS
+from .result import QueryResult
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def backoff_handler(details):
+    exc = details.get("exception")
+    if exc:
+        logger.info(
+            f"Local LLM - Retry {details['tries']} due to error: {exc}. Waiting {details['wait']:0.1f}s..."
+        )
+
+
+@backoff.on_exception(
+    backoff.expo,
+    (
+        openai.APIConnectionError,
+        openai.APIStatusError,
+        openai.RateLimitError,
+        openai.APITimeoutError,
+    ),
+    max_tries=5,
+    max_value=20,
+    on_backoff=backoff_handler,
+)
+def query_local(
+    client,
+    model,
+    msg,
+    system_msg,
+    msg_history,
+    output_model,
+    model_posteriors=None,
+    **kwargs,
+) -> QueryResult:
+    """Query local OpenAI-compatible model."""
+    if output_model is not None:
+        raise NotImplementedError("Structured output not supported for local models.")
+    new_msg_history = msg_history + [{"role": "user", "content": msg}]
+
+    # Convert max_output_tokens to max_tokens for OpenAI-compatible API
+    local_kwargs = kwargs.copy()
+    if "max_output_tokens" in local_kwargs:
+        local_kwargs["max_tokens"] = local_kwargs.pop("max_output_tokens")
+
+    response = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "system", "content": system_msg},
+            *new_msg_history,
+        ],
+        **local_kwargs,
+        n=1,
+        stop=None,
+    )
+    content = response.choices[0].message.content
+    try:
+        thought = response.choices[0].message.reasoning_content
+    except AttributeError:
+        thought = ""
+    new_msg_history.append({"role": "assistant", "content": content})
+
+    # Get token usage, defaulting to 0 if not available
+    input_tokens = getattr(response.usage, 'prompt_tokens', 0) if response.usage else 0
+    output_tokens = getattr(response.usage, 'completion_tokens', 0) if response.usage else 0
+
+    input_cost = LOCAL_MODELS[model]["input_price"] * input_tokens
+    output_cost = LOCAL_MODELS[model]["output_price"] * output_tokens
+    return QueryResult(
+        content=content,
+        msg=msg,
+        system_msg=system_msg,
+        new_msg_history=new_msg_history,
+        model_name=model,
+        kwargs=kwargs,
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cost=input_cost + output_cost,
+        input_cost=input_cost,
+        output_cost=output_cost,
+        thought=thought,
+        model_posteriors=model_posteriors,
+    )
+
diff --git a/shinka/llm/models/pricing.py b/shinka/llm/models/pricing.py
index c9c101a2..04efdccf 100644
--- a/shinka/llm/models/pricing.py
+++ b/shinka/llm/models/pricing.py
@@ -161,6 +161,13 @@
     ],
 }

+LOCAL_MODELS = {
+    "qwen3": {
+        "input_price": 0.0 / M,  # Free local model
+        "output_price": 0.0 / M,  # Free local model
+    },
+}
+
 REASONING_OAI_MODELS = [
     "o3-mini-2025-01-31",
     "o1-2024-12-17",
diff --git a/shinka/llm/query.py b/shinka/llm/query.py
index c88c7d7c..8e181752 100644
--- a/shinka/llm/query.py
+++ b/shinka/llm/query.py
@@ -8,6 +8,7 @@
     DEEPSEEK_MODELS,
     GEMINI_MODELS,
     BEDROCK_MODELS,
+    LOCAL_MODELS,
     REASONING_OAI_MODELS,
     REASONING_CLAUDE_MODELS,
     REASONING_DEEPSEEK_MODELS,
@@ -20,6 +21,7 @@
     query_openai,
     query_deepseek,
     query_gemini,
+    query_local,
     QueryResult,
 )
 import logging
@@ -204,6 +206,8 @@ def query(
         query_fn = query_deepseek
     elif model_name in GEMINI_MODELS.keys():
         query_fn = query_gemini
+    elif model_name in LOCAL_MODELS.keys():
+        query_fn = query_local
     else:
         raise ValueError(f"Model {model_name} not supported.")
     result = query_fn(
diff --git a/shinka/webui/visualization.py b/shinka/webui/visualization.py
index 35cce086..24692bd1 100644
--- a/shinka/webui/visualization.py
+++ b/shinka/webui/visualization.py
@@ -160,13 +160,19 @@ def handle_get_programs(self, db_path: str):
         # Extract the actual path by removing the task name prefix if present
         actual_db_path = self._get_actual_db_path(db_path)

-        # Check cache first
+        # Check cache first - but reduce cache time for active databases
+        # Clear cache if it's been more than 2 seconds (for active experiments)
         if db_path in db_cache:
             last_fetch_time, cached_data = db_cache[db_path]
-            if time.time() - last_fetch_time < CACHE_EXPIRATION_SECONDS:
-                print(f"[SERVER] Serving from cache for DB: {db_path}")
+            cache_age = time.time() - last_fetch_time
+            if cache_age < 2.0:  # Reduced from 5 to 2 seconds for more frequent updates
+                print(f"[SERVER] Serving from cache for DB: {db_path} (age: {cache_age:.1f}s, {len(cached_data)} programs)")
                 self.send_json_response(cached_data)
                 return
+            else:
+                # Cache expired, remove it
+                del db_cache[db_path]
+                print(f"[SERVER] Cache expired for DB: {db_path}, fetching fresh data")

         # Construct absolute path to the database from search root using actual path
         abs_db_path = os.path.join(self.search_root, actual_db_path)
@@ -183,14 +189,14 @@ def handle_get_programs(self, db_path: str):
         db = None
         try:
             config = DatabaseConfig(db_path=abs_db_path)
+            # Use read_only=True to avoid locking issues
+            # SQLite URI read-only connections should see WAL data if properly configured
             db = ProgramDatabase(config, read_only=True)
-
-            # Set WAL mode compatible settings for read-only connections
+
             if db.cursor:
-                db.cursor.execute(
-                    "PRAGMA busy_timeout = 10000;"
-                )  # 10 second timeout
-                db.cursor.execute("PRAGMA journal_mode = WAL;")  # Ensure WAL mode
+                db.cursor.execute("PRAGMA busy_timeout = 10000;")
+                # Ensure WAL mode is enabled (should already be set by database)
+                db.cursor.execute("PRAGMA journal_mode = WAL;")

             programs = db.get_all_programs()
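
Note on the hard-coded endpoint: the chat and embedding clients added above both assume an OpenAI-compatible server listening on http://localhost:8000/v1 that serves the model under the name "qwen3" (for example vLLM or llama.cpp's llama-server, started separately; the patch does not start it). A minimal smoke test of that assumption, using only the openai client the patch already relies on; the file name and prompt below are illustrative and not part of the patch:

    # smoke_test_local_endpoint.py (hypothetical helper, not included in this patch)
    import openai

    # Same connection settings that get_client_llm() and get_client_model() use
    # for LOCAL_MODELS / LOCAL_EMBEDDING_MODELS.
    client = openai.OpenAI(api_key="not-needed", base_url="http://localhost:8000/v1")

    # Chat completion against the locally served "qwen3" model.
    chat = client.chat.completions.create(
        model="qwen3",
        messages=[{"role": "user", "content": "Reply with the single word: ok"}],
        max_tokens=8,
    )
    print("chat:", chat.choices[0].message.content)

    # Embedding request, matching what embedding.py sends for "local-qwen3".
    emb = client.embeddings.create(
        model="qwen3", input=["hello world"], encoding_format="float"
    )
    print("embedding dim:", len(emb.data[0].embedding))

If the server registers the model under a different name, the "qwen3" entries in pricing.py and embedding.py and the model names in large_budget.yaml all need to agree on that name.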