From 1a62973eaab839659b1b20d640eb40ff11466227 Mon Sep 17 00:00:00 2001
From: martin
Date: Wed, 5 Nov 2025 18:56:58 +0000
Subject: [PATCH] added local model support, and minor fixes to work on ubuntu

---
 configs/evolution/large_budget.yaml | 11 +---
 shinka/core/runner.py               | 38 ++++++++++---
 shinka/llm/client.py                |  8 +++
 shinka/llm/embedding.py             | 36 +++++++++++-
 shinka/llm/models/__init__.py       |  2 +
 shinka/llm/models/local.py          | 88 +++++++++++++++++++++++++++++
 shinka/llm/models/pricing.py        |  7 +++
 shinka/llm/query.py                 |  4 ++
 shinka/webui/visualization.py       | 24 +++++---
 9 files changed, 191 insertions(+), 27 deletions(-)
 create mode 100644 shinka/llm/models/local.py

diff --git a/configs/evolution/large_budget.yaml b/configs/evolution/large_budget.yaml
index fb22bbe8..0d1a737d 100644
--- a/configs/evolution/large_budget.yaml
+++ b/configs/evolution/large_budget.yaml
@@ -13,12 +13,7 @@ evo_config:
   max_patch_resamples: 3
   max_patch_attempts: 3
   llm_models:
-    - "gpt-4.1"
-    - "gpt-4.1-mini"
-    - "gpt-4.1-nano"
-    - "bedrock/us.anthropic.claude-sonnet-4-20250514-v1:0"
-    - "o4-mini"
-  llm_dynamic_selection: ucb
+    - "qwen3"
   llm_kwargs:
     temperatures:
       - 0.0
@@ -27,10 +22,10 @@ evo_config:
     max_tokens: 16384
   meta_rec_interval: 10
   meta_llm_models:
-    - "gpt-4.1"
+    - "qwen3"
   meta_llm_kwargs:
     temperatures:
       - 0.0
-  embedding_model: "text-embedding-3-small"
+  embedding_model: "local-qwen3"
   results_dir: ${output_dir}
\ No newline at end of file
diff --git a/shinka/core/runner.py b/shinka/core/runner.py
index f1b5e947..c54f2baf 100644
--- a/shinka/core/runner.py
+++ b/shinka/core/runner.py
@@ -7,7 +7,7 @@
 from rich.table import Table
 from rich.console import Console
 import rich.box
-from typing import List, Optional, Union, cast
+from typing import List, Optional, Union, cast, Any
 from datetime import datetime
 from pathlib import Path
 from dataclasses import dataclass, field, asdict
@@ -275,19 +275,39 @@ def _save_experiment_config(
         db_config: DatabaseConfig,
     ) -> None:
         """Save experiment configuration to a YAML file."""
-        config_data = {
-            "evolution_config": asdict(evo_config),
-            "job_config": asdict(job_config),
-            "database_config": asdict(db_config),
-            "timestamp": datetime.now().isoformat(),
-            "results_directory": str(self.results_dir),
-        }
+        try:
+            # Use OmegaConf to convert to a YAML-safe structure
+            from omegaconf import OmegaConf
+
+            config_data = {
+                "evolution_config": asdict(evo_config),
+                "job_config": asdict(job_config),
+                "database_config": asdict(db_config),
+                "timestamp": datetime.now().isoformat(),
+                "results_directory": str(self.results_dir),
+            }
+
+            # Convert to OmegaConf and then to YAML-safe container
+            omega_conf = OmegaConf.create(config_data)
+            yaml_safe_dict = OmegaConf.to_container(omega_conf, resolve=True)
+
+        except Exception as e:
+            # Fallback: use simple dict conversion, filtering out None values
+            logger.warning(f"Failed to use OmegaConf for config serialization: {e}, using fallback")
+            config_data = {
+                "evolution_config": {k: v for k, v in asdict(evo_config).items() if v is not None},
+                "job_config": {k: v for k, v in asdict(job_config).items() if v is not None},
+                "database_config": {k: v for k, v in asdict(db_config).items() if v is not None},
+                "timestamp": datetime.now().isoformat(),
+                "results_directory": str(self.results_dir),
+            }
+            yaml_safe_dict = config_data

         config_path = Path(self.results_dir) / "experiment_config.yaml"
         config_path.parent.mkdir(parents=True, exist_ok=True)
         with config_path.open("w", encoding="utf-8") as f:
-            yaml.dump(config_data, f, default_flow_style=False, indent=2)
+            yaml.dump(yaml_safe_dict, f, default_flow_style=False, indent=2, allow_unicode=True)

         logger.info(f"Experiment configuration saved to {config_path}")

diff --git a/shinka/llm/client.py b/shinka/llm/client.py
index eaef6123..0b223b57 100644
--- a/shinka/llm/client.py
+++ b/shinka/llm/client.py
@@ -11,6 +11,7 @@
     OPENAI_MODELS,
     DEEPSEEK_MODELS,
     GEMINI_MODELS,
+    LOCAL_MODELS,
 )

 env_path = Path(__file__).parent.parent.parent / ".env"
@@ -78,6 +79,13 @@ def get_client_llm(model_name: str, structured_output: bool = False) -> Tuple[An
             client,
             mode=instructor.Mode.GEMINI_JSON,
         )
+    elif model_name in LOCAL_MODELS.keys():
+        client = openai.OpenAI(
+            api_key="not-needed",  # Local models don't need API key
+            base_url="http://localhost:8000/v1",
+        )
+        if structured_output:
+            raise NotImplementedError("Structured output not supported for local models.")
     else:
         raise ValueError(f"Model {model_name} not supported.")

diff --git a/shinka/llm/embedding.py b/shinka/llm/embedding.py
index 4082ad58..dc6dcb03 100644
--- a/shinka/llm/embedding.py
+++ b/shinka/llm/embedding.py
@@ -26,6 +26,10 @@
     "gemini-embedding-001",
 ]

+LOCAL_EMBEDDING_MODELS = [
+    "local-qwen3",  # Local embedding model name
+]
+
 OPENAI_EMBEDDING_COSTS = {
     "text-embedding-3-small": 0.02 / M,
     "text-embedding-3-large": 0.13 / M,
@@ -37,6 +41,11 @@
     "gemini-embedding-001": 0.0 / M,  # Check current pricing
 }

+# Local embedding costs (free)
+LOCAL_EMBEDDING_COSTS = {
+    "qwen3": 0.0 / M,  # Free local model
+}
+
 def get_client_model(model_name: str) -> tuple[Union[openai.OpenAI, str], str]:
     if model_name in OPENAI_EMBEDDING_MODELS:
         client = openai.OpenAI()
@@ -57,6 +66,13 @@ def get_client_model(model_name: str) -> tuple[Union[openai.OpenAI, str], str]:
         genai.configure(api_key=api_key)
         client = "gemini"  # Use string identifier for Gemini
         model_to_use = model_name
+    elif model_name in LOCAL_EMBEDDING_MODELS:
+        # Local OpenAI-compatible embedding model
+        client = openai.OpenAI(
+            api_key="not-needed",  # Local models don't need API key
+            base_url="http://localhost:8000/v1",
+        )
+        model_to_use = "qwen3"  # Use the actual model name for the API
     else:
         raise ValueError(f"Invalid embedding model: {model_name}")

@@ -128,8 +144,26 @@ def get_embedding(
         response = self.client.embeddings.create(
             model=self.model, input=code, encoding_format="float"
         )
-        cost = response.usage.total_tokens * OPENAI_EMBEDDING_COSTS[self.model]
+        # Get cost - use local cost if it's a local model, otherwise use OpenAI cost
+        # Handle cases where usage might be None (local models)
+        if response.usage and hasattr(response.usage, 'total_tokens'):
+            total_tokens = response.usage.total_tokens
+        else:
+            # Estimate tokens for local models (rough approximation)
+            total_tokens = sum(len(text.split()) for text in code)
+
+        if self.model_name in LOCAL_EMBEDDING_MODELS:
+            cost = total_tokens * LOCAL_EMBEDDING_COSTS.get(self.model, 0.0)
+        else:
+            cost = total_tokens * OPENAI_EMBEDDING_COSTS.get(self.model, 0.0)
         # Extract embedding from response
+        if response.data is None:
+            logger.error("Embedding response data is None")
+            if single_code:
+                return [], cost
+            else:
+                return [[]], cost
+
         if single_code:
             return response.data[0].embedding, cost
         else:
diff --git a/shinka/llm/models/__init__.py b/shinka/llm/models/__init__.py
index af5c3787..59029c07 100644
--- a/shinka/llm/models/__init__.py
+++ b/shinka/llm/models/__init__.py
@@ -2,6 +2,7 @@
 from .openai import query_openai
 from .deepseek import query_deepseek
 from .gemini import query_gemini
+from .local import query_local
 from .result import QueryResult

 __all__ = [
@@ -9,5 +10,6 @@
     "query_openai",
     "query_deepseek",
     "query_gemini",
+    "query_local",
     "QueryResult",
 ]
diff --git a/shinka/llm/models/local.py b/shinka/llm/models/local.py
new file mode 100644
index 00000000..0b81dd31
--- /dev/null
+++ b/shinka/llm/models/local.py
@@ -0,0 +1,88 @@
+import backoff
+import openai
+from .pricing import LOCAL_MODELS
+from .result import QueryResult
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+def backoff_handler(details):
+    exc = details.get("exception")
+    if exc:
+        logger.info(
+            f"Local LLM - Retry {details['tries']} due to error: {exc}. Waiting {details['wait']:0.1f}s..."
+        )
+
+
+@backoff.on_exception(
+    backoff.expo,
+    (
+        openai.APIConnectionError,
+        openai.APIStatusError,
+        openai.RateLimitError,
+        openai.APITimeoutError,
+    ),
+    max_tries=5,
+    max_value=20,
+    on_backoff=backoff_handler,
+)
+def query_local(
+    client,
+    model,
+    msg,
+    system_msg,
+    msg_history,
+    output_model,
+    model_posteriors=None,
+    **kwargs,
+) -> QueryResult:
+    """Query local OpenAI-compatible model."""
+    if output_model is not None:
+        raise NotImplementedError("Structured output not supported for local models.")
+    new_msg_history = msg_history + [{"role": "user", "content": msg}]
+
+    # Convert max_output_tokens to max_tokens for OpenAI-compatible API
+    local_kwargs = kwargs.copy()
+    if "max_output_tokens" in local_kwargs:
+        local_kwargs["max_tokens"] = local_kwargs.pop("max_output_tokens")
+
+    response = client.chat.completions.create(
+        model=model,
+        messages=[
+            {"role": "system", "content": system_msg},
+            *new_msg_history,
+        ],
+        **local_kwargs,
+        n=1,
+        stop=None,
+    )
+    content = response.choices[0].message.content
+    try:
+        thought = response.choices[0].message.reasoning_content
+    except AttributeError:
+        thought = ""
+    new_msg_history.append({"role": "assistant", "content": content})
+
+    # Get token usage, defaulting to 0 if not available
+    input_tokens = getattr(response.usage, 'prompt_tokens', 0) if response.usage else 0
+    output_tokens = getattr(response.usage, 'completion_tokens', 0) if response.usage else 0
+
+    input_cost = LOCAL_MODELS[model]["input_price"] * input_tokens
+    output_cost = LOCAL_MODELS[model]["output_price"] * output_tokens
+    return QueryResult(
+        content=content,
+        msg=msg,
+        system_msg=system_msg,
+        new_msg_history=new_msg_history,
+        model_name=model,
+        kwargs=kwargs,
+        input_tokens=input_tokens,
+        output_tokens=output_tokens,
+        cost=input_cost + output_cost,
+        input_cost=input_cost,
+        output_cost=output_cost,
+        thought=thought,
+        model_posteriors=model_posteriors,
+    )
+
diff --git a/shinka/llm/models/pricing.py b/shinka/llm/models/pricing.py
index c9c101a2..04efdccf 100644
--- a/shinka/llm/models/pricing.py
+++ b/shinka/llm/models/pricing.py
@@ -161,6 +161,13 @@
     ],
 }

+LOCAL_MODELS = {
+    "qwen3": {
+        "input_price": 0.0 / M,  # Free local model
+        "output_price": 0.0 / M,  # Free local model
+    },
+}
+
 REASONING_OAI_MODELS = [
     "o3-mini-2025-01-31",
     "o1-2024-12-17",
diff --git a/shinka/llm/query.py b/shinka/llm/query.py
index c88c7d7c..8e181752 100644
--- a/shinka/llm/query.py
+++ b/shinka/llm/query.py
@@ -8,6 +8,7 @@
     DEEPSEEK_MODELS,
     GEMINI_MODELS,
     BEDROCK_MODELS,
+    LOCAL_MODELS,
     REASONING_OAI_MODELS,
     REASONING_CLAUDE_MODELS,
     REASONING_DEEPSEEK_MODELS,
@@ -20,6 +21,7 @@
     query_openai,
     query_deepseek,
     query_gemini,
+    query_local,
     QueryResult,
 )
 import logging
@@ -204,6 +206,8 @@ def query(
         query_fn = query_deepseek
     elif model_name in GEMINI_MODELS.keys():
         query_fn = query_gemini
+    elif model_name in LOCAL_MODELS.keys():
+        query_fn = query_local
     else:
         raise ValueError(f"Model {model_name} not supported.")
     result = query_fn(
diff --git a/shinka/webui/visualization.py b/shinka/webui/visualization.py
index 35cce086..24692bd1 100644
--- a/shinka/webui/visualization.py
+++ b/shinka/webui/visualization.py
@@ -160,13 +160,19 @@ def handle_get_programs(self, db_path: str):
         # Extract the actual path by removing the task name prefix if present
         actual_db_path = self._get_actual_db_path(db_path)

-        # Check cache first
+        # Check cache first - but reduce cache time for active databases
+        # Clear cache if it's been more than 2 seconds (for active experiments)
         if db_path in db_cache:
             last_fetch_time, cached_data = db_cache[db_path]
-            if time.time() - last_fetch_time < CACHE_EXPIRATION_SECONDS:
-                print(f"[SERVER] Serving from cache for DB: {db_path}")
+            cache_age = time.time() - last_fetch_time
+            if cache_age < 2.0:  # Reduced from 5 to 2 seconds for more frequent updates
+                print(f"[SERVER] Serving from cache for DB: {db_path} (age: {cache_age:.1f}s, {len(cached_data)} programs)")
                 self.send_json_response(cached_data)
                 return
+            else:
+                # Cache expired, remove it
+                del db_cache[db_path]
+                print(f"[SERVER] Cache expired for DB: {db_path}, fetching fresh data")

         # Construct absolute path to the database from search root using actual path
         abs_db_path = os.path.join(self.search_root, actual_db_path)
@@ -183,14 +189,14 @@ def handle_get_programs(self, db_path: str):
         db = None
         try:
             config = DatabaseConfig(db_path=abs_db_path)
+            # Use read_only=True to avoid locking issues
+            # SQLite URI read-only connections should see WAL data if properly configured
             db = ProgramDatabase(config, read_only=True)
-
-            # Set WAL mode compatible settings for read-only connections
+
             if db.cursor:
-                db.cursor.execute(
-                    "PRAGMA busy_timeout = 10000;"
-                )  # 10 second timeout
-                db.cursor.execute("PRAGMA journal_mode = WAL;")  # Ensure WAL mode
+                db.cursor.execute("PRAGMA busy_timeout = 10000;")
+                # Ensure WAL mode is enabled (should already be set by database)
+                db.cursor.execute("PRAGMA journal_mode = WAL;")

             programs = db.get_all_programs()
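
Note on the hard-coded endpoint: the chat and embedding clients added above both assume an OpenAI-compatible server listening on http://localhost:8000/v1 that serves the model under the name "qwen3" (for example vLLM or llama.cpp's llama-server, started separately; the patch does not start it). A minimal smoke test of that assumption, using only the openai client the patch already relies on; the file name and prompt below are illustrative and not part of the patch:

    # smoke_test_local_endpoint.py (hypothetical helper, not included in this patch)
    import openai

    # Same connection settings that get_client_llm() and get_client_model() use
    # for LOCAL_MODELS / LOCAL_EMBEDDING_MODELS.
    client = openai.OpenAI(api_key="not-needed", base_url="http://localhost:8000/v1")

    # Chat completion against the locally served "qwen3" model.
    chat = client.chat.completions.create(
        model="qwen3",
        messages=[{"role": "user", "content": "Reply with the single word: ok"}],
        max_tokens=8,
    )
    print("chat:", chat.choices[0].message.content)

    # Embedding request, matching what embedding.py sends for "local-qwen3".
    emb = client.embeddings.create(
        model="qwen3", input=["hello world"], encoding_format="float"
    )
    print("embedding dim:", len(emb.data[0].embedding))

If the server registers the model under a different name, the "qwen3" entries in pricing.py and embedding.py and the model names in large_budget.yaml all need to agree on that name.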