11 changes: 3 additions & 8 deletions configs/evolution/large_budget.yaml
@@ -13,12 +13,7 @@ evo_config:
max_patch_resamples: 3
max_patch_attempts: 3
llm_models:
- "gpt-4.1"
- "gpt-4.1-mini"
- "gpt-4.1-nano"
- "bedrock/us.anthropic.claude-sonnet-4-20250514-v1:0"
- "o4-mini"
llm_dynamic_selection: ucb
- "qwen3"
llm_kwargs:
temperatures:
- 0.0
@@ -27,10 +22,10 @@ evo_config:
max_tokens: 16384
meta_rec_interval: 10
meta_llm_models:
- "gpt-4.1"
- "qwen3"
meta_llm_kwargs:
temperatures:
- 0.0
embedding_model: "text-embedding-3-small"
embedding_model: "local-qwen3"
results_dir: ${output_dir}
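
This config now assumes an OpenAI-compatible server running locally at http://localhost:8000/v1 (for example vLLM or a llama.cpp server) that exposes "qwen3" for both chat and embeddings; the server setup itself is not part of this PR. A minimal smoke test of that assumption, using only the standard openai client:

import openai

# Hypothetical local endpoint assumed by this config; adjust host/port as needed.
client = openai.OpenAI(api_key="not-needed", base_url="http://localhost:8000/v1")

# Chat model registered as "qwen3" on the local server.
chat = client.chat.completions.create(
    model="qwen3",
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=8,
)
print(chat.choices[0].message.content)

# Embedding model registered as "qwen3" (mapped from "local-qwen3" in shinka).
emb = client.embeddings.create(model="qwen3", input="ping", encoding_format="float")
print(len(emb.data[0].embedding))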

38 changes: 29 additions & 9 deletions shinka/core/runner.py
@@ -7,7 +7,7 @@
from rich.table import Table
from rich.console import Console
import rich.box
from typing import List, Optional, Union, cast
from typing import List, Optional, Union, cast, Any
from datetime import datetime
from pathlib import Path
from dataclasses import dataclass, field, asdict
@@ -275,19 +275,39 @@ def _save_experiment_config(
db_config: DatabaseConfig,
) -> None:
"""Save experiment configuration to a YAML file."""
config_data = {
"evolution_config": asdict(evo_config),
"job_config": asdict(job_config),
"database_config": asdict(db_config),
"timestamp": datetime.now().isoformat(),
"results_directory": str(self.results_dir),
}
try:
# Use OmegaConf to convert to a YAML-safe structure
from omegaconf import OmegaConf

config_data = {
"evolution_config": asdict(evo_config),
"job_config": asdict(job_config),
"database_config": asdict(db_config),
"timestamp": datetime.now().isoformat(),
"results_directory": str(self.results_dir),
}

# Convert to OmegaConf and then to YAML-safe container
omega_conf = OmegaConf.create(config_data)
yaml_safe_dict = OmegaConf.to_container(omega_conf, resolve=True)

except Exception as e:
# Fallback: use simple dict conversion, filtering out None values
logger.warning(f"Failed to use OmegaConf for config serialization: {e}, using fallback")
config_data = {
"evolution_config": {k: v for k, v in asdict(evo_config).items() if v is not None},
"job_config": {k: v for k, v in asdict(job_config).items() if v is not None},
"database_config": {k: v for k, v in asdict(db_config).items() if v is not None},
"timestamp": datetime.now().isoformat(),
"results_directory": str(self.results_dir),
}
yaml_safe_dict = config_data

config_path = Path(self.results_dir) / "experiment_config.yaml"
config_path.parent.mkdir(parents=True, exist_ok=True)

with config_path.open("w", encoding="utf-8") as f:
yaml.dump(config_data, f, default_flow_style=False, indent=2)
yaml.dump(yaml_safe_dict, f, default_flow_style=False, indent=2, allow_unicode=True)

logger.info(f"Experiment configuration saved to {config_path}")

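For reference, the OmegaConf round trip used above reduces to the following self-contained sketch (the DemoConfig dataclass is a hypothetical stand-in for the real config dataclasses):

from dataclasses import dataclass, field, asdict
from omegaconf import OmegaConf
import yaml

@dataclass
class DemoConfig:  # hypothetical stand-in for the evolution/job/database configs
    llm_models: list = field(default_factory=lambda: ["qwen3"])
    max_patch_attempts: int = 3

config_data = {"evolution_config": asdict(DemoConfig())}
# OmegaConf.create normalizes the structure; to_container(resolve=True) resolves
# any interpolations and returns plain dicts/lists that yaml.dump can serialize
# without custom representers.
yaml_safe = OmegaConf.to_container(OmegaConf.create(config_data), resolve=True)
print(yaml.dump(yaml_safe, default_flow_style=False, indent=2, allow_unicode=True))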
8 changes: 8 additions & 0 deletions shinka/llm/client.py
@@ -11,6 +11,7 @@
OPENAI_MODELS,
DEEPSEEK_MODELS,
GEMINI_MODELS,
LOCAL_MODELS,
)

env_path = Path(__file__).parent.parent.parent / ".env"
@@ -78,6 +79,13 @@ def get_client_llm(model_name: str, structured_output: bool = False) -> Tuple[An
client,
mode=instructor.Mode.GEMINI_JSON,
)
elif model_name in LOCAL_MODELS.keys():
client = openai.OpenAI(
api_key="not-needed", # Local models don't need API key
base_url="http://localhost:8000/v1",
)
if structured_output:
raise NotImplementedError("Structured output not supported for local models.")
else:
raise ValueError(f"Model {model_name} not supported.")

36 changes: 35 additions & 1 deletion shinka/llm/embedding.py
@@ -26,6 +26,10 @@
"gemini-embedding-001",
]

LOCAL_EMBEDDING_MODELS = [
"local-qwen3", # Local embedding model name
]

OPENAI_EMBEDDING_COSTS = {
"text-embedding-3-small": 0.02 / M,
"text-embedding-3-large": 0.13 / M,
@@ -37,6 +41,11 @@
"gemini-embedding-001": 0.0 / M, # Check current pricing
}

# Local embedding costs (free)
LOCAL_EMBEDDING_COSTS = {
"qwen3": 0.0 / M, # Free local model
}

def get_client_model(model_name: str) -> tuple[Union[openai.OpenAI, str], str]:
if model_name in OPENAI_EMBEDDING_MODELS:
client = openai.OpenAI()
@@ -57,6 +66,13 @@ def get_client_model(model_name: str) -> tuple[Union[openai.OpenAI, str], str]:
genai.configure(api_key=api_key)
client = "gemini" # Use string identifier for Gemini
model_to_use = model_name
elif model_name in LOCAL_EMBEDDING_MODELS:
# Local OpenAI-compatible embedding model
client = openai.OpenAI(
api_key="not-needed", # Local models don't need API key
base_url="http://localhost:8000/v1",
)
model_to_use = "qwen3" # Use the actual model name for the API
else:
raise ValueError(f"Invalid embedding model: {model_name}")

@@ -128,8 +144,26 @@ def get_embedding(
response = self.client.embeddings.create(
model=self.model, input=code, encoding_format="float"
)
cost = response.usage.total_tokens * OPENAI_EMBEDDING_COSTS[self.model]
# Get cost - use local cost if it's a local model, otherwise use OpenAI cost
# Handle cases where usage might be None (local models)
if response.usage and hasattr(response.usage, 'total_tokens'):
total_tokens = response.usage.total_tokens
else:
# Estimate tokens for local models (rough approximation)
total_tokens = sum(len(text.split()) for text in code)

if self.model_name in LOCAL_EMBEDDING_MODELS:
cost = total_tokens * LOCAL_EMBEDDING_COSTS.get(self.model, 0.0)
else:
cost = total_tokens * OPENAI_EMBEDDING_COSTS.get(self.model, 0.0)
# Extract embedding from response
if response.data is None:
logger.error("Embedding response data is None")
if single_code:
return [], cost
else:
return [[]], cost

if single_code:
return response.data[0].embedding, cost
else:
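The usage fallback above is a word-split heuristic, which under-counts real tokenizer output for code; since the local price is zero it only affects bookkeeping. A standalone illustration of that fallback path (inputs are hypothetical):

M = 1_000_000
LOCAL_EMBEDDING_COSTS = {"qwen3": 0.0 / M}

code_batch = ["def add(a, b):\n    return a + b", "print('hello')"]
usage = None  # local servers may return no usage block

if usage is not None and hasattr(usage, "total_tokens"):
    total_tokens = usage.total_tokens
else:
    # Rough approximation: whitespace-separated words instead of tokenizer tokens.
    total_tokens = sum(len(text.split()) for text in code_batch)

cost = total_tokens * LOCAL_EMBEDDING_COSTS.get("qwen3", 0.0)
print(total_tokens, cost)  # 8 words, cost 0.0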
2 changes: 2 additions & 0 deletions shinka/llm/models/__init__.py
@@ -2,12 +2,14 @@
from .openai import query_openai
from .deepseek import query_deepseek
from .gemini import query_gemini
from .local import query_local
from .result import QueryResult

__all__ = [
"query_anthropic",
"query_openai",
"query_deepseek",
"query_gemini",
"query_local",
"QueryResult",
]
88 changes: 88 additions & 0 deletions shinka/llm/models/local.py
@@ -0,0 +1,88 @@
import backoff
import openai
from .pricing import LOCAL_MODELS
from .result import QueryResult
import logging

logger = logging.getLogger(__name__)


def backoff_handler(details):
exc = details.get("exception")
if exc:
logger.info(
f"Local LLM - Retry {details['tries']} due to error: {exc}. Waiting {details['wait']:0.1f}s..."
)


@backoff.on_exception(
backoff.expo,
(
openai.APIConnectionError,
openai.APIStatusError,
openai.RateLimitError,
openai.APITimeoutError,
),
max_tries=5,
max_value=20,
on_backoff=backoff_handler,
)
def query_local(
client,
model,
msg,
system_msg,
msg_history,
output_model,
model_posteriors=None,
**kwargs,
) -> QueryResult:
"""Query local OpenAI-compatible model."""
if output_model is not None:
raise NotImplementedError("Structured output not supported for local models.")
new_msg_history = msg_history + [{"role": "user", "content": msg}]

# Convert max_output_tokens to max_tokens for OpenAI-compatible API
local_kwargs = kwargs.copy()
if "max_output_tokens" in local_kwargs:
local_kwargs["max_tokens"] = local_kwargs.pop("max_output_tokens")

response = client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_msg},
*new_msg_history,
],
**local_kwargs,
n=1,
stop=None,
)
content = response.choices[0].message.content
try:
thought = response.choices[0].message.reasoning_content
    except AttributeError:
thought = ""
new_msg_history.append({"role": "assistant", "content": content})

# Get token usage, defaulting to 0 if not available
input_tokens = getattr(response.usage, 'prompt_tokens', 0) if response.usage else 0
output_tokens = getattr(response.usage, 'completion_tokens', 0) if response.usage else 0

input_cost = LOCAL_MODELS[model]["input_price"] * input_tokens
output_cost = LOCAL_MODELS[model]["output_price"] * output_tokens
return QueryResult(
content=content,
msg=msg,
system_msg=system_msg,
new_msg_history=new_msg_history,
model_name=model,
kwargs=kwargs,
input_tokens=input_tokens,
output_tokens=output_tokens,
cost=input_cost + output_cost,
input_cost=input_cost,
output_cost=output_cost,
thought=thought,
model_posteriors=model_posteriors,
)
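
A minimal usage sketch for query_local, assuming a local OpenAI-compatible server on http://localhost:8000/v1 that serves "qwen3" (running the server is outside this diff):

import openai
from shinka.llm.models import query_local

client = openai.OpenAI(api_key="not-needed", base_url="http://localhost:8000/v1")
result = query_local(
    client=client,
    model="qwen3",
    msg="Suggest one micro-optimization for a Python hot loop.",
    system_msg="You are a concise coding assistant.",
    msg_history=[],
    output_model=None,      # structured output is not supported for local models
    temperature=0.0,
    max_output_tokens=256,  # converted to max_tokens before the API call
)
print(result.content, result.cost)  # cost is 0.0 for the free local entry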

7 changes: 7 additions & 0 deletions shinka/llm/models/pricing.py
@@ -161,6 +161,13 @@
],
}

LOCAL_MODELS = {
"qwen3": {
"input_price": 0.0 / M, # Free local model
"output_price": 0.0 / M, # Free local model
},
}

REASONING_OAI_MODELS = [
"o3-mini-2025-01-31",
"o1-2024-12-17",
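Cost accounting for local models reuses the same per-token shape as the hosted entries; a self-contained sketch of how a second local entry would look and how costs fold to zero (the extra model name is hypothetical, not part of this PR):

M = 1_000_000

LOCAL_MODELS = {
    "qwen3": {"input_price": 0.0 / M, "output_price": 0.0 / M},
    "llama3-8b-local": {"input_price": 0.0 / M, "output_price": 0.0 / M},  # hypothetical
}

input_tokens, output_tokens = 1200, 400
entry = LOCAL_MODELS["qwen3"]
cost = entry["input_price"] * input_tokens + entry["output_price"] * output_tokens
print(cost)  # 0.0 -- local inference is treated as free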
4 changes: 4 additions & 0 deletions shinka/llm/query.py
@@ -8,6 +8,7 @@
DEEPSEEK_MODELS,
GEMINI_MODELS,
BEDROCK_MODELS,
LOCAL_MODELS,
REASONING_OAI_MODELS,
REASONING_CLAUDE_MODELS,
REASONING_DEEPSEEK_MODELS,
@@ -20,6 +21,7 @@
query_openai,
query_deepseek,
query_gemini,
query_local,
QueryResult,
)
import logging
@@ -204,6 +206,8 @@ def query(
query_fn = query_deepseek
elif model_name in GEMINI_MODELS.keys():
query_fn = query_gemini
elif model_name in LOCAL_MODELS.keys():
query_fn = query_local
else:
raise ValueError(f"Model {model_name} not supported.")
result = query_fn(
24 changes: 15 additions & 9 deletions shinka/webui/visualization.py
@@ -160,13 +160,19 @@ def handle_get_programs(self, db_path: str):
# Extract the actual path by removing the task name prefix if present
actual_db_path = self._get_actual_db_path(db_path)

# Check cache first
# Check cache first - but reduce cache time for active databases
# Clear cache if it's been more than 2 seconds (for active experiments)
if db_path in db_cache:
last_fetch_time, cached_data = db_cache[db_path]
if time.time() - last_fetch_time < CACHE_EXPIRATION_SECONDS:
print(f"[SERVER] Serving from cache for DB: {db_path}")
cache_age = time.time() - last_fetch_time
if cache_age < 2.0: # Reduced from 5 to 2 seconds for more frequent updates
print(f"[SERVER] Serving from cache for DB: {db_path} (age: {cache_age:.1f}s, {len(cached_data)} programs)")
self.send_json_response(cached_data)
return
else:
# Cache expired, remove it
del db_cache[db_path]
print(f"[SERVER] Cache expired for DB: {db_path}, fetching fresh data")

# Construct absolute path to the database from search root using actual path
abs_db_path = os.path.join(self.search_root, actual_db_path)
@@ -183,14 +189,14 @@ def handle_get_programs(self, db_path: str):
db = None
try:
config = DatabaseConfig(db_path=abs_db_path)
# Use read_only=True to avoid locking issues
# SQLite URI read-only connections should see WAL data if properly configured
db = ProgramDatabase(config, read_only=True)

# Set WAL mode compatible settings for read-only connections

if db.cursor:
db.cursor.execute(
"PRAGMA busy_timeout = 10000;"
) # 10 second timeout
db.cursor.execute("PRAGMA journal_mode = WAL;") # Ensure WAL mode
db.cursor.execute("PRAGMA busy_timeout = 10000;")
# Ensure WAL mode is enabled (should already be set by database)
db.cursor.execute("PRAGMA journal_mode = WAL;")

programs = db.get_all_programs()

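For reference, the read-only + WAL combination these comments rely on can be reproduced with plain sqlite3; a sketch with a hypothetical database path and table name (ProgramDatabase wraps the equivalent logic):

import sqlite3

# Hypothetical path; mode=ro opens the file read-only via SQLite's URI syntax.
conn = sqlite3.connect("file:results/evolution_db.sqlite?mode=ro", uri=True, timeout=10)
cur = conn.cursor()
cur.execute("PRAGMA busy_timeout = 10000;")  # wait up to 10 s if a writer holds the lock
# Querying journal_mode on a database already in WAL returns 'wal'; readers then
# see committed writes from the running experiment without blocking it.
print(cur.execute("PRAGMA journal_mode;").fetchone())
rows = cur.execute("SELECT COUNT(*) FROM programs").fetchone()  # table name is illustrative
print(rows)
conn.close()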