From 9325a29b4ea8984370a4fb27d381c5c718272d66 Mon Sep 17 00:00:00 2001
From: Yuki Imajuku
Date: Mon, 27 Oct 2025 11:20:06 +0900
Subject: [PATCH] Add new models (2025-10-17)

---
 llm_configs/claude-4.5-haiku.json       | 17 +++++++++++++++++
 llm_configs/deepseek-v3.1-terminus.json |  2 +-
 llm_configs/glm-4.6.json                |  3 +--
 llm_configs/qwen3-coder-plus.json       | 15 +++++++++++++++
 src/ale_bench_eval/calc_cost.py         | 26 +++++++++++++++++++++++++-
 5 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 llm_configs/claude-4.5-haiku.json
 create mode 100644 llm_configs/qwen3-coder-plus.json

diff --git a/llm_configs/claude-4.5-haiku.json b/llm_configs/claude-4.5-haiku.json
new file mode 100644
index 0000000..df6ae92
--- /dev/null
+++ b/llm_configs/claude-4.5-haiku.json
@@ -0,0 +1,17 @@
+{
+    "model_name": "anthropic/claude-haiku-4.5",
+    "provider": "openrouter",
+    "settings": {
+        "temperature": 1.0,
+        "max_tokens": 64000,
+        "extra_body": {
+            "reasoning": {
+                "enabled": true,
+                "max_tokens": 32000
+            },
+            "usage": {
+                "include": true
+            }
+        }
+    }
+}
diff --git a/llm_configs/deepseek-v3.1-terminus.json b/llm_configs/deepseek-v3.1-terminus.json
index 5c19c4c..431293f 100644
--- a/llm_configs/deepseek-v3.1-terminus.json
+++ b/llm_configs/deepseek-v3.1-terminus.json
@@ -8,7 +8,7 @@
             "provider": {
                 "allow_fallbacks": false,
                 "data_collection": "deny",
-                "order": ["siliconflow/fp8"],
+                "order": ["deepinfra/fp4"],
                 "require_parameters": true
             },
             "reasoning": {
diff --git a/llm_configs/glm-4.6.json b/llm_configs/glm-4.6.json
index 87ec9ee..81cca0b 100644
--- a/llm_configs/glm-4.6.json
+++ b/llm_configs/glm-4.6.json
@@ -5,11 +5,10 @@
         "temperature": 1.0,
         "top_p": 0.95,
         "extra_body": {
-            "top_k": 40,
             "provider": {
                 "allow_fallbacks": false,
                 "data_collection": "deny",
-                "order": ["parasail/fp8"],
+                "order": ["z-ai"],
                 "require_parameters": true
             },
             "reasoning": {
diff --git a/llm_configs/qwen3-coder-plus.json b/llm_configs/qwen3-coder-plus.json
new file mode 100644
index 0000000..6843e09
--- /dev/null
+++ b/llm_configs/qwen3-coder-plus.json
@@ -0,0 +1,15 @@
+{
+    "model_name": "qwen/qwen3-coder-plus",
+    "provider": "openrouter",
+    "settings": {
+        "temperature": 0.7,
+        "max_tokens": 65536,
+        "top_p": 0.8,
+        "extra_body": {
+            "top_k": 20,
+            "usage": {
+                "include": true
+            }
+        }
+    }
+}
diff --git a/src/ale_bench_eval/calc_cost.py b/src/ale_bench_eval/calc_cost.py
index 02bf40d..5097c10 100644
--- a/src/ale_bench_eval/calc_cost.py
+++ b/src/ale_bench_eval/calc_cost.py
@@ -36,11 +36,21 @@
         ),
         output_mtok=TieredPrices(base=Decimal(15), tiers=[Tier(start=200000, price=Decimal(225) / Decimal(10))]),
     ),
+    "claude-haiku-4.5": ModelPrice(
+        input_mtok=Decimal(1),
+        cache_write_mtok=Decimal(125) / Decimal(100),
+        cache_read_mtok=Decimal(1) / Decimal(10),
+        output_mtok=Decimal(5),
+    ),
     "deepseek-v3.1": ModelPrice(input_mtok=Decimal(56) / Decimal(100), output_mtok=Decimal(168) / Decimal(100)),
     "deepseek-v3.1-terminus": ModelPrice(input_mtok=Decimal(27) / Decimal(100), output_mtok=Decimal(1)),
     "deepseek-r1-0528": ModelPrice(input_mtok=Decimal(79) / Decimal(100), output_mtok=Decimal(4)),
     "glm-4.5": ModelPrice(input_mtok=Decimal(59) / Decimal(100), output_mtok=Decimal(21) / Decimal(10)),
-    "glm-4.6": ModelPrice(input_mtok=Decimal(6) / Decimal(10), output_mtok=Decimal(21) / Decimal(10)),
+    "glm-4.6": ModelPrice(
+        input_mtok=Decimal(6) / Decimal(10),
+        output_mtok=Decimal(22) / Decimal(10),
+        cache_read_mtok=Decimal(11) / Decimal(100),
+    ),
     "gpt-oss-120b": ModelPrice(input_mtok=Decimal(1) / Decimal(10), output_mtok=Decimal(5) / Decimal(10)),
     "gpt-oss-20b": ModelPrice(input_mtok=Decimal(5) / Decimal(100), output_mtok=Decimal(2) / Decimal(10)),
     "grok-code-fast-1": ModelPrice(input_mtok=Decimal(2) / Decimal(10), output_mtok=Decimal(15) / Decimal(10)),
@@ -61,6 +71,20 @@
         input_mtok=Decimal(3) / Decimal(10), output_mtok=Decimal(29) / Decimal(10)
     ),
     "qwen3-coder": ModelPrice(input_mtok=Decimal(29) / Decimal(100), output_mtok=Decimal(12) / Decimal(10)),
+    "qwen3-coder-plus": ModelPrice(
+        input_mtok=TieredPrices(
+            base=Decimal(1),
+            tiers=[Tier(start=32000, price=Decimal(18) / Decimal(10))],
+        ),
+        output_mtok=TieredPrices(
+            base=Decimal(5),
+            tiers=[Tier(start=32000, price=Decimal(9))],
+        ),
+        cache_read_mtok=TieredPrices(
+            base=Decimal(1) / Decimal(10),
+            tiers=[Tier(start=32000, price=Decimal(18) / Decimal(100))],
+        ),
+    ),
     "qwen3-max": ModelPrice(
         input_mtok=TieredPrices(base=Decimal(12) / Decimal(10), tiers=[Tier(start=128000, price=Decimal(3))]),
         cache_read_mtok=TieredPrices(