From 9325a29b4ea8984370a4fb27d381c5c718272d66 Mon Sep 17 00:00:00 2001
From: Yuki Imajuku
Date: Mon, 27 Oct 2025 11:20:06 +0900
Subject: [PATCH] Add new models (2025-10-17)

---
 llm_configs/claude-4.5-haiku.json       | 17 +++++++++++++++++
 llm_configs/deepseek-v3.1-terminus.json |  2 +-
 llm_configs/glm-4.6.json                |  3 +--
 llm_configs/qwen3-coder-plus.json       | 15 +++++++++++++++
 src/ale_bench_eval/calc_cost.py         | 26 +++++++++++++++++++++++++-
 5 files changed, 59 insertions(+), 4 deletions(-)
 create mode 100644 llm_configs/claude-4.5-haiku.json
 create mode 100644 llm_configs/qwen3-coder-plus.json

diff --git a/llm_configs/claude-4.5-haiku.json b/llm_configs/claude-4.5-haiku.json
new file mode 100644
index 0000000..df6ae92
--- /dev/null
+++ b/llm_configs/claude-4.5-haiku.json
@@ -0,0 +1,17 @@
+{
+    "model_name": "anthropic/claude-haiku-4.5",
+    "provider": "openrouter",
+    "settings": {
+        "temperature": 1.0,
+        "max_tokens": 64000,
+        "extra_body": {
+            "reasoning": {
+                "enabled": true,
+                "max_tokens": 32000
+            },
+            "usage": {
+                "include": true
+            }
+        }
+    }
+}
diff --git a/llm_configs/deepseek-v3.1-terminus.json b/llm_configs/deepseek-v3.1-terminus.json
index 5c19c4c..431293f 100644
--- a/llm_configs/deepseek-v3.1-terminus.json
+++ b/llm_configs/deepseek-v3.1-terminus.json
@@ -8,7 +8,7 @@
             "provider": {
                 "allow_fallbacks": false,
                 "data_collection": "deny",
-                "order": ["siliconflow/fp8"],
+                "order": ["deepinfra/fp4"],
                 "require_parameters": true
             },
             "reasoning": {
diff --git a/llm_configs/glm-4.6.json b/llm_configs/glm-4.6.json
index 87ec9ee..81cca0b 100644
--- a/llm_configs/glm-4.6.json
+++ b/llm_configs/glm-4.6.json
@@ -5,11 +5,10 @@
         "temperature": 1.0,
         "top_p": 0.95,
         "extra_body": {
-            "top_k": 40,
             "provider": {
                 "allow_fallbacks": false,
                 "data_collection": "deny",
-                "order": ["parasail/fp8"],
+                "order": ["z-ai"],
                 "require_parameters": true
             },
             "reasoning": {
diff --git a/llm_configs/qwen3-coder-plus.json b/llm_configs/qwen3-coder-plus.json
new file mode 100644
index 0000000..6843e09
--- /dev/null
+++ b/llm_configs/qwen3-coder-plus.json
@@ -0,0 +1,15 @@
+{
+    "model_name": "qwen/qwen3-coder-plus",
+    "provider": "openrouter",
+    "settings": {
+        "temperature": 0.7,
+        "max_tokens": 65536,
+        "top_p": 0.8,
+        "extra_body": {
+            "top_k": 20,
+            "usage": {
+                "include": true
+            }
+        }
+    }
+}
diff --git a/src/ale_bench_eval/calc_cost.py b/src/ale_bench_eval/calc_cost.py
index 02bf40d..5097c10 100644
--- a/src/ale_bench_eval/calc_cost.py
+++ b/src/ale_bench_eval/calc_cost.py
@@ -36,11 +36,21 @@
         ),
         output_mtok=TieredPrices(base=Decimal(15), tiers=[Tier(start=200000, price=Decimal(225) / Decimal(10))]),
     ),
+    "claude-haiku-4.5": ModelPrice(
+        input_mtok=Decimal(1),
+        cache_write_mtok=Decimal(125) / Decimal(100),
+        cache_read_mtok=Decimal(1) / Decimal(10),
+        output_mtok=Decimal(5),
+    ),
     "deepseek-v3.1": ModelPrice(input_mtok=Decimal(56) / Decimal(100), output_mtok=Decimal(168) / Decimal(100)),
     "deepseek-v3.1-terminus": ModelPrice(input_mtok=Decimal(27) / Decimal(100), output_mtok=Decimal(1)),
     "deepseek-r1-0528": ModelPrice(input_mtok=Decimal(79) / Decimal(100), output_mtok=Decimal(4)),
     "glm-4.5": ModelPrice(input_mtok=Decimal(59) / Decimal(100), output_mtok=Decimal(21) / Decimal(10)),
-    "glm-4.6": ModelPrice(input_mtok=Decimal(6) / Decimal(10), output_mtok=Decimal(21) / Decimal(10)),
+    "glm-4.6": ModelPrice(
+        input_mtok=Decimal(6) / Decimal(10),
+        output_mtok=Decimal(22) / Decimal(10),
+        cache_read_mtok=Decimal(11) / Decimal(100),
+    ),
     "gpt-oss-120b": ModelPrice(input_mtok=Decimal(1) / Decimal(10), output_mtok=Decimal(5) / Decimal(10)),
     "gpt-oss-20b": ModelPrice(input_mtok=Decimal(5) / Decimal(100), output_mtok=Decimal(2) / Decimal(10)),
     "grok-code-fast-1": ModelPrice(input_mtok=Decimal(2) / Decimal(10), output_mtok=Decimal(15) / Decimal(10)),
@@ -61,6 +71,20 @@
         input_mtok=Decimal(3) / Decimal(10), output_mtok=Decimal(29) / Decimal(10)
     ),
     "qwen3-coder": ModelPrice(input_mtok=Decimal(29) / Decimal(100), output_mtok=Decimal(12) / Decimal(10)),
+    "qwen3-coder-plus": ModelPrice(
+        input_mtok=TieredPrices(
+            base=Decimal(1),
+            tiers=[Tier(start=32000, price=Decimal(18) / Decimal(10))],
+        ),
+        output_mtok=TieredPrices(
+            base=Decimal(5),
+            tiers=[Tier(start=32000, price=Decimal(9))],
+        ),
+        cache_read_mtok=TieredPrices(
+            base=Decimal(1) / Decimal(10),
+            tiers=[Tier(start=32000, price=Decimal(18) / Decimal(100))],
+        ),
+    ),
     "qwen3-max": ModelPrice(
         input_mtok=TieredPrices(base=Decimal(12) / Decimal(10), tiers=[Tier(start=128000, price=Decimal(3))]),
         cache_read_mtok=TieredPrices(