Skip to content

Commit 9d75e8d

Browse files
authored
Merge pull request #39 from pinchbench/scuttlebot/fail-fast-bad-model
2 parents 2a21f71 + 31a714b commit 9d75e8d

2 files changed

Lines changed: 115 additions & 0 deletions

File tree

scripts/benchmark.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@
2727
cleanup_agent_sessions,
2828
ensure_agent_exists,
2929
execute_openclaw_task,
30+
ModelValidationError,
3031
slugify_model,
32+
validate_openrouter_model,
3133
)
3234
from lib_grading import GradeResult, grade_task
3335
from lib_tasks import Task, TaskLoader
@@ -559,6 +561,13 @@ def main():
559561
# Use a shared workspace for the agent - we'll copy fixtures per task
560562
agent_workspace = Path(f"/tmp/pinchbench/{run_id}/agent_workspace")
561563

564+
# Validate model exists before wasting time on tasks
565+
try:
566+
validate_openrouter_model(args.model)
567+
except ModelValidationError as exc:
568+
logger.error("❌ %s", exc)
569+
sys.exit(1)
570+
562571
ensure_agent_exists(agent_id, args.model, agent_workspace)
563572
cleanup_agent_sessions(agent_id)
564573

scripts/lib_agent.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,19 @@
1111
import time
1212
from pathlib import Path
1313
from typing import Any, Dict, List
14+
from urllib import error, request
1415

1516
from lib_tasks import Task
1617

1718

1819
logger = logging.getLogger(__name__)
20+
21+
22+
class ModelValidationError(Exception):
    """Raised when a model ID is invalid or inaccessible.

    The exception message is intended to be shown to the user as-is; it may
    include suggested alternative model IDs.
    """
25+
26+
1927
MAX_OPENCLAW_MESSAGE_CHARS = int(os.environ.get("PINCHBENCH_MAX_MSG_CHARS", "4000"))
2028

2129

@@ -24,6 +32,104 @@ def slugify_model(model_id: str) -> str:
2432

2533

2634

35+
def validate_openrouter_model(model_id: str, timeout_seconds: float = 10.0) -> bool:
    """
    Validate that a model ID exists on OpenRouter.

    Validation is best-effort: it only fails when OpenRouter positively
    reports the model as missing (HTTP 404 on the per-model endpoint and no
    exact match in the catalog). Missing credentials, network problems and
    non-404 API errors are logged and treated as "valid" so that a flaky
    validation step never blocks a run.

    Args:
        model_id: Model ID (with or without openrouter/ prefix)
        timeout_seconds: HTTP request timeout

    Returns:
        True if the model is valid, or if validation was skipped/inconclusive

    Raises:
        ModelValidationError: If the model does not exist on OpenRouter
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import quote

    # Strip openrouter/ prefix if present
    bare_model_id = model_id
    if bare_model_id.startswith("openrouter/"):
        bare_model_id = bare_model_id[len("openrouter/"):]

    # Skip validation for non-OpenRouter models (no "provider/model" shape)
    if "/" not in bare_model_id:
        logger.info("Skipping model validation for non-OpenRouter model: %s", model_id)
        return True

    api_key = os.environ.get("OPENROUTER_API_KEY")
    if not api_key:
        logger.warning("OPENROUTER_API_KEY not set, skipping model validation")
        return True

    logger.info("🔍 Validating model: %s", bare_model_id)

    headers = {
        "Authorization": f"Bearer {api_key}",
        "HTTP-Referer": "https://pinchbench.com",
        "X-Title": "PinchBench",
    }

    # First, try the specific model endpoint (fast path for valid models).
    # quote() encodes "/" as %2F like the previous hand-rolled replace, and
    # additionally escapes whitespace and other reserved characters that
    # would otherwise make urllib raise http.client.InvalidURL. ":" is kept
    # literal so variant suffixes such as ":free" are sent unchanged.
    encoded_model_id = quote(bare_model_id, safe=":")
    specific_endpoint = f"https://openrouter.ai/api/v1/models/{encoded_model_id}"
    req = request.Request(specific_endpoint, headers=headers, method="GET")
    try:
        with request.urlopen(req, timeout=timeout_seconds):
            # Model exists - validation passed
            logger.info("✅ Model validated: %s", bare_model_id)
            return True
    except error.HTTPError as exc:
        if exc.code != 404:
            logger.warning("OpenRouter API error during validation: %s", exc)
            return True
        # 404: fall through and fetch the full catalog for suggestions.
    except OSError as exc:
        # URLError is an OSError; catching the base class also covers raw
        # socket timeouts raised while waiting for the response.
        logger.warning("Network error during model validation: %s", exc)
        return True

    not_found_msg = f"Model '{bare_model_id}' not found on OpenRouter."

    # Model not found - fetch full catalog for "did you mean" suggestions
    catalog_endpoint = "https://openrouter.ai/api/v1/models"
    req = request.Request(catalog_endpoint, headers=headers, method="GET")
    try:
        with request.urlopen(req, timeout=timeout_seconds) as resp:
            data = json.loads(resp.read().decode("utf-8"))
    except error.HTTPError as exc:
        logger.warning("OpenRouter API error fetching model catalog: %s", exc)
        raise ModelValidationError(not_found_msg) from exc
    except OSError as exc:
        logger.warning("Network error fetching model catalog: %s", exc)
        raise ModelValidationError(not_found_msg) from exc
    except ValueError as exc:
        # Covers json.JSONDecodeError and UnicodeDecodeError.
        logger.warning("Failed to parse OpenRouter response: %s", exc)
        raise ModelValidationError(not_found_msg) from exc

    # Be defensive about the payload shape: anything unexpected simply
    # yields an empty catalog instead of an AttributeError/TypeError.
    models = data.get("data") if isinstance(data, dict) else None
    if not isinstance(models, list):
        models = []
    model_ids = {
        m["id"]
        for m in models
        if isinstance(m, dict) and isinstance(m.get("id"), str) and m["id"]
    }

    # The catalog is authoritative: if it lists the exact ID, accept the
    # model rather than reporting "not found, did you mean <same id>?".
    if bare_model_id in model_ids:
        logger.info("✅ Model validated: %s", bare_model_id)
        return True

    raise ModelValidationError(_format_model_not_found(bare_model_id, model_ids))


def _format_model_not_found(bare_model_id: str, model_ids: set) -> str:
    """Build the "model not found" message, with up to five suggestions."""
    from difflib import get_close_matches

    message = f"Model '{bare_model_id}' not found on OpenRouter."
    wanted = bare_model_id.lower()

    # Substring matches first (e.g. a missing or extra version suffix).
    suggestions = sorted(
        mid for mid in model_ids if wanted in mid.lower() or mid.lower() in wanted
    )[:5]

    # Otherwise fall back to fuzzy matching to catch genuine typos.
    if not suggestions:
        by_lower = {mid.lower(): mid for mid in sorted(model_ids)}
        suggestions = [
            by_lower[key]
            for key in get_close_matches(wanted, list(by_lower), n=5, cutoff=0.6)
        ]

    if suggestions:
        return f"{message} Did you mean: {', '.join(suggestions)}?"

    # Nothing similar: list a few models from the same provider, if any.
    provider = bare_model_id.split("/", 1)[0]
    if provider:
        provider_models = sorted(m for m in model_ids if m.startswith(f"{provider}/"))
        if provider_models:
            message += f" Available {provider} models: {', '.join(provider_models[:5])}"
    return message
131+
132+
27133
def _get_agent_workspace(agent_id: str) -> Path | None:
28134
"""Get the workspace path for an agent from OpenClaw config."""
29135
try:

0 commit comments

Comments
 (0)