From 38c34902466ea95e0cac8372772923e1bed7a517 Mon Sep 17 00:00:00 2001 From: 0x4007 Date: Tue, 12 May 2026 15:59:50 -0400 Subject: [PATCH] Expose model context windows --- README.md | 4 ++++ scripts/codex-models.ts | 14 ++++++++++++++ src/admin.ts | 14 ++++++++++++++ src/openai.ts | 3 +++ static/chat.js | 11 +++++++++++ static/docs/llms-agents.md | 9 +++++---- tests/admin-auth.test.ts | 17 ++++++++++++++++- tests/codex-models.test.ts | 5 ++++- tests/openai-compat.test.ts | 9 +++++++++ 9 files changed, 80 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index d7bad26..47c5fae 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,10 @@ curl -sS https://ai.ubq.fi/uos/models/capabilities \ -H "Authorization: Bearer $UOS_AI_TOKEN" ``` +Use this endpoint, not `/v1/models`, when clients need gateway-specific fields such as reasoning support or token-window +limits (`context_window_tokens`, `max_context_window_tokens`, and `auto_compact_token_limit_tokens`). `/v1/models` keeps +the strict OpenAI model-object shape. + Whoami (debug which auth method was used; never returns raw secrets): ```bash diff --git a/scripts/codex-models.ts b/scripts/codex-models.ts index 164dd97..b8b3ede 100644 --- a/scripts/codex-models.ts +++ b/scripts/codex-models.ts @@ -209,6 +209,12 @@ const isRecord = (value: unknown): value is Record => const getString = (value: unknown): string | null => (typeof value === "string" ? value : null); +const getNonNegativeInteger = (value: unknown): number | null => { + if (typeof value !== "number" || !Number.isFinite(value)) return null; + const normalized = Math.trunc(value); + return normalized >= 0 ? normalized : null; +}; + const isHiddenCodexModel = (value: Record): boolean => getString(value.visibility)?.trim().toLowerCase() === "hide"; @@ -241,6 +247,14 @@ export const extractCodexModelsFromText = (text: string): ExtractedCodexModels | if (displayName) normalized.display_name = displayName; const description = getString(parsed.description); if (description) normalized.description = description; + for (const key of ["context_window", "max_context_window", "auto_compact_token_limit"]) { + if (parsed[key] === null) { + normalized[key] = null; + continue; + } + const count = getNonNegativeInteger(parsed[key]); + if (count !== null) normalized[key] = count; + } const defaultReasoning = getString(parsed.default_reasoning_level); if (defaultReasoning) normalized.default_reasoning_level = defaultReasoning; if (Array.isArray(parsed.supported_reasoning_levels)) { diff --git a/src/admin.ts b/src/admin.ts index e308dca..b3722d4 100644 --- a/src/admin.ts +++ b/src/admin.ts @@ -75,6 +75,12 @@ const MAX_KV_MIGRATION_BODY_BYTES = 5 * 1024 * 1024; const isHiddenCodexModel = (value: Record): boolean => getString(value.visibility)?.trim().toLowerCase() === "hide"; +const normalizeNonNegativeInteger = (value: unknown): number | null => { + if (typeof value !== "number" || !Number.isFinite(value)) return null; + const normalized = Math.trunc(value); + return normalized >= 0 ? normalized : null; +}; + const resolveDefaultModel = async (entryValue: unknown): Promise => { const configured = typeof entryValue === "string" ? entryValue.trim() : ""; if (configured) return configured; @@ -611,6 +617,14 @@ const normalizeCodexModelsPayload = (value: unknown): CodexModelsSnapshot | null if (displayName) normalized.display_name = displayName; const description = getString(item.description); if (description) normalized.description = description; + for (const key of ["context_window", "max_context_window", "auto_compact_token_limit"]) { + if (item[key] === null) { + normalized[key] = null; + continue; + } + const count = normalizeNonNegativeInteger(item[key]); + if (count !== null) normalized[key] = count; + } const defaultReasoning = getString(item.default_reasoning_level); if (defaultReasoning) normalized.default_reasoning_level = defaultReasoning; if (Array.isArray(item.supported_reasoning_levels)) { diff --git a/src/openai.ts b/src/openai.ts index c78fd60..8230585 100644 --- a/src/openai.ts +++ b/src/openai.ts @@ -1308,6 +1308,9 @@ const normalizeModelCapabilitiesEntry = (value: unknown): Record { merged.default_reasoning_effort = defaultReasoning; merged.default_reasoning_level = defaultReasoning; } + for ( + const key of [ + "context_window_tokens", + "max_context_window_tokens", + "auto_compact_token_limit_tokens", + ] + ) { + if (typeof capability[key] === "number" || capability[key] === null) { + merged[key] = capability[key]; + } + } return merged; }); }; diff --git a/static/docs/llms-agents.md b/static/docs/llms-agents.md index 7a0f175..5146bb9 100644 --- a/static/docs/llms-agents.md +++ b/static/docs/llms-agents.md @@ -135,8 +135,9 @@ backed by Codex with a ChatGPT account, so some OpenAI API model aliases may not Hidden Codex catalog entries such as internal review models are filtered during snapshot upload and are not exposed. Use `GET /uos/models/capabilities` for gateway-specific model metadata such as `supported_reasoning_levels`, -`default_reasoning_effort`, `supported_endpoints`, and `upstream_provider`. This metadata is intentionally not included -in `/v1/models` so OpenAI-compatible SDKs receive an OpenAI-shaped response. +`default_reasoning_effort`, `context_window_tokens`, `max_context_window_tokens`, `auto_compact_token_limit_tokens`, +`supported_endpoints`, and `upstream_provider`. This metadata is intentionally not included in `/v1/models` so +OpenAI-compatible SDKs receive an OpenAI-shaped response. Observed integration behavior: @@ -330,8 +331,8 @@ Defaults can be managed via `/admin/defaults` (admin auth required). When no mod uses the first model in the current Codex model snapshot. If neither a configured default nor a snapshot is available, no-model requests fail with `503` instead of fetching a live fallback catalog. -`GET /uos/models/capabilities` is the endpoint to inspect reasoning support programmatically. `/v1/models` remains -OpenAI-compatible and does not include reasoning metadata. +`GET /uos/models/capabilities` is the endpoint to inspect reasoning support and token-window limits programmatically. +`/v1/models` remains OpenAI-compatible and does not include gateway metadata. ## Ignored parameters and warnings diff --git a/tests/admin-auth.test.ts b/tests/admin-auth.test.ts index fd85436..8ff516d 100644 --- a/tests/admin-auth.test.ts +++ b/tests/admin-auth.test.ts @@ -102,6 +102,9 @@ Deno.test("admin codex auth stores CLI model snapshot as source of truth", async models: [{ slug: "gpt-5.5", display_name: "GPT-5.5", + context_window: 272000, + max_context_window: 1000000, + auto_compact_token_limit: null, supported_reasoning_levels: ["low", "medium", "high", "xhigh"], }, { slug: "codex-auto-review", @@ -120,11 +123,23 @@ Deno.test("admin codex auth stores CLI model snapshot as source of truth", async assert.equal(fetchUrls.length, 1); const stored = kvStore.get(keyToString(["ubq_ai", "codex_models"])) as - | { source?: string; client_version?: string; models?: Array<{ slug?: string }> } + | { + source?: string; + client_version?: string; + models?: Array<{ + slug?: string; + context_window?: number; + max_context_window?: number; + auto_compact_token_limit?: number | null; + }>; + } | undefined; assert.equal(stored?.source, "codex_cli"); assert.equal(stored?.client_version, "0.126.0"); assert.deepEqual(stored?.models?.map((model) => model.slug), ["gpt-5.5"]); + assert.equal(stored?.models?.[0]?.context_window, 272000); + assert.equal(stored?.models?.[0]?.max_context_window, 1000000); + assert.equal(stored?.models?.[0]?.auto_compact_token_limit, null); } finally { globalThis.fetch = originalFetch; } diff --git a/tests/codex-models.test.ts b/tests/codex-models.test.ts index a341823..c7a4559 100644 --- a/tests/codex-models.test.ts +++ b/tests/codex-models.test.ts @@ -95,11 +95,14 @@ let vendorRoot; Deno.test("extractCodexModelsFromText parses slugs and reasoning levels", () => { const text = - 'codex_cli_rs/0.99.0 {"slug":"gpt-5.2-codex","supported_reasoning_levels":[{"effort":"low"},{"effort":"high"}]}'; + 'codex_cli_rs/0.99.0 {"slug":"gpt-5.2-codex","context_window":272000,"max_context_window":1000000,"auto_compact_token_limit":null,"supported_reasoning_levels":[{"effort":"low"},{"effort":"high"}]}'; const extracted = extractCodexModelsFromText(text); assert.ok(extracted); assert.equal(extracted?.clientVersion, "0.99.0"); assert.equal(extracted?.models[0]?.slug, "gpt-5.2-codex"); + assert.equal(extracted?.models[0]?.context_window, 272000); + assert.equal(extracted?.models[0]?.max_context_window, 1000000); + assert.equal(extracted?.models[0]?.auto_compact_token_limit, null); assert.deepEqual(extracted?.models[0]?.supported_reasoning_levels, ["low", "high"]); }); diff --git a/tests/openai-compat.test.ts b/tests/openai-compat.test.ts index 6551217..4eee25e 100644 --- a/tests/openai-compat.test.ts +++ b/tests/openai-compat.test.ts @@ -21,6 +21,9 @@ kvStore.set(keyToString(TEST_CODEX_MODELS_KEY), { models: [{ slug: DEFAULT_TEST_MODEL, display_name: "GPT-5 Fixture Default", + context_window: 272000, + max_context_window: 1000000, + auto_compact_token_limit: null, default_reasoning_level: "medium", supported_reasoning_levels: ["low", "medium", "high", "xhigh"], }], @@ -475,6 +478,9 @@ Deno.test("openai: model capabilities are exposed outside /v1 model objects", as supported_endpoints?: string[]; supported_reasoning_levels?: string[]; default_reasoning_effort?: string | null; + context_window_tokens?: number | null; + max_context_window_tokens?: number | null; + auto_compact_token_limit_tokens?: number | null; }>; }; assert.equal(payload.object, "list"); @@ -485,6 +491,9 @@ Deno.test("openai: model capabilities are exposed outside /v1 model objects", as assert.equal(model.upstream_provider, "codex_chatgpt"); assert.deepEqual(model.supported_reasoning_levels, ["low", "medium", "high", "xhigh"]); assert.equal(model.default_reasoning_effort, "medium"); + assert.equal(model.context_window_tokens, 272000); + assert.equal(model.max_context_window_tokens, 1000000); + assert.equal(model.auto_compact_token_limit_tokens, null); assert.ok(model.supported_endpoints?.includes("/v1/chat/completions")); assert.ok(model.supported_endpoints?.includes("/v1/responses")); });