Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ curl -sS https://ai.ubq.fi/uos/models/capabilities \
-H "Authorization: Bearer $UOS_AI_TOKEN"
```

Use this endpoint, not `/v1/models`, when clients need gateway-specific fields such as reasoning support or token-window
limits (`context_window_tokens`, `max_context_window_tokens`, and `auto_compact_token_limit_tokens`). `/v1/models` keeps
the strict OpenAI model-object shape.

Whoami (debug which auth method was used; never returns raw secrets):

```bash
Expand Down
14 changes: 14 additions & 0 deletions scripts/codex-models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,12 @@ const isRecord = (value: unknown): value is Record<string, unknown> =>

/** Passes `value` through when it is a string; any other input yields `null`. */
const getString = (value: unknown): string | null => {
  if (typeof value === "string") return value;
  return null;
};

/**
 * Coerces an untrusted value into a non-negative integer.
 *
 * @param value - Arbitrary input, typically a field read from parsed JSON.
 * @returns The input truncated toward zero, or `null` when the input is not a
 *   finite number or is negative (including negative fractions such as -0.5).
 */
const getNonNegativeInteger = (value: unknown): number | null => {
  if (typeof value !== "number" || !Number.isFinite(value)) return null;
  // Reject negatives before truncating: Math.trunc(-0.5) is -0 and `-0 >= 0`
  // holds, so a sign check placed after truncation lets (-1, 0) slip through.
  if (value < 0) return null;
  const truncated = Math.trunc(value);
  // An input of -0 is not `< 0`; collapse it to +0 so callers never see -0.
  return truncated === 0 ? 0 : truncated;
};

/**
 * Reports whether a model record is marked hidden: its `visibility` field is a
 * string equal to "hide" after trimming whitespace and lowercasing.
 */
const isHiddenCodexModel = (value: Record<string, unknown>): boolean => {
  const visibility = getString(value.visibility);
  if (visibility == null) return false;
  const canonical = visibility.trim().toLowerCase();
  return canonical === "hide";
};

Expand Down Expand Up @@ -241,6 +247,14 @@ export const extractCodexModelsFromText = (text: string): ExtractedCodexModels |
if (displayName) normalized.display_name = displayName;
const description = getString(parsed.description);
if (description) normalized.description = description;
for (const key of ["context_window", "max_context_window", "auto_compact_token_limit"]) {
if (parsed[key] === null) {
normalized[key] = null;
continue;
}
const count = getNonNegativeInteger(parsed[key]);
if (count !== null) normalized[key] = count;
}
const defaultReasoning = getString(parsed.default_reasoning_level);
if (defaultReasoning) normalized.default_reasoning_level = defaultReasoning;
if (Array.isArray(parsed.supported_reasoning_levels)) {
Expand Down
14 changes: 14 additions & 0 deletions src/admin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ const MAX_KV_MIGRATION_BODY_BYTES = 5 * 1024 * 1024;
/**
 * Reports whether a model record is marked hidden: its `visibility` field is a
 * string equal to "hide" after trimming whitespace and lowercasing.
 */
const isHiddenCodexModel = (value: Record<string, unknown>): boolean => {
  const visibility = getString(value.visibility);
  if (visibility == null) return false;
  const canonical = visibility.trim().toLowerCase();
  return canonical === "hide";
};

/**
 * Normalizes an untrusted value into a non-negative integer.
 *
 * @param value - Arbitrary input, typically a field read from an uploaded payload.
 * @returns The input truncated toward zero, or `null` when the input is not a
 *   finite number or is negative (including negative fractions such as -0.5).
 */
const normalizeNonNegativeInteger = (value: unknown): number | null => {
  if (typeof value !== "number" || !Number.isFinite(value)) return null;
  // Reject negatives before truncating: Math.trunc(-0.5) is -0 and `-0 >= 0`
  // holds, so a sign check placed after truncation lets (-1, 0) slip through.
  if (value < 0) return null;
  const truncated = Math.trunc(value);
  // An input of -0 is not `< 0`; collapse it to +0 so callers never see -0.
  return truncated === 0 ? 0 : truncated;
};

const resolveDefaultModel = async (entryValue: unknown): Promise<string> => {
const configured = typeof entryValue === "string" ? entryValue.trim() : "";
if (configured) return configured;
Expand Down Expand Up @@ -611,6 +617,14 @@ const normalizeCodexModelsPayload = (value: unknown): CodexModelsSnapshot | null
if (displayName) normalized.display_name = displayName;
const description = getString(item.description);
if (description) normalized.description = description;
for (const key of ["context_window", "max_context_window", "auto_compact_token_limit"]) {
if (item[key] === null) {
normalized[key] = null;
continue;
}
const count = normalizeNonNegativeInteger(item[key]);
if (count !== null) normalized[key] = count;
}
const defaultReasoning = getString(item.default_reasoning_level);
if (defaultReasoning) normalized.default_reasoning_level = defaultReasoning;
if (Array.isArray(item.supported_reasoning_levels)) {
Expand Down
3 changes: 3 additions & 0 deletions src/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1308,6 +1308,9 @@ const normalizeModelCapabilitiesEntry = (value: unknown): Record<string, unknown
supported_endpoints: ["/v1/chat/completions", "/v1/responses"],
supported_reasoning_levels: reasoning.levels,
default_reasoning_effort: reasoning.defaultLevel,
context_window_tokens: normalizeTokenCount(value.context_window),
max_context_window_tokens: normalizeTokenCount(value.max_context_window),
auto_compact_token_limit_tokens: normalizeTokenCount(value.auto_compact_token_limit),
};
};

Expand Down
11 changes: 11 additions & 0 deletions static/chat.js
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,17 @@ const mergeModelCapabilities = (models, capabilities) => {
merged.default_reasoning_effort = defaultReasoning;
merged.default_reasoning_level = defaultReasoning;
}
for (
const key of [
"context_window_tokens",
"max_context_window_tokens",
"auto_compact_token_limit_tokens",
]
) {
if (typeof capability[key] === "number" || capability[key] === null) {
merged[key] = capability[key];
}
}
return merged;
});
};
Expand Down
9 changes: 5 additions & 4 deletions static/docs/llms-agents.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,9 @@ backed by Codex with a ChatGPT account, so some OpenAI API model aliases may not
Hidden Codex catalog entries such as internal review models are filtered during snapshot upload and are not exposed.

Use `GET /uos/models/capabilities` for gateway-specific model metadata such as `supported_reasoning_levels`,
`default_reasoning_effort`, `supported_endpoints`, and `upstream_provider`. This metadata is intentionally not included
in `/v1/models` so OpenAI-compatible SDKs receive an OpenAI-shaped response.
`default_reasoning_effort`, `context_window_tokens`, `max_context_window_tokens`, `auto_compact_token_limit_tokens`,
`supported_endpoints`, and `upstream_provider`. This metadata is intentionally not included in `/v1/models` so
OpenAI-compatible SDKs receive an OpenAI-shaped response.

Observed integration behavior:

Expand Down Expand Up @@ -330,8 +331,8 @@ Defaults can be managed via `/admin/defaults` (admin auth required). When no mod
uses the first model in the current Codex model snapshot. If neither a configured default nor a snapshot is available,
no-model requests fail with `503` instead of fetching a live fallback catalog.

`GET /uos/models/capabilities` is the endpoint to inspect reasoning support programmatically. `/v1/models` remains
OpenAI-compatible and does not include reasoning metadata.
`GET /uos/models/capabilities` is the endpoint to inspect reasoning support and token-window limits programmatically.
`/v1/models` remains OpenAI-compatible and does not include gateway metadata.

## Ignored parameters and warnings

Expand Down
17 changes: 16 additions & 1 deletion tests/admin-auth.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ Deno.test("admin codex auth stores CLI model snapshot as source of truth", async
models: [{
slug: "gpt-5.5",
display_name: "GPT-5.5",
context_window: 272000,
max_context_window: 1000000,
auto_compact_token_limit: null,
supported_reasoning_levels: ["low", "medium", "high", "xhigh"],
}, {
slug: "codex-auto-review",
Expand All @@ -120,11 +123,23 @@ Deno.test("admin codex auth stores CLI model snapshot as source of truth", async
assert.equal(fetchUrls.length, 1);

const stored = kvStore.get(keyToString(["ubq_ai", "codex_models"])) as
| { source?: string; client_version?: string; models?: Array<{ slug?: string }> }
| {
source?: string;
client_version?: string;
models?: Array<{
slug?: string;
context_window?: number;
max_context_window?: number;
auto_compact_token_limit?: number | null;
}>;
}
| undefined;
assert.equal(stored?.source, "codex_cli");
assert.equal(stored?.client_version, "0.126.0");
assert.deepEqual(stored?.models?.map((model) => model.slug), ["gpt-5.5"]);
assert.equal(stored?.models?.[0]?.context_window, 272000);
assert.equal(stored?.models?.[0]?.max_context_window, 1000000);
assert.equal(stored?.models?.[0]?.auto_compact_token_limit, null);
} finally {
globalThis.fetch = originalFetch;
}
Expand Down
5 changes: 4 additions & 1 deletion tests/codex-models.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,14 @@ let vendorRoot;

Deno.test("extractCodexModelsFromText parses slugs and reasoning levels", () => {
const text =
'codex_cli_rs/0.99.0 {"slug":"gpt-5.2-codex","supported_reasoning_levels":[{"effort":"low"},{"effort":"high"}]}';
'codex_cli_rs/0.99.0 {"slug":"gpt-5.2-codex","context_window":272000,"max_context_window":1000000,"auto_compact_token_limit":null,"supported_reasoning_levels":[{"effort":"low"},{"effort":"high"}]}';
const extracted = extractCodexModelsFromText(text);
assert.ok(extracted);
assert.equal(extracted?.clientVersion, "0.99.0");
assert.equal(extracted?.models[0]?.slug, "gpt-5.2-codex");
assert.equal(extracted?.models[0]?.context_window, 272000);
assert.equal(extracted?.models[0]?.max_context_window, 1000000);
assert.equal(extracted?.models[0]?.auto_compact_token_limit, null);
assert.deepEqual(extracted?.models[0]?.supported_reasoning_levels, ["low", "high"]);
});

Expand Down
9 changes: 9 additions & 0 deletions tests/openai-compat.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ kvStore.set(keyToString(TEST_CODEX_MODELS_KEY), {
models: [{
slug: DEFAULT_TEST_MODEL,
display_name: "GPT-5 Fixture Default",
context_window: 272000,
max_context_window: 1000000,
auto_compact_token_limit: null,
default_reasoning_level: "medium",
supported_reasoning_levels: ["low", "medium", "high", "xhigh"],
}],
Expand Down Expand Up @@ -475,6 +478,9 @@ Deno.test("openai: model capabilities are exposed outside /v1 model objects", as
supported_endpoints?: string[];
supported_reasoning_levels?: string[];
default_reasoning_effort?: string | null;
context_window_tokens?: number | null;
max_context_window_tokens?: number | null;
auto_compact_token_limit_tokens?: number | null;
}>;
};
assert.equal(payload.object, "list");
Expand All @@ -485,6 +491,9 @@ Deno.test("openai: model capabilities are exposed outside /v1 model objects", as
assert.equal(model.upstream_provider, "codex_chatgpt");
assert.deepEqual(model.supported_reasoning_levels, ["low", "medium", "high", "xhigh"]);
assert.equal(model.default_reasoning_effort, "medium");
assert.equal(model.context_window_tokens, 272000);
assert.equal(model.max_context_window_tokens, 1000000);
assert.equal(model.auto_compact_token_limit_tokens, null);
assert.ok(model.supported_endpoints?.includes("/v1/chat/completions"));
assert.ok(model.supported_endpoints?.includes("/v1/responses"));
});
Expand Down
Loading