diff --git a/bin/lib/local-inference.js b/bin/lib/local-inference.js index 923363389..2aa200153 100644 --- a/bin/lib/local-inference.js +++ b/bin/lib/local-inference.js @@ -1,228 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +// +// Thin re-export shim — the implementation lives in src/lib/local-inference.ts, +// compiled to dist/lib/local-inference.js. -const { shellQuote } = require("./runner"); - -const HOST_GATEWAY_URL = "http://host.openshell.internal"; -const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1"; -const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b"; -const SMALL_OLLAMA_MODEL = "qwen2.5:7b"; -const LARGE_OLLAMA_MIN_MEMORY_MB = 32768; - -function getLocalProviderBaseUrl(provider) { - switch (provider) { - case "vllm-local": - return `${HOST_GATEWAY_URL}:8000/v1`; - case "ollama-local": - return `${HOST_GATEWAY_URL}:11434/v1`; - default: - return null; - } -} - -function getLocalProviderValidationBaseUrl(provider) { - switch (provider) { - case "vllm-local": - return "http://localhost:8000/v1"; - case "ollama-local": - return "http://localhost:11434/v1"; - default: - return null; - } -} - -function getLocalProviderHealthCheck(provider) { - switch (provider) { - case "vllm-local": - return "curl -sf http://localhost:8000/v1/models 2>/dev/null"; - case "ollama-local": - return "curl -sf http://localhost:11434/api/tags 2>/dev/null"; - default: - return null; - } -} - -function getLocalProviderContainerReachabilityCheck(provider) { - switch (provider) { - case "vllm-local": - return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:8000/v1/models 2>/dev/null`; - case "ollama-local": - return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:11434/api/tags 2>/dev/null`; - default: - return null; - } -} - -function validateLocalProvider(provider, runCapture) { - const command = getLocalProviderHealthCheck(provider); - if (!command) { - return { ok: true }; - } - - const output = runCapture(command, { ignoreError: true }); - if (!output) { - switch (provider) { - case "vllm-local": - return { - ok: false, - message: "Local vLLM was selected, but nothing is responding on http://localhost:8000.", - }; - case "ollama-local": - return { - ok: false, - message: - "Local Ollama was selected, but nothing is responding on http://localhost:11434.", - }; - default: - return { ok: false, message: "The selected local inference provider is unavailable." }; - } - } - - const containerCommand = getLocalProviderContainerReachabilityCheck(provider); - if (!containerCommand) { - return { ok: true }; - } - - const containerOutput = runCapture(containerCommand, { ignoreError: true }); - if (containerOutput) { - return { ok: true }; - } - - switch (provider) { - case "vllm-local": - return { - ok: false, - message: - "Local vLLM is responding on localhost, but containers cannot reach http://host.openshell.internal:8000. Ensure the server is reachable from containers, not only from the host shell.", - }; - case "ollama-local": - return { - ok: false, - message: - "Local Ollama is responding on localhost, but containers cannot reach http://host.openshell.internal:11434. Ensure Ollama listens on 0.0.0.0:11434 instead of 127.0.0.1 so sandboxes can reach it.", - }; - default: - return { - ok: false, - message: "The selected local inference provider is unavailable from containers.", - }; - } -} - -function parseOllamaList(output) { - return String(output || "") - .split(/\r?\n/) - .map((line) => line.trim()) - .filter(Boolean) - .filter((line) => !/^NAME\s+/i.test(line)) - .map((line) => line.split(/\s{2,}/)[0]) - .filter(Boolean); -} - -function parseOllamaTags(output) { - try { - const parsed = JSON.parse(String(output || "")); - return Array.isArray(parsed?.models) - ? parsed.models.map((model) => model && model.name).filter(Boolean) - : []; - } catch { - return []; - } -} - -function getOllamaModelOptions(runCapture) { - const tagsOutput = runCapture("curl -sf http://localhost:11434/api/tags 2>/dev/null", { - ignoreError: true, - }); - const tagsParsed = parseOllamaTags(tagsOutput); - if (tagsParsed.length > 0) { - return tagsParsed; - } - - const listOutput = runCapture("ollama list 2>/dev/null", { ignoreError: true }); - return parseOllamaList(listOutput); -} - -function getBootstrapOllamaModelOptions(gpu) { - const options = [SMALL_OLLAMA_MODEL]; - if (gpu && gpu.totalMemoryMB >= LARGE_OLLAMA_MIN_MEMORY_MB) { - options.push(DEFAULT_OLLAMA_MODEL); - } - return options; -} - -function getDefaultOllamaModel(runCapture, gpu = null) { - const models = getOllamaModelOptions(runCapture); - if (models.length === 0) { - const bootstrap = getBootstrapOllamaModelOptions(gpu); - return bootstrap[0]; - } - return models.includes(DEFAULT_OLLAMA_MODEL) ? DEFAULT_OLLAMA_MODEL : models[0]; -} - -function getOllamaWarmupCommand(model, keepAlive = "15m") { - const payload = JSON.stringify({ - model, - prompt: "hello", - stream: false, - keep_alive: keepAlive, - }); - return `nohup curl -s http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} >/dev/null 2>&1 &`; -} - -function getOllamaProbeCommand(model, timeoutSeconds = 120, keepAlive = "15m") { - const payload = JSON.stringify({ - model, - prompt: "hello", - stream: false, - keep_alive: keepAlive, - }); - return `curl -sS --max-time ${timeoutSeconds} http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} 2>/dev/null`; -} - -function validateOllamaModel(model, runCapture) { - const output = runCapture(getOllamaProbeCommand(model), { ignoreError: true }); - if (!output) { - return { - ok: false, - message: - `Selected Ollama model '${model}' did not answer the local probe in time. ` + - "It may still be loading, too large for the host, or otherwise unhealthy.", - }; - } - - try { - const parsed = JSON.parse(output); - if (parsed && typeof parsed.error === "string" && parsed.error.trim()) { - return { - ok: false, - message: `Selected Ollama model '${model}' failed the local probe: ${parsed.error.trim()}`, - }; - } - } catch { - /* ignored */ - } - - return { ok: true }; -} - -module.exports = { - CONTAINER_REACHABILITY_IMAGE, - DEFAULT_OLLAMA_MODEL, - HOST_GATEWAY_URL, - LARGE_OLLAMA_MIN_MEMORY_MB, - SMALL_OLLAMA_MODEL, - getDefaultOllamaModel, - getBootstrapOllamaModelOptions, - getLocalProviderBaseUrl, - getLocalProviderValidationBaseUrl, - getLocalProviderContainerReachabilityCheck, - getLocalProviderHealthCheck, - getOllamaModelOptions, - parseOllamaTags, - getOllamaProbeCommand, - getOllamaWarmupCommand, - parseOllamaList, - validateOllamaModel, - validateLocalProvider, -}; +module.exports = require("../../dist/lib/local-inference"); diff --git a/test/local-inference.test.js b/src/lib/local-inference.test.ts similarity index 92% rename from test/local-inference.test.js rename to src/lib/local-inference.test.ts index e028aa736..34040c814 100644 --- a/test/local-inference.test.js +++ b/src/lib/local-inference.test.ts @@ -3,6 +3,7 @@ import { describe, it, expect } from "vitest"; +// Import from compiled dist/ for correct coverage attribution. import { CONTAINER_REACHABILITY_IMAGE, DEFAULT_OLLAMA_MODEL, @@ -20,7 +21,7 @@ import { parseOllamaTags, validateOllamaModel, validateLocalProvider, -} from "../bin/lib/local-inference"; +} from "../../dist/lib/local-inference"; describe("local inference helpers", () => { it("returns the expected base URL for vllm-local", () => { @@ -28,7 +29,9 @@ describe("local inference helpers", () => { }); it("returns the expected base URL for ollama-local", () => { - expect(getLocalProviderBaseUrl("ollama-local")).toBe("http://host.openshell.internal:11434/v1"); + expect(getLocalProviderBaseUrl("ollama-local")).toBe( + "http://host.openshell.internal:11434/v1", + ); }); it("returns null for unknown local provider URLs", () => { @@ -111,6 +114,16 @@ describe("local inference helpers", () => { expect(validateLocalProvider("custom-provider", () => "")).toEqual({ ok: true }); }); + it("skips health check entirely for unknown providers", () => { + let callCount = 0; + const result = validateLocalProvider("custom-provider", () => { + callCount += 1; + return callCount <= 1 ? "ok" : ""; + }); + // custom-provider has no health check command, so it returns ok immediately + expect(result).toEqual({ ok: true }); + }); + it("parses model names from ollama list output", () => { expect( parseOllamaList( @@ -189,10 +202,9 @@ describe("local inference helpers", () => { expect( getBootstrapOllamaModelOptions({ totalMemoryMB: LARGE_OLLAMA_MIN_MEMORY_MB - 1 }), ).toEqual(["qwen2.5:7b"]); - expect(getBootstrapOllamaModelOptions({ totalMemoryMB: LARGE_OLLAMA_MIN_MEMORY_MB })).toEqual([ - "qwen2.5:7b", - DEFAULT_OLLAMA_MODEL, - ]); + expect( + getBootstrapOllamaModelOptions({ totalMemoryMB: LARGE_OLLAMA_MIN_MEMORY_MB }), + ).toEqual(["qwen2.5:7b", DEFAULT_OLLAMA_MODEL]); expect(getDefaultOllamaModel(() => "", { totalMemoryMB: 16384 })).toBe("qwen2.5:7b"); }); diff --git a/src/lib/local-inference.ts b/src/lib/local-inference.ts new file mode 100644 index 000000000..9390bb70e --- /dev/null +++ b/src/lib/local-inference.ts @@ -0,0 +1,237 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Local inference provider helpers — URL mappers, Ollama parsers, + * health checks, and command generators for vLLM and Ollama. + */ + +// eslint-disable-next-line @typescript-eslint/no-require-imports +const { shellQuote } = require("../../bin/lib/runner"); + +export const HOST_GATEWAY_URL = "http://host.openshell.internal"; +export const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1"; +export const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b"; +export const SMALL_OLLAMA_MODEL = "qwen2.5:7b"; +export const LARGE_OLLAMA_MIN_MEMORY_MB = 32768; + +export type RunCaptureFn = (cmd: string, opts?: { ignoreError?: boolean }) => string; + +export interface GpuInfo { + totalMemoryMB: number; +} + +export interface ValidationResult { + ok: boolean; + message?: string; +} + +export function getLocalProviderBaseUrl(provider: string): string | null { + switch (provider) { + case "vllm-local": + return `${HOST_GATEWAY_URL}:8000/v1`; + case "ollama-local": + return `${HOST_GATEWAY_URL}:11434/v1`; + default: + return null; + } +} + +export function getLocalProviderValidationBaseUrl(provider: string): string | null { + switch (provider) { + case "vllm-local": + return "http://localhost:8000/v1"; + case "ollama-local": + return "http://localhost:11434/v1"; + default: + return null; + } +} + +export function getLocalProviderHealthCheck(provider: string): string | null { + switch (provider) { + case "vllm-local": + return "curl -sf http://localhost:8000/v1/models 2>/dev/null"; + case "ollama-local": + return "curl -sf http://localhost:11434/api/tags 2>/dev/null"; + default: + return null; + } +} + +export function getLocalProviderContainerReachabilityCheck(provider: string): string | null { + switch (provider) { + case "vllm-local": + return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:8000/v1/models 2>/dev/null`; + case "ollama-local": + return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:11434/api/tags 2>/dev/null`; + default: + return null; + } +} + +export function validateLocalProvider( + provider: string, + runCapture: RunCaptureFn, +): ValidationResult { + const command = getLocalProviderHealthCheck(provider); + if (!command) { + return { ok: true }; + } + + const output = runCapture(command, { ignoreError: true }); + if (!output) { + switch (provider) { + case "vllm-local": + return { + ok: false, + message: "Local vLLM was selected, but nothing is responding on http://localhost:8000.", + }; + case "ollama-local": + return { + ok: false, + message: + "Local Ollama was selected, but nothing is responding on http://localhost:11434.", + }; + default: + return { ok: false, message: "The selected local inference provider is unavailable." }; + } + } + + const containerCommand = getLocalProviderContainerReachabilityCheck(provider); + if (!containerCommand) { + return { ok: true }; + } + + const containerOutput = runCapture(containerCommand, { ignoreError: true }); + if (containerOutput) { + return { ok: true }; + } + + switch (provider) { + case "vllm-local": + return { + ok: false, + message: + "Local vLLM is responding on localhost, but containers cannot reach http://host.openshell.internal:8000. Ensure the server is reachable from containers, not only from the host shell.", + }; + case "ollama-local": + return { + ok: false, + message: + "Local Ollama is responding on localhost, but containers cannot reach http://host.openshell.internal:11434. Ensure Ollama listens on 0.0.0.0:11434 instead of 127.0.0.1 so sandboxes can reach it.", + }; + default: + return { + ok: false, + message: "The selected local inference provider is unavailable from containers.", + }; + } +} + +export function parseOllamaList(output: unknown): string[] { + return String(output || "") + .split(/\r?\n/) + .map((line) => line.trim()) + .filter(Boolean) + .filter((line) => !/^NAME\s+/i.test(line)) + .map((line) => line.split(/\s{2,}/)[0]) + .filter(Boolean); +} + +export function parseOllamaTags(output: unknown): string[] { + try { + const parsed = JSON.parse(String(output || "")); + return Array.isArray(parsed?.models) + ? parsed.models.map((model: { name?: string }) => model && model.name).filter(Boolean) + : []; + } catch { + return []; + } +} + +export function getOllamaModelOptions(runCapture: RunCaptureFn): string[] { + const tagsOutput = runCapture("curl -sf http://localhost:11434/api/tags 2>/dev/null", { + ignoreError: true, + }); + const tagsParsed = parseOllamaTags(tagsOutput); + if (tagsParsed.length > 0) { + return tagsParsed; + } + + const listOutput = runCapture("ollama list 2>/dev/null", { ignoreError: true }); + return parseOllamaList(listOutput); +} + +export function getBootstrapOllamaModelOptions(gpu: GpuInfo | null): string[] { + const options = [SMALL_OLLAMA_MODEL]; + if (gpu && gpu.totalMemoryMB >= LARGE_OLLAMA_MIN_MEMORY_MB) { + options.push(DEFAULT_OLLAMA_MODEL); + } + return options; +} + +export function getDefaultOllamaModel( + runCapture: RunCaptureFn, + gpu: GpuInfo | null = null, +): string { + const models = getOllamaModelOptions(runCapture); + if (models.length === 0) { + const bootstrap = getBootstrapOllamaModelOptions(gpu); + return bootstrap[0]; + } + return models.includes(DEFAULT_OLLAMA_MODEL) ? DEFAULT_OLLAMA_MODEL : models[0]; +} + +export function getOllamaWarmupCommand(model: string, keepAlive = "15m"): string { + const payload = JSON.stringify({ + model, + prompt: "hello", + stream: false, + keep_alive: keepAlive, + }); + return `nohup curl -s http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} >/dev/null 2>&1 &`; +} + +export function getOllamaProbeCommand( + model: string, + timeoutSeconds = 120, + keepAlive = "15m", +): string { + const payload = JSON.stringify({ + model, + prompt: "hello", + stream: false, + keep_alive: keepAlive, + }); + return `curl -sS --max-time ${timeoutSeconds} http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} 2>/dev/null`; +} + +export function validateOllamaModel( + model: string, + runCapture: RunCaptureFn, +): ValidationResult { + const output = runCapture(getOllamaProbeCommand(model), { ignoreError: true }); + if (!output) { + return { + ok: false, + message: + `Selected Ollama model '${model}' did not answer the local probe in time. ` + + "It may still be loading, too large for the host, or otherwise unhealthy.", + }; + } + + try { + const parsed = JSON.parse(output); + if (parsed && typeof parsed.error === "string" && parsed.error.trim()) { + return { + ok: false, + message: `Selected Ollama model '${model}' failed the local probe: ${parsed.error.trim()}`, + }; + } + } catch { + /* ignored */ + } + + return { ok: true }; +}