Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 4 additions & 225 deletions bin/lib/local-inference.js
Original file line number Diff line number Diff line change
@@ -1,228 +1,7 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Thin re-export shim — the implementation lives in src/lib/local-inference.ts,
// compiled to dist/lib/local-inference.js.

const { shellQuote } = require("./runner");

const HOST_GATEWAY_URL = "http://host.openshell.internal";
const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1";
const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b";
const SMALL_OLLAMA_MODEL = "qwen2.5:7b";
const LARGE_OLLAMA_MIN_MEMORY_MB = 32768;

/**
 * Resolve the OpenAI-compatible base URL that sandboxed containers use to
 * reach a local inference provider through the host gateway hostname.
 *
 * @param {string} provider - Provider id, e.g. "vllm-local" or "ollama-local".
 * @returns {string|null} Container-facing base URL, or null for unknown providers.
 */
function getLocalProviderBaseUrl(provider) {
  if (provider === "vllm-local") {
    return `${HOST_GATEWAY_URL}:8000/v1`;
  }
  if (provider === "ollama-local") {
    return `${HOST_GATEWAY_URL}:11434/v1`;
  }
  return null;
}

/**
 * Resolve the localhost base URL used when validating a local inference
 * provider from the host shell (rather than from inside a container).
 *
 * @param {string} provider - Provider id, e.g. "vllm-local" or "ollama-local".
 * @returns {string|null} Host-local base URL, or null for unknown providers.
 */
function getLocalProviderValidationBaseUrl(provider) {
  const baseUrls = {
    "vllm-local": "http://localhost:8000/v1",
    "ollama-local": "http://localhost:11434/v1",
  };
  // Guard with hasOwn so inherited Object.prototype keys never leak through.
  return Object.hasOwn(baseUrls, provider) ? baseUrls[provider] : null;
}

/**
 * Build the host-shell health-check command for a local inference provider.
 *
 * @param {string} provider - Provider id, e.g. "vllm-local" or "ollama-local".
 * @returns {string|null} A curl command probing the provider's local API, or
 *   null when the provider has no known health endpoint.
 */
function getLocalProviderHealthCheck(provider) {
  const commands = {
    "vllm-local": "curl -sf http://localhost:8000/v1/models 2>/dev/null",
    "ollama-local": "curl -sf http://localhost:11434/api/tags 2>/dev/null",
  };
  // Guard with hasOwn so inherited Object.prototype keys never leak through.
  return Object.hasOwn(commands, provider) ? commands[provider] : null;
}

/**
 * Build a docker-based probe that checks whether containers can reach the
 * provider through the host gateway hostname (host.openshell.internal).
 *
 * @param {string} provider - Provider id, e.g. "vllm-local" or "ollama-local".
 * @returns {string|null} A `docker run` + curl command, or null for unknown providers.
 */
function getLocalProviderContainerReachabilityCheck(provider) {
  const probeUrls = {
    "vllm-local": "http://host.openshell.internal:8000/v1/models",
    "ollama-local": "http://host.openshell.internal:11434/api/tags",
  };
  if (!Object.hasOwn(probeUrls, provider)) {
    return null;
  }
  // --add-host maps the gateway hostname inside the throwaway curl container.
  return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf ${probeUrls[provider]} 2>/dev/null`;
}

/**
 * Validate that the selected local inference provider is reachable, first
 * from the host shell and then from inside a container.
 *
 * @param {string} provider - Provider id ("vllm-local", "ollama-local", ...).
 * @param {(cmd: string, opts: {ignoreError: boolean}) => string} runCapture -
 *   Runs a shell command and returns captured stdout (falsy on failure).
 * @returns {{ok: boolean, message?: string}} ok:true when reachable or when
 *   the provider has no known health check; otherwise ok:false with a
 *   provider-specific diagnostic message.
 */
function validateLocalProvider(provider, runCapture) {
  const hostFailureMessages = {
    "vllm-local": "Local vLLM was selected, but nothing is responding on http://localhost:8000.",
    "ollama-local":
      "Local Ollama was selected, but nothing is responding on http://localhost:11434.",
  };
  const containerFailureMessages = {
    "vllm-local":
      "Local vLLM is responding on localhost, but containers cannot reach http://host.openshell.internal:8000. Ensure the server is reachable from containers, not only from the host shell.",
    "ollama-local":
      "Local Ollama is responding on localhost, but containers cannot reach http://host.openshell.internal:11434. Ensure Ollama listens on 0.0.0.0:11434 instead of 127.0.0.1 so sandboxes can reach it.",
  };

  // Providers with no health check are accepted as-is.
  const healthCommand = getLocalProviderHealthCheck(provider);
  if (!healthCommand) {
    return { ok: true };
  }

  // Step 1: is the provider answering on localhost at all?
  if (!runCapture(healthCommand, { ignoreError: true })) {
    return {
      ok: false,
      message:
        hostFailureMessages[provider] ?? "The selected local inference provider is unavailable.",
    };
  }

  // Step 2: is it also reachable from inside a container?
  const containerCommand = getLocalProviderContainerReachabilityCheck(provider);
  if (!containerCommand) {
    return { ok: true };
  }
  if (runCapture(containerCommand, { ignoreError: true })) {
    return { ok: true };
  }
  return {
    ok: false,
    message:
      containerFailureMessages[provider] ??
      "The selected local inference provider is unavailable from containers.",
  };
}

/**
 * Extract model names from the tabular stdout of `ollama list`.
 *
 * @param {string|null|undefined} output - Raw `ollama list` output.
 * @returns {string[]} First-column model names, header row and blanks excluded.
 */
function parseOllamaList(output) {
  const names = [];
  for (const rawLine of String(output || "").split(/\r?\n/)) {
    const line = rawLine.trim();
    // Skip blank lines and the "NAME  ID  SIZE ..." header row.
    if (!line || /^NAME\s+/i.test(line)) {
      continue;
    }
    // Columns are separated by runs of 2+ spaces; the model name is column 1.
    const [name] = line.split(/\s{2,}/);
    if (name) {
      names.push(name);
    }
  }
  return names;
}

/**
 * Extract model names from the JSON body of Ollama's /api/tags response.
 *
 * @param {string|null|undefined} output - Raw JSON response text.
 * @returns {string[]} Model names; empty when the JSON is malformed or has
 *   no `models` array.
 */
function parseOllamaTags(output) {
  let parsed;
  try {
    parsed = JSON.parse(String(output || ""));
  } catch {
    // Malformed or empty JSON — treat as "no models".
    return [];
  }
  if (!Array.isArray(parsed?.models)) {
    return [];
  }
  return parsed.models.map((model) => model?.name).filter(Boolean);
}

/**
 * List locally installed Ollama models, preferring the HTTP API and falling
 * back to the `ollama list` CLI when the API yields nothing.
 *
 * @param {(cmd: string, opts: {ignoreError: boolean}) => string} runCapture -
 *   Runs a shell command and returns captured stdout (falsy on failure).
 * @returns {string[]} Model names (possibly empty).
 */
function getOllamaModelOptions(runCapture) {
  const apiModels = parseOllamaTags(
    runCapture("curl -sf http://localhost:11434/api/tags 2>/dev/null", { ignoreError: true }),
  );
  if (apiModels.length > 0) {
    return apiModels;
  }
  return parseOllamaList(runCapture("ollama list 2>/dev/null", { ignoreError: true }));
}

/**
 * Candidate models to pull when no Ollama models are installed yet.
 * The small model is always offered; the large default is added only when
 * the GPU reports enough memory.
 *
 * @param {{totalMemoryMB: number}|null|undefined} gpu - Detected GPU info.
 * @returns {string[]} Bootstrap model names, smallest first.
 */
function getBootstrapOllamaModelOptions(gpu) {
  const hasLargeGpu = Boolean(gpu) && gpu.totalMemoryMB >= LARGE_OLLAMA_MIN_MEMORY_MB;
  return hasLargeGpu ? [SMALL_OLLAMA_MODEL, DEFAULT_OLLAMA_MODEL] : [SMALL_OLLAMA_MODEL];
}

/**
 * Pick the default Ollama model: the preferred default if installed,
 * otherwise the first installed model, otherwise the first bootstrap
 * candidate for the detected GPU.
 *
 * @param {(cmd: string, opts: {ignoreError: boolean}) => string} runCapture -
 *   Runs a shell command and returns captured stdout (falsy on failure).
 * @param {{totalMemoryMB: number}|null} [gpu=null] - Detected GPU info.
 * @returns {string} A model name.
 */
function getDefaultOllamaModel(runCapture, gpu = null) {
  const installed = getOllamaModelOptions(runCapture);
  if (installed.length === 0) {
    // Nothing installed yet — fall back to the smallest bootstrap candidate.
    return getBootstrapOllamaModelOptions(gpu)[0];
  }
  if (installed.includes(DEFAULT_OLLAMA_MODEL)) {
    return DEFAULT_OLLAMA_MODEL;
  }
  return installed[0];
}

/**
 * Build a fire-and-forget warm-up command that asks Ollama to load a model
 * and keep it resident (output discarded, runs in the background).
 *
 * @param {string} model - Ollama model name to warm.
 * @param {string} [keepAlive="15m"] - keep_alive value passed to /api/generate.
 * @returns {string} A backgrounded nohup+curl shell command.
 */
function getOllamaWarmupCommand(model, keepAlive = "15m") {
  const quotedPayload = shellQuote(
    JSON.stringify({ model, prompt: "hello", stream: false, keep_alive: keepAlive }),
  );
  return `nohup curl -s http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${quotedPayload} >/dev/null 2>&1 &`;
}

/**
 * Build a blocking probe command that asks Ollama to generate a short reply,
 * bounded by a curl timeout, with output captured for inspection.
 *
 * @param {string} model - Ollama model name to probe.
 * @param {number} [timeoutSeconds=120] - curl --max-time budget.
 * @param {string} [keepAlive="15m"] - keep_alive value passed to /api/generate.
 * @returns {string} A curl shell command whose stdout is the API response.
 */
function getOllamaProbeCommand(model, timeoutSeconds = 120, keepAlive = "15m") {
  const quotedPayload = shellQuote(
    JSON.stringify({ model, prompt: "hello", stream: false, keep_alive: keepAlive }),
  );
  return `curl -sS --max-time ${timeoutSeconds} http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${quotedPayload} 2>/dev/null`;
}

/**
 * Probe an Ollama model end-to-end and report whether it answered.
 *
 * @param {string} model - Ollama model name to validate.
 * @param {(cmd: string, opts: {ignoreError: boolean}) => string} runCapture -
 *   Runs a shell command and returns captured stdout (falsy on failure).
 * @returns {{ok: boolean, message?: string}} ok:false with a diagnostic when
 *   the probe timed out or the API returned an error field.
 */
function validateOllamaModel(model, runCapture) {
  const response = runCapture(getOllamaProbeCommand(model), { ignoreError: true });
  if (!response) {
    return {
      ok: false,
      message:
        `Selected Ollama model '${model}' did not answer the local probe in time. ` +
        "It may still be loading, too large for the host, or otherwise unhealthy.",
    };
  }

  let parsed = null;
  try {
    parsed = JSON.parse(response);
  } catch {
    // Non-JSON output still counts as a successful probe response.
  }

  const errorText = typeof parsed?.error === "string" ? parsed.error.trim() : "";
  if (errorText) {
    return {
      ok: false,
      message: `Selected Ollama model '${model}' failed the local probe: ${errorText}`,
    };
  }

  return { ok: true };
}

// NOTE(review): this explicit export object is immediately clobbered by the
// `module.exports = require(...)` reassignment below, so as written only the
// dist re-export takes effect and everything above is dead code. Presumably
// this is the legacy export list awaiting removal now that the implementation
// lives in src/lib/local-inference.ts — confirm the dist build exports the
// same names before deleting it.
module.exports = {
CONTAINER_REACHABILITY_IMAGE,
DEFAULT_OLLAMA_MODEL,
HOST_GATEWAY_URL,
LARGE_OLLAMA_MIN_MEMORY_MB,
SMALL_OLLAMA_MODEL,
getDefaultOllamaModel,
getBootstrapOllamaModelOptions,
getLocalProviderBaseUrl,
getLocalProviderValidationBaseUrl,
getLocalProviderContainerReachabilityCheck,
getLocalProviderHealthCheck,
getOllamaModelOptions,
parseOllamaTags,
getOllamaProbeCommand,
getOllamaWarmupCommand,
parseOllamaList,
validateOllamaModel,
validateLocalProvider,
};
// Final binding: forward everything to the compiled TypeScript implementation.
module.exports = require("../../dist/lib/local-inference");
24 changes: 18 additions & 6 deletions test/local-inference.test.js → src/lib/local-inference.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import { describe, it, expect } from "vitest";

// Import from compiled dist/ for correct coverage attribution.
import {
CONTAINER_REACHABILITY_IMAGE,
DEFAULT_OLLAMA_MODEL,
Expand All @@ -20,15 +21,17 @@ import {
parseOllamaTags,
validateOllamaModel,
validateLocalProvider,
} from "../bin/lib/local-inference";
} from "../../dist/lib/local-inference";

describe("local inference helpers", () => {
it("returns the expected base URL for vllm-local", () => {
expect(getLocalProviderBaseUrl("vllm-local")).toBe("http://host.openshell.internal:8000/v1");
});

it("returns the expected base URL for ollama-local", () => {
expect(getLocalProviderBaseUrl("ollama-local")).toBe("http://host.openshell.internal:11434/v1");
expect(getLocalProviderBaseUrl("ollama-local")).toBe(
"http://host.openshell.internal:11434/v1",
);
});

it("returns null for unknown local provider URLs", () => {
Expand Down Expand Up @@ -111,6 +114,16 @@ describe("local inference helpers", () => {
expect(validateLocalProvider("custom-provider", () => "")).toEqual({ ok: true });
});

it("skips health check entirely for unknown providers", () => {
let callCount = 0;
const result = validateLocalProvider("custom-provider", () => {
callCount += 1;
return callCount <= 1 ? "ok" : "";
});
// custom-provider has no health check command, so it returns ok immediately
expect(result).toEqual({ ok: true });
});

it("parses model names from ollama list output", () => {
expect(
parseOllamaList(
Expand Down Expand Up @@ -189,10 +202,9 @@ describe("local inference helpers", () => {
expect(
getBootstrapOllamaModelOptions({ totalMemoryMB: LARGE_OLLAMA_MIN_MEMORY_MB - 1 }),
).toEqual(["qwen2.5:7b"]);
expect(getBootstrapOllamaModelOptions({ totalMemoryMB: LARGE_OLLAMA_MIN_MEMORY_MB })).toEqual([
"qwen2.5:7b",
DEFAULT_OLLAMA_MODEL,
]);
expect(
getBootstrapOllamaModelOptions({ totalMemoryMB: LARGE_OLLAMA_MIN_MEMORY_MB }),
).toEqual(["qwen2.5:7b", DEFAULT_OLLAMA_MODEL]);
expect(getDefaultOllamaModel(() => "", { totalMemoryMB: 16384 })).toBe("qwen2.5:7b");
});

Expand Down
Loading
Loading