diff --git a/bin/lib/local-inference.js b/bin/lib/local-inference.js
index 923363389..2aa200153 100644
--- a/bin/lib/local-inference.js
+++ b/bin/lib/local-inference.js
@@ -1,228 +1,7 @@
 // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
+//
+// Thin re-export shim — the implementation lives in src/lib/local-inference.ts,
+// compiled to dist/lib/local-inference.js.
 
-const { shellQuote } = require("./runner");
-
-const HOST_GATEWAY_URL = "http://host.openshell.internal";
-const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1";
-const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b";
-const SMALL_OLLAMA_MODEL = "qwen2.5:7b";
-const LARGE_OLLAMA_MIN_MEMORY_MB = 32768;
-
-function getLocalProviderBaseUrl(provider) {
-  switch (provider) {
-    case "vllm-local":
-      return `${HOST_GATEWAY_URL}:8000/v1`;
-    case "ollama-local":
-      return `${HOST_GATEWAY_URL}:11434/v1`;
-    default:
-      return null;
-  }
-}
-
-function getLocalProviderValidationBaseUrl(provider) {
-  switch (provider) {
-    case "vllm-local":
-      return "http://localhost:8000/v1";
-    case "ollama-local":
-      return "http://localhost:11434/v1";
-    default:
-      return null;
-  }
-}
-
-function getLocalProviderHealthCheck(provider) {
-  switch (provider) {
-    case "vllm-local":
-      return "curl -sf http://localhost:8000/v1/models 2>/dev/null";
-    case "ollama-local":
-      return "curl -sf http://localhost:11434/api/tags 2>/dev/null";
-    default:
-      return null;
-  }
-}
-
-function getLocalProviderContainerReachabilityCheck(provider) {
-  switch (provider) {
-    case "vllm-local":
-      return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:8000/v1/models 2>/dev/null`;
-    case "ollama-local":
-      return `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf http://host.openshell.internal:11434/api/tags 2>/dev/null`;
-    default:
-      return null;
-  }
-}
-
-function validateLocalProvider(provider, runCapture) {
-  const command = getLocalProviderHealthCheck(provider);
-  if (!command) {
-    return { ok: true };
-  }
-
-  const output = runCapture(command, { ignoreError: true });
-  if (!output) {
-    switch (provider) {
-      case "vllm-local":
-        return {
-          ok: false,
-          message: "Local vLLM was selected, but nothing is responding on http://localhost:8000.",
-        };
-      case "ollama-local":
-        return {
-          ok: false,
-          message:
-            "Local Ollama was selected, but nothing is responding on http://localhost:11434.",
-        };
-      default:
-        return { ok: false, message: "The selected local inference provider is unavailable." };
-    }
-  }
-
-  const containerCommand = getLocalProviderContainerReachabilityCheck(provider);
-  if (!containerCommand) {
-    return { ok: true };
-  }
-
-  const containerOutput = runCapture(containerCommand, { ignoreError: true });
-  if (containerOutput) {
-    return { ok: true };
-  }
-
-  switch (provider) {
-    case "vllm-local":
-      return {
-        ok: false,
-        message:
-          "Local vLLM is responding on localhost, but containers cannot reach http://host.openshell.internal:8000. Ensure the server is reachable from containers, not only from the host shell.",
-      };
-    case "ollama-local":
-      return {
-        ok: false,
-        message:
-          "Local Ollama is responding on localhost, but containers cannot reach http://host.openshell.internal:11434. Ensure Ollama listens on 0.0.0.0:11434 instead of 127.0.0.1 so sandboxes can reach it.",
-      };
-    default:
-      return {
-        ok: false,
-        message: "The selected local inference provider is unavailable from containers.",
-      };
-  }
-}
-
-function parseOllamaList(output) {
-  return String(output || "")
-    .split(/\r?\n/)
-    .map((line) => line.trim())
-    .filter(Boolean)
-    .filter((line) => !/^NAME\s+/i.test(line))
-    .map((line) => line.split(/\s{2,}/)[0])
-    .filter(Boolean);
-}
-
-function parseOllamaTags(output) {
-  try {
-    const parsed = JSON.parse(String(output || ""));
-    return Array.isArray(parsed?.models)
-      ? parsed.models.map((model) => model && model.name).filter(Boolean)
-      : [];
-  } catch {
-    return [];
-  }
-}
-
-function getOllamaModelOptions(runCapture) {
-  const tagsOutput = runCapture("curl -sf http://localhost:11434/api/tags 2>/dev/null", {
-    ignoreError: true,
-  });
-  const tagsParsed = parseOllamaTags(tagsOutput);
-  if (tagsParsed.length > 0) {
-    return tagsParsed;
-  }
-
-  const listOutput = runCapture("ollama list 2>/dev/null", { ignoreError: true });
-  return parseOllamaList(listOutput);
-}
-
-function getBootstrapOllamaModelOptions(gpu) {
-  const options = [SMALL_OLLAMA_MODEL];
-  if (gpu && gpu.totalMemoryMB >= LARGE_OLLAMA_MIN_MEMORY_MB) {
-    options.push(DEFAULT_OLLAMA_MODEL);
-  }
-  return options;
-}
-
-function getDefaultOllamaModel(runCapture, gpu = null) {
-  const models = getOllamaModelOptions(runCapture);
-  if (models.length === 0) {
-    const bootstrap = getBootstrapOllamaModelOptions(gpu);
-    return bootstrap[0];
-  }
-  return models.includes(DEFAULT_OLLAMA_MODEL) ? DEFAULT_OLLAMA_MODEL : models[0];
-}
-
-function getOllamaWarmupCommand(model, keepAlive = "15m") {
-  const payload = JSON.stringify({
-    model,
-    prompt: "hello",
-    stream: false,
-    keep_alive: keepAlive,
-  });
-  return `nohup curl -s http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} >/dev/null 2>&1 &`;
-}
-
-function getOllamaProbeCommand(model, timeoutSeconds = 120, keepAlive = "15m") {
-  const payload = JSON.stringify({
-    model,
-    prompt: "hello",
-    stream: false,
-    keep_alive: keepAlive,
-  });
-  return `curl -sS --max-time ${timeoutSeconds} http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${shellQuote(payload)} 2>/dev/null`;
-}
-
-function validateOllamaModel(model, runCapture) {
-  const output = runCapture(getOllamaProbeCommand(model), { ignoreError: true });
-  if (!output) {
-    return {
-      ok: false,
-      message:
-        `Selected Ollama model '${model}' did not answer the local probe in time. ` +
-        "It may still be loading, too large for the host, or otherwise unhealthy.",
-    };
-  }
-
-  try {
-    const parsed = JSON.parse(output);
-    if (parsed && typeof parsed.error === "string" && parsed.error.trim()) {
-      return {
-        ok: false,
-        message: `Selected Ollama model '${model}' failed the local probe: ${parsed.error.trim()}`,
-      };
-    }
-  } catch {
-    /* ignored */
-  }
-
-  return { ok: true };
-}
-
-module.exports = {
-  CONTAINER_REACHABILITY_IMAGE,
-  DEFAULT_OLLAMA_MODEL,
-  HOST_GATEWAY_URL,
-  LARGE_OLLAMA_MIN_MEMORY_MB,
-  SMALL_OLLAMA_MODEL,
-  getDefaultOllamaModel,
-  getBootstrapOllamaModelOptions,
-  getLocalProviderBaseUrl,
-  getLocalProviderValidationBaseUrl,
-  getLocalProviderContainerReachabilityCheck,
-  getLocalProviderHealthCheck,
-  getOllamaModelOptions,
-  parseOllamaTags,
-  getOllamaProbeCommand,
-  getOllamaWarmupCommand,
-  parseOllamaList,
-  validateOllamaModel,
-  validateLocalProvider,
-};
+module.exports = require("../../dist/lib/local-inference"); // compiled output — resolves only after dist/ is built
diff --git a/test/local-inference.test.js b/src/lib/local-inference.test.ts
similarity index 92%
rename from test/local-inference.test.js
rename to src/lib/local-inference.test.ts
index e028aa736..34040c814 100644
--- a/test/local-inference.test.js
+++ b/src/lib/local-inference.test.ts
@@ -3,6 +3,7 @@
 
 import { describe, it, expect } from "vitest";
 
+// Import from compiled dist/ for correct coverage attribution.
 import {
   CONTAINER_REACHABILITY_IMAGE,
   DEFAULT_OLLAMA_MODEL,
@@ -20,7 +21,7 @@ import {
   parseOllamaTags,
   validateOllamaModel,
   validateLocalProvider,
-} from "../bin/lib/local-inference";
+} from "../../dist/lib/local-inference";
 
 describe("local inference helpers", () => {
   it("returns the expected base URL for vllm-local", () => {
@@ -28,7 +29,9 @@ describe("local inference helpers", () => {
   });
 
   it("returns the expected base URL for ollama-local", () => {
-    expect(getLocalProviderBaseUrl("ollama-local")).toBe("http://host.openshell.internal:11434/v1");
+    expect(getLocalProviderBaseUrl("ollama-local")).toBe(
+      "http://host.openshell.internal:11434/v1",
+    );
   });
 
   it("returns null for unknown local provider URLs", () => {
@@ -111,6 +114,16 @@ describe("local inference helpers", () => {
     expect(validateLocalProvider("custom-provider", () => "")).toEqual({ ok: true });
   });
 
+  it("skips health check entirely for unknown providers", () => {
+    let callCount = 0;
+    const result = validateLocalProvider("custom-provider", () => {
+      callCount += 1;
+      return callCount <= 1 ? "ok" : "";
+    });
+    expect(result).toEqual({ ok: true });
+    expect(callCount).toBe(0); // no health-check command exists, so runCapture must never run
+  });
+
   it("parses model names from ollama list output", () => {
     expect(
       parseOllamaList(
@@ -189,10 +202,9 @@ describe("local inference helpers", () => {
     expect(
       getBootstrapOllamaModelOptions({ totalMemoryMB: LARGE_OLLAMA_MIN_MEMORY_MB - 1 }),
     ).toEqual(["qwen2.5:7b"]);
-    expect(getBootstrapOllamaModelOptions({ totalMemoryMB: LARGE_OLLAMA_MIN_MEMORY_MB })).toEqual([
-      "qwen2.5:7b",
-      DEFAULT_OLLAMA_MODEL,
-    ]);
+    expect(
+      getBootstrapOllamaModelOptions({ totalMemoryMB: LARGE_OLLAMA_MIN_MEMORY_MB }),
+    ).toEqual(["qwen2.5:7b", DEFAULT_OLLAMA_MODEL]);
     expect(getDefaultOllamaModel(() => "", { totalMemoryMB: 16384 })).toBe("qwen2.5:7b");
   });
 
diff --git a/src/lib/local-inference.ts b/src/lib/local-inference.ts
new file mode 100644
index 000000000..9390bb70e
--- /dev/null
+++ b/src/lib/local-inference.ts
@@ -0,0 +1,237 @@
+// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * Local inference provider helpers — URL mappers, Ollama parsers,
+ * health checks, and command generators for vLLM and Ollama.
+ */
+
+// eslint-disable-next-line @typescript-eslint/no-require-imports
+const { shellQuote } = require("../../bin/lib/runner");
+
+export const HOST_GATEWAY_URL = "http://host.openshell.internal"; // scheme + host only; ports are appended per provider
+export const CONTAINER_REACHABILITY_IMAGE = "curlimages/curl:8.10.1"; // image run by the container reachability probe
+export const DEFAULT_OLLAMA_MODEL = "nemotron-3-nano:30b"; // preferred whenever Ollama reports it as available
+export const SMALL_OLLAMA_MODEL = "qwen2.5:7b"; // always the first bootstrap option
+export const LARGE_OLLAMA_MIN_MEMORY_MB = 32768; // GPU memory at which DEFAULT_OLLAMA_MODEL joins the bootstrap options
+/** Runs a shell command and returns its output; this module always passes `ignoreError` and treats empty output as failure. */
+export type RunCaptureFn = (cmd: string, opts?: { ignoreError?: boolean }) => string;
+/** GPU facts consulted by this module; only the total memory is read. */
+export interface GpuInfo {
+  totalMemoryMB: number;
+}
+/** Outcome of a validation helper; `message` is set on the failure paths in this module. */
+export interface ValidationResult {
+  ok: boolean;
+  message?: string;
+}
+
+/** Base URL on HOST_GATEWAY_URL for a known local provider; null for any other provider id. */
+export function getLocalProviderBaseUrl(provider: string): string | null {
+  if (provider === "vllm-local") {
+    return `${HOST_GATEWAY_URL}:8000/v1`;
+  }
+  if (provider === "ollama-local") {
+    return `${HOST_GATEWAY_URL}:11434/v1`;
+  }
+  return null;
+}
+
+/** Localhost-based `/v1` base URL for a known local provider; null for any other provider id. */
+export function getLocalProviderValidationBaseUrl(provider: string): string | null {
+  const localhostUrls = new Map<string, string>([
+    ["vllm-local", "http://localhost:8000/v1"],
+    ["ollama-local", "http://localhost:11434/v1"],
+  ]);
+  // A Map has no inherited keys, so ids like "toString" still fall through to null.
+  const url = localhostUrls.get(provider);
+  return url === undefined ? null : url;
+}
+
+/** Host-side curl probe command for a known local provider; null when none is defined. */
+export function getLocalProviderHealthCheck(provider: string): string | null {
+  if (provider === "vllm-local") {
+    return "curl -sf http://localhost:8000/v1/models 2>/dev/null";
+  }
+  if (provider === "ollama-local") {
+    return "curl -sf http://localhost:11434/api/tags 2>/dev/null";
+  }
+  return null;
+}
+
+/** `docker run` curl probe that requests a known provider from inside a container; else null. */
+export function getLocalProviderContainerReachabilityCheck(provider: string): string | null {
+  // --add-host maps host.openshell.internal to Docker's host-gateway inside the probe container.
+  const probe = `docker run --rm --add-host host.openshell.internal:host-gateway ${CONTAINER_REACHABILITY_IMAGE} -sf`;
+  if (provider === "vllm-local") {
+    return `${probe} http://host.openshell.internal:8000/v1/models 2>/dev/null`;
+  }
+  const ollamaProbe = `${probe} http://host.openshell.internal:11434/api/tags 2>/dev/null`;
+  return provider === "ollama-local" ? ollamaProbe : null;
+}
+
+/**
+ * Verifies that a local provider answers on the host and is reachable from containers.
+ *
+ * The host health check runs first, then the container reachability probe; empty output
+ * from either is treated as failure. A provider with no health-check command is never
+ * probed and is reported as ok.
+ *
+ * @param provider - Provider id, e.g. "vllm-local" or "ollama-local".
+ * @param runCapture - Runs a shell command and returns its captured output.
+ * @returns `{ ok: true }` on success, otherwise `{ ok: false, message }`.
+ */
+export function validateLocalProvider(
+  provider: string,
+  runCapture: RunCaptureFn,
+): ValidationResult {
+  const hostProbe = getLocalProviderHealthCheck(provider);
+  if (!hostProbe) {
+    return { ok: true };
+  }
+
+  // Every failure has the same shape; only the wording differs per provider.
+  const fail = (message: string): ValidationResult => ({ ok: false, message });
+
+  // Stage 1: is anything answering on localhost?
+  if (!runCapture(hostProbe, { ignoreError: true })) {
+    if (provider === "vllm-local") {
+      return fail(
+        "Local vLLM was selected, but nothing is responding on http://localhost:8000.",
+      );
+    }
+    if (provider === "ollama-local") {
+      return fail(
+        "Local Ollama was selected, but nothing is responding on http://localhost:11434.",
+      );
+    }
+    return fail("The selected local inference provider is unavailable.");
+  }
+
+  // Stage 2: can a container reach the same server through the gateway hostname?
+  // A missing probe command, or any output from the probe, both count as success.
+  const containerProbe = getLocalProviderContainerReachabilityCheck(provider);
+  if (!containerProbe || runCapture(containerProbe, { ignoreError: true })) {
+    return { ok: true };
+  }
+
+  // The host answered but the container probe did not: report the reachability gap.
+  if (provider === "vllm-local") {
+    return fail(
+      "Local vLLM is responding on localhost, but containers cannot reach http://host.openshell.internal:8000. Ensure the server is reachable from containers, not only from the host shell.",
+    );
+  }
+  if (provider === "ollama-local") {
+    return fail(
+      "Local Ollama is responding on localhost, but containers cannot reach http://host.openshell.internal:11434. Ensure Ollama listens on 0.0.0.0:11434 instead of 127.0.0.1 so sandboxes can reach it.",
+    );
+  }
+  return fail("The selected local inference provider is unavailable from containers.");
+}
+
+/** Extracts model names (first column) from `ollama list` text, skipping the NAME header row. */
+export function parseOllamaList(output: unknown): string[] {
+  return String(output || "")
+    .split(/\r?\n/)
+    .map((line) => line.trim())
+    .filter((line) => line !== "" && !/^NAME\s+/i.test(line))
+    .map((line) => line.split(/\s+/)[0]) // any whitespace run ends the name, incl. a single tab
+    .filter(Boolean);
+}
+
+/** Extracts non-empty string model names from an Ollama `/api/tags` JSON body; bad input yields []. */
+export function parseOllamaTags(output: unknown): string[] {
+  try {
+    const models: unknown = JSON.parse(String(output || ""))?.models;
+    if (!Array.isArray(models)) return [];
+    return models.map((m) => m?.name).filter((n) => typeof n === "string" && n !== "");
+  } catch {
+    return [];
+  }
+}
+
+/**
+ * Lists the models Ollama reports, preferring the HTTP tags endpoint and falling
+ * back to the `ollama list` CLI when the endpoint yields no model names.
+ */
+export function getOllamaModelOptions(runCapture: RunCaptureFn): string[] {
+  const tagsCommand = "curl -sf http://localhost:11434/api/tags 2>/dev/null";
+  const fromApi = parseOllamaTags(runCapture(tagsCommand, { ignoreError: true }));
+  if (fromApi.length > 0) {
+    return fromApi;
+  }
+  return parseOllamaList(runCapture("ollama list 2>/dev/null", { ignoreError: true }));
+}
+
+/**
+ * Bootstrap choices: the small model always, plus the default model when the GPU has at least LARGE_OLLAMA_MIN_MEMORY_MB.
+ */
+export function getBootstrapOllamaModelOptions(gpu: GpuInfo | null): string[] {
+  const hasLargeGpu = Boolean(gpu && gpu.totalMemoryMB >= LARGE_OLLAMA_MIN_MEMORY_MB);
+  return hasLargeGpu ? [SMALL_OLLAMA_MODEL, DEFAULT_OLLAMA_MODEL] : [SMALL_OLLAMA_MODEL];
+}
+
+/**
+ * Picks DEFAULT_OLLAMA_MODEL when Ollama reports it, else the first reported model, else the first bootstrap option.
+ */
+export function getDefaultOllamaModel(
+  runCapture: RunCaptureFn,
+  gpu: GpuInfo | null = null,
+): string {
+  const available = getOllamaModelOptions(runCapture);
+  if (available.includes(DEFAULT_OLLAMA_MODEL)) return DEFAULT_OLLAMA_MODEL;
+  return available.length > 0 ? available[0] : getBootstrapOllamaModelOptions(gpu)[0];
+}
+
+/**
+ * Builds a backgrounded (`nohup … &`) curl command that posts one generate request for `model`, discarding all output.
+ */
+export function getOllamaWarmupCommand(model: string, keepAlive = "15m"): string {
+  const body = { model, prompt: "hello", stream: false, keep_alive: keepAlive };
+  // NOTE(review): shellQuote comes from bin/lib/runner; presumably it makes the JSON one safe shell argument.
+  const quotedBody = shellQuote(JSON.stringify(body));
+  return `nohup curl -s http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${quotedBody} >/dev/null 2>&1 &`;
+}
+
+/**
+ * Builds a foreground curl command that posts one generate request for `model`,
+ * capped at `timeoutSeconds` via curl's --max-time; stderr is discarded.
+ */
+export function getOllamaProbeCommand(
+  model: string,
+  timeoutSeconds = 120,
+  keepAlive = "15m",
+): string {
+  const body = { model, prompt: "hello", stream: false, keep_alive: keepAlive };
+  const quotedBody = shellQuote(JSON.stringify(body));
+  return `curl -sS --max-time ${timeoutSeconds} http://localhost:11434/api/generate -H 'Content-Type: application/json' -d ${quotedBody} 2>/dev/null`;
+}
+
+/**
+ * Probes `model` with one generate request. Empty output fails; a JSON reply carrying a
+ * non-blank string `error` fails with that error; any other non-empty reply passes.
+ */
+export function validateOllamaModel(
+  model: string,
+  runCapture: RunCaptureFn,
+): ValidationResult {
+  const reply = runCapture(getOllamaProbeCommand(model), { ignoreError: true });
+  if (!reply) {
+    const message =
+      `Selected Ollama model '${model}' did not answer the local probe in time. ` +
+      "It may still be loading, too large for the host, or otherwise unhealthy.";
+    return { ok: false, message };
+  }
+
+  let probeError = "";
+  try {
+    const body = JSON.parse(reply);
+    if (body && typeof body.error === "string") {
+      probeError = body.error.trim();
+    }
+  } catch {
+    // Non-JSON output is tolerated on purpose: any non-empty reply counts as an answer.
+  }
+  if (!probeError) return { ok: true };
+  return { ok: false, message: `Selected Ollama model '${model}' failed the local probe: ${probeError}` };
+}