diff --git a/README.md b/README.md index 570e692a9..bb9b6844e 100644 --- a/README.md +++ b/README.md @@ -58,9 +58,9 @@ The sandbox image is approximately 2.4 GB compressed. During image push, the Doc |----------|--------------------|-------| | Linux | Docker | Primary supported path. | | macOS (Apple Silicon) | Colima, Docker Desktop | Install Xcode Command Line Tools (`xcode-select --install`) and start the runtime before running the installer. | -| macOS (Intel) | Podman | Not supported yet. Depends on OpenShell support for Podman on macOS. | +| macOS (Intel) | Docker Desktop | Start the runtime before running the installer. | | Windows WSL | Docker Desktop (WSL backend) | Supported target path. | -| DGX Spark | Docker | Refer to the [DGX Spark setup guide](https://github.com/NVIDIA/NemoClaw/blob/main/spark-install.md) for cgroup v2 and Docker configuration. | +| DGX Spark | Docker | Use the standard installer and `nemoclaw onboard`. | ### Install NemoClaw and Onboard OpenClaw Agent diff --git a/bin/lib/onboard.js b/bin/lib/onboard.js index ccfb60085..c16df0f1c 100644 --- a/bin/lib/onboard.js +++ b/bin/lib/onboard.js @@ -35,12 +35,7 @@ const { getProviderSelectionConfig, parseGatewayInference, } = require("./inference-config"); -const { - inferContainerRuntime, - isUnsupportedMacosRuntime, - isWsl, - shouldPatchCoredns, -} = require("./platform"); +const { inferContainerRuntime, isWsl, shouldPatchCoredns } = require("./platform"); const { resolveOpenshell } = require("./resolve-openshell"); const { prompt, @@ -54,7 +49,13 @@ const nim = require("./nim"); const onboardSession = require("./onboard-session"); const policies = require("./policies"); const { ensureUsageNoticeConsent } = require("./usage-notice"); -const { checkPortAvailable, ensureSwap, getMemoryInfo } = require("./preflight"); +const { + assessHost, + checkPortAvailable, + ensureSwap, + getMemoryInfo, + planHostRemediation, +} = require("./preflight"); // Typed modules (compiled from src/lib/*.ts → 
dist/lib/*.js) const gatewayState = require("../../dist/lib/gateway-state"); @@ -1849,20 +1850,27 @@ function getResumeConfigConflicts(session, opts = {}) { return conflicts; } -function isDockerRunning() { - try { - runCapture("docker info", { ignoreError: false }); - return true; - } catch { - return false; - } -} - function getContainerRuntime() { const info = runCapture("docker info 2>/dev/null", { ignoreError: true }); return inferContainerRuntime(info); } +function printRemediationActions(actions) { + if (!Array.isArray(actions) || actions.length === 0) { + return; + } + + console.error(""); + console.error(" Suggested fix:"); + console.error(""); + for (const action of actions) { + console.error(` - ${action.title}: ${action.reason}`); + for (const command of action.commands || []) { + console.error(` ${command}`); + } + } +} + function isOpenshellInstalled() { return resolveOpenshell() !== null; } @@ -2025,24 +2033,27 @@ function getNonInteractiveModel(providerKey) { async function preflight() { step(1, 7, "Preflight checks"); - // Docker - if (!isDockerRunning()) { - console.error(" Docker is not running. Please start Docker and try again."); + const host = assessHost(); + + // Docker / runtime + if (!host.dockerReachable) { + console.error(" Docker is not reachable. Please fix Docker and try again."); + printRemediationActions(planHostRemediation(host)); process.exit(1); } console.log(" ✓ Docker is running"); - const runtime = getContainerRuntime(); - if (isUnsupportedMacosRuntime(runtime)) { - console.error(" Podman on macOS is not supported by NemoClaw at this time."); - console.error( - " OpenShell currently depends on Docker host-gateway behavior that Podman on macOS does not provide.", + if (host.runtime !== "unknown") { + console.log(` ✓ Container runtime: ${host.runtime}`); + } + if (host.isUnsupportedRuntime) { + console.warn( + " ! Podman is not a supported OpenShell runtime. 
NemoClaw will continue, but your experience may vary.", ); - console.error(" Use Colima or Docker Desktop on macOS instead."); - process.exit(1); + printRemediationActions(planHostRemediation(host)); } - if (runtime !== "unknown") { - console.log(` ✓ Container runtime: ${runtime}`); + if (host.notes.includes("Running under WSL")) { + console.log(" ⓘ Running under WSL"); } // OpenShell CLI diff --git a/bin/nemoclaw.js b/bin/nemoclaw.js index 8dd37d6bf..6b7bc61ca 100755 --- a/bin/nemoclaw.js +++ b/bin/nemoclaw.js @@ -23,7 +23,6 @@ const YW = _useColor ? "\x1b[1;33m" : ""; const { ROOT, - SCRIPTS, run, runCapture: _runCapture, runInteractive, @@ -32,12 +31,7 @@ const { } = require("./lib/runner"); const { resolveOpenshell } = require("./lib/resolve-openshell"); const { startGatewayForRecovery } = require("./lib/onboard"); -const { - ensureApiKey, - ensureGithubToken, - getCredential, - isRepoPrivate, -} = require("./lib/credentials"); +const { getCredential } = require("./lib/credentials"); const registry = require("./lib/registry"); const nim = require("./lib/nim"); const policies = require("./lib/policies"); @@ -46,6 +40,7 @@ const { getVersion } = require("./lib/version"); const onboardSession = require("./lib/onboard-session"); const { parseLiveSandboxNames } = require("./lib/runtime-recovery"); const { NOTICE_ACCEPT_ENV, NOTICE_ACCEPT_FLAG } = require("./lib/usage-notice"); +const { executeDeploy } = require("../dist/lib/deploy"); // ── Global commands ────────────────────────────────────────────── @@ -807,139 +802,33 @@ async function setup(args = []) { await onboard(args); } -async function setupSpark() { - // setup-spark.sh configures Docker cgroups — it does not use NVIDIA_API_KEY. 
- run(`sudo bash "${SCRIPTS}/setup-spark.sh"`); -} - -// eslint-disable-next-line complexity -async function deploy(instanceName) { - if (!instanceName) { - console.error(" Usage: nemoclaw deploy "); - console.error(""); - console.error(" Examples:"); - console.error(" nemoclaw deploy my-gpu-box"); - console.error(" nemoclaw deploy nemoclaw-prod"); - console.error(" nemoclaw deploy nemoclaw-test"); - process.exit(1); - } - await ensureApiKey(); - if (isRepoPrivate("NVIDIA/OpenShell")) { - await ensureGithubToken(); - } - validateName(instanceName, "instance name"); - const name = instanceName; - const qname = shellQuote(name); - const gpu = process.env.NEMOCLAW_GPU || "a2-highgpu-1g:nvidia-tesla-a100:1"; - +async function setupSpark(args = []) { console.log(""); - console.log(` Deploying NemoClaw to Brev instance: ${name}`); + console.log(" ⚠ `nemoclaw setup-spark` is deprecated."); + console.log(" Current OpenShell releases handle the old DGX Spark cgroup issue themselves."); + console.log(" Use `nemoclaw onboard` instead."); console.log(""); + await onboard(args); +} - try { - execFileSync("which", ["brev"], { stdio: "ignore" }); - } catch { - console.error("brev CLI not found. 
Install: https://brev.nvidia.com"); - process.exit(1); - } - - let exists = false; - try { - const out = execFileSync("brev", ["ls"], { encoding: "utf-8" }); - exists = out.includes(name); - } catch (err) { - if (err.stdout && err.stdout.includes(name)) exists = true; - if (err.stderr && err.stderr.includes(name)) exists = true; - } - - if (!exists) { - console.log(` Creating Brev instance '${name}' (${gpu})...`); - run(`brev create ${qname} --gpu ${shellQuote(gpu)}`); - } else { - console.log(` Brev instance '${name}' already exists.`); - } - - run(`brev refresh`, { ignoreError: true }); - - process.stdout.write(` Waiting for SSH `); - for (let i = 0; i < 60; i++) { - try { - execFileSync( - "ssh", - ["-o", "ConnectTimeout=5", "-o", "StrictHostKeyChecking=no", name, "echo", "ok"], - { encoding: "utf-8", stdio: "ignore" }, - ); - process.stdout.write(` ${G}✓${R}\n`); - break; - } catch { - if (i === 59) { - process.stdout.write("\n"); - console.error(` Timed out waiting for SSH to ${name}`); - process.exit(1); - } - process.stdout.write("."); - spawnSync("sleep", ["3"]); - } - } - - console.log(" Syncing NemoClaw to VM..."); - run( - `ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR ${qname} 'mkdir -p /home/ubuntu/nemoclaw'`, - ); - run( - `rsync -az --delete --exclude node_modules --exclude .git --exclude src -e "ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR" "${ROOT}/scripts" "${ROOT}/Dockerfile" "${ROOT}/nemoclaw" "${ROOT}/nemoclaw-blueprint" "${ROOT}/bin" "${ROOT}/package.json" ${qname}:/home/ubuntu/nemoclaw/`, - ); - - const envLines = [`NVIDIA_API_KEY=${shellQuote(process.env.NVIDIA_API_KEY || "")}`]; - const ghToken = process.env.GITHUB_TOKEN; - if (ghToken) envLines.push(`GITHUB_TOKEN=${shellQuote(ghToken)}`); - const tgToken = getCredential("TELEGRAM_BOT_TOKEN"); - if (tgToken) envLines.push(`TELEGRAM_BOT_TOKEN=${shellQuote(tgToken)}`); - const discordToken = getCredential("DISCORD_BOT_TOKEN"); - if (discordToken) 
envLines.push(`DISCORD_BOT_TOKEN=${shellQuote(discordToken)}`); - const slackToken = getCredential("SLACK_BOT_TOKEN"); - if (slackToken) envLines.push(`SLACK_BOT_TOKEN=${shellQuote(slackToken)}`); - const envDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-env-")); - const envTmp = path.join(envDir, "env"); - fs.writeFileSync(envTmp, envLines.join("\n") + "\n", { mode: 0o600 }); - try { - run( - `scp -q -o StrictHostKeyChecking=no -o LogLevel=ERROR ${shellQuote(envTmp)} ${qname}:/home/ubuntu/nemoclaw/.env`, - ); - run( - `ssh -q -o StrictHostKeyChecking=no -o LogLevel=ERROR ${qname} 'chmod 600 /home/ubuntu/nemoclaw/.env'`, - ); - } finally { - try { - fs.unlinkSync(envTmp); - } catch { - /* ignored */ - } - try { - fs.rmdirSync(envDir); - } catch { - /* ignored */ - } - } - - console.log(" Running setup..."); - runInteractive( - `ssh -t -o StrictHostKeyChecking=no -o LogLevel=ERROR ${qname} 'cd /home/ubuntu/nemoclaw && set -a && . .env && set +a && bash scripts/brev-setup.sh'`, - ); - - if (tgToken) { - console.log(" Starting services..."); - run( - `ssh -o StrictHostKeyChecking=no -o LogLevel=ERROR ${qname} 'cd /home/ubuntu/nemoclaw && set -a && . .env && set +a && bash scripts/start-services.sh'`, - ); - } - - console.log(""); - console.log(" Connecting to sandbox..."); - console.log(""); - runInteractive( - `ssh -t -o StrictHostKeyChecking=no -o LogLevel=ERROR ${qname} 'cd /home/ubuntu/nemoclaw && set -a && . 
.env && set +a && openshell sandbox connect nemoclaw'`, - ); +async function deploy(instanceName) { + await executeDeploy({ + instanceName, + env: process.env, + rootDir: ROOT, + getCredential, + validateName, + shellQuote, + run, + runInteractive, + execFileSync: (file, args, opts = {}) => + String(execFileSync(file, args, { encoding: "utf-8", ...opts })), + spawnSync, + log: console.log, + error: console.error, + stdoutWrite: (message) => process.stdout.write(message), + exit: (code) => process.exit(code), + }); } async function start() { @@ -1374,7 +1263,6 @@ function help() { ${G}Getting Started:${R} ${B}nemoclaw onboard${R} Configure inference endpoint and credentials ${D}(non-interactive: ${NOTICE_ACCEPT_FLAG} or ${NOTICE_ACCEPT_ENV}=1)${R} - nemoclaw setup-spark Set up on DGX Spark ${D}(fixes cgroup v2 + Docker)${R} ${G}Sandbox Management:${R} ${B}nemoclaw list${R} List all sandboxes @@ -1387,8 +1275,10 @@ function help() { nemoclaw policy-add Add a network or filesystem policy preset nemoclaw policy-list List presets ${D}(● = applied)${R} - ${G}Deploy:${R} - nemoclaw deploy Deploy to a Brev VM and start services + ${G}Compatibility Commands:${R} + nemoclaw setup Deprecated alias for ${B}nemoclaw onboard${R} + nemoclaw setup-spark Deprecated alias for ${B}nemoclaw onboard${R} + nemoclaw deploy Deprecated Brev-specific bootstrap path ${G}Services:${R} nemoclaw start Start auxiliary services ${D}(Telegram, tunnel)${R} @@ -1435,7 +1325,7 @@ const [cmd, ...args] = process.argv.slice(2); await setup(args); break; case "setup-spark": - await setupSpark(); + await setupSpark(args); break; case "deploy": await deploy(args[0]); diff --git a/docs/deployment/deploy-to-remote-gpu.md b/docs/deployment/deploy-to-remote-gpu.md index 34a945393..3ce3da5dd 100644 --- a/docs/deployment/deploy-to-remote-gpu.md +++ b/docs/deployment/deploy-to-remote-gpu.md @@ -3,8 +3,8 @@ title: page: "Deploy NemoClaw to a Remote GPU Instance with Brev" nav: "Deploy to Remote GPU" description: - 
main: "Provision a remote GPU VM with NemoClaw using Brev deployment." - agent: "Provisions a remote GPU VM with NemoClaw using Brev deployment. Use when deploying to a cloud GPU, setting up a remote NemoClaw instance, or configuring Brev." + main: "Run NemoClaw on a remote GPU instance and understand the legacy Brev compatibility flow." + agent: "Explains how to run NemoClaw on a remote GPU instance, including the deprecated Brev compatibility path and the preferred installer plus onboard flow." keywords: ["deploy nemoclaw remote gpu", "nemoclaw brev cloud deployment"] topics: ["generative_ai", "ai_agents"] tags: ["openclaw", "openshell", "deployment", "gpu", "nemoclaw"] @@ -23,11 +23,11 @@ status: published # Deploy NemoClaw to a Remote GPU Instance Run NemoClaw on a remote GPU instance through [Brev](https://brev.nvidia.com). -The deploy command provisions the VM, installs dependencies, and connects you to a running sandbox. +The preferred path is to provision the VM, run the standard NemoClaw installer on that host, and then run `nemoclaw onboard`. ## Quick Start -If your Brev instance is already up and you want to try NemoClaw immediately, start with the sandbox chat flow: +If your Brev instance is already up and has already been onboarded with a sandbox, start with the standard sandbox chat flow: ```console $ nemoclaw my-assistant connect @@ -35,22 +35,24 @@ $ openclaw tui ``` This gets you into the sandbox shell first and opens the OpenClaw chat UI right away. +If the VM is fresh, run the standard installer on that host and then run `nemoclaw onboard` before trying `nemoclaw my-assistant connect`. -If you are connecting from your local machine and still need to provision the remote VM, use `nemoclaw deploy <instance-name>` as described below. +If you are connecting from your local machine and still need to provision the remote VM, you can still use `nemoclaw deploy <instance-name>` as the legacy compatibility path described below.
## Prerequisites - The [Brev CLI](https://brev.nvidia.com) installed and authenticated. -- An NVIDIA API key from [build.nvidia.com](https://build.nvidia.com). -- NemoClaw installed locally. Follow the [Quickstart](../get-started/quickstart.md) install steps. +- A provider credential for the inference backend you want to use during onboarding. +- NemoClaw installed locally if you plan to use the deprecated `nemoclaw deploy` wrapper. Otherwise, install NemoClaw directly on the remote host after provisioning it. ## Deploy the Instance :::{warning} -The `nemoclaw deploy` command is experimental and may not work as expected. +The `nemoclaw deploy` command is deprecated. +Prefer provisioning the remote host separately, then running the standard NemoClaw installer and `nemoclaw onboard` on that host. ::: -Create a Brev instance and run the NemoClaw setup: +Create a Brev instance and run the legacy compatibility flow: ```console $ nemoclaw deploy <instance-name> @@ -58,17 +60,19 @@ $ nemoclaw deploy <instance-name> Replace `<instance-name>` with a name for your remote instance, for example `my-gpu-box`. -The deploy script performs the following steps on the VM: +The legacy compatibility flow performs the following steps on the VM: 1. Installs Docker and the NVIDIA Container Toolkit if a GPU is present. 2. Installs the OpenShell CLI. 3. Runs `nemoclaw onboard` (the setup wizard) to create the gateway, register providers, and launch the sandbox. -4. Starts auxiliary services, such as the Telegram bridge and cloudflared tunnel. +4. Starts auxiliary services, such as the Telegram bridge and cloudflared tunnel, when those tools are available. + +By default, the compatibility wrapper asks Brev to provision on `gcp`. Override this with `NEMOCLAW_BREV_PROVIDER` if you need a different Brev cloud provider. ## Connect to the Remote Sandbox After deployment finishes, the deploy command opens an interactive shell inside the remote sandbox.
-To reconnect after closing the session, run the deploy command again: +To reconnect after closing the session, run the command again: ```console $ nemoclaw deploy <instance-name> @@ -108,8 +112,8 @@ default), so no extra configuration is needed. :::{note} On Brev, set `CHAT_UI_URL` in the launchable environment configuration so it is -available when the setup script builds the sandbox image. If `CHAT_UI_URL` is -not set on a headless host, `brev-setup.sh` prints a warning. +available when the installer builds the sandbox image. If `CHAT_UI_URL` is not +set on a headless host, the compatibility wrapper prints a warning. ::: ## GPU Configuration diff --git a/docs/get-started/quickstart.md b/docs/get-started/quickstart.md index 182120b73..753b59f91 100644 --- a/docs/get-started/quickstart.md +++ b/docs/get-started/quickstart.md @@ -61,9 +61,9 @@ The sandbox image is approximately 2.4 GB compressed. During image push, the Doc |----------|--------------------|-------| | Linux | Docker | Primary supported path. | | macOS (Apple Silicon) | Colima, Docker Desktop | Install Xcode Command Line Tools (`xcode-select --install`) and start the runtime before running the installer. | -| macOS (Intel) | Podman | Not supported yet. Depends on OpenShell support for Podman on macOS. | +| macOS (Intel) | Docker Desktop | Start the runtime before running the installer. | | Windows WSL | Docker Desktop (WSL backend) | Supported target path. | -| DGX Spark | Docker | Refer to the [DGX Spark setup guide](https://github.com/NVIDIA/NemoClaw/blob/main/spark-install.md) for cgroup v2 and Docker configuration. | +| DGX Spark | Docker | Use the standard installer and `nemoclaw onboard`.
| ## Install NemoClaw and Onboard OpenClaw Agent diff --git a/docs/reference/commands.md b/docs/reference/commands.md index f5723faa9..c312c6991 100644 --- a/docs/reference/commands.md +++ b/docs/reference/commands.md @@ -102,7 +102,7 @@ Names must follow RFC 1123 subdomain rules: lowercase alphanumeric characters an Uppercase letters are automatically lowercased. Before creating the gateway, the wizard runs preflight checks. -On systems with cgroup v2 (Ubuntu 24.04, DGX Spark, WSL2), it verifies that Docker is configured with `"default-cgroupns-mode": "host"` and provides fix instructions if the setting is missing. +It verifies that Docker is reachable, warns on unsupported runtimes such as Podman, and prints host remediation guidance when prerequisites are missing. ### `nemoclaw list` @@ -115,11 +115,12 @@ $ nemoclaw list ### `nemoclaw deploy` :::{warning} -The `nemoclaw deploy` command is experimental and may not work as expected. +The `nemoclaw deploy` command is deprecated. +Prefer provisioning the remote host separately, then running the standard NemoClaw installer and `nemoclaw onboard` on that host. ::: Deploy NemoClaw to a remote GPU instance through [Brev](https://brev.nvidia.com). -The deploy script installs Docker, NVIDIA Container Toolkit if a GPU is present, and OpenShell on the VM, then runs `nemoclaw onboard` and connects to the sandbox. +This command remains as a compatibility wrapper for the older Brev-specific bootstrap flow. ```console $ nemoclaw deploy <instance-name> @@ -221,13 +222,15 @@ $ nemoclaw status ### `nemoclaw setup-spark` -Set up NemoClaw on DGX Spark. -This command applies cgroup v2 and Docker fixes required for Ubuntu 24.04. -Run with `sudo` on the Spark host. -After the fixes complete, the script prompts you to run `nemoclaw onboard` to continue setup. +:::{warning} +The `nemoclaw setup-spark` command is deprecated.
+Use the standard installer and run `nemoclaw onboard` instead, because current OpenShell releases handle the older DGX Spark cgroup behavior. +::: + +This command remains as a compatibility alias to `nemoclaw onboard`. ```console -$ sudo nemoclaw setup-spark +$ nemoclaw setup-spark ``` ### `nemoclaw debug` diff --git a/docs/reference/troubleshooting.md b/docs/reference/troubleshooting.md index 676371528..c1a7d68af 100644 --- a/docs/reference/troubleshooting.md +++ b/docs/reference/troubleshooting.md @@ -123,16 +123,18 @@ Then retry onboarding. ### Cgroup v2 errors during onboard -On Ubuntu 24.04, DGX Spark, and WSL2, Docker may not be configured for cgroup v2 delegation. -The onboard preflight check detects this and fails with a clear error message. +Older NemoClaw releases relied on a Docker cgroup workaround on Ubuntu 24.04, DGX Spark, and WSL2. +Current OpenShell releases handle that behavior themselves, so NemoClaw no longer requires a Spark-specific setup step. -Run the Spark setup script to fix the Docker cgroup configuration, then retry onboarding: +If onboarding reports that Docker is missing or unreachable, fix Docker first and retry onboarding: ```console -$ sudo nemoclaw setup-spark $ nemoclaw onboard ``` +If you are using Podman, NemoClaw warns and continues, but OpenShell officially documents Docker-based runtimes only. +If onboarding or sandbox lifecycle fails, switch to Docker Desktop, Colima, or Docker Engine and rerun onboarding. + ### Invalid sandbox name Sandbox names must follow RFC 1123 subdomain rules: lowercase alphanumeric characters and hyphens only, and must start and end with an alphanumeric character. diff --git a/scripts/brev-setup.sh b/scripts/brev-setup.sh deleted file mode 100755 index 29924d1a6..000000000 --- a/scripts/brev-setup.sh +++ /dev/null @@ -1,209 +0,0 @@ -#!/usr/bin/env bash -# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 -# -# Brev VM bootstrap — installs prerequisites then runs nemoclaw onboard. -# -# Run on a fresh Brev VM: -# export NVIDIA_API_KEY=nvapi-... -# ./scripts/brev-setup.sh -# -# What it does: -# 1. Installs Docker (if missing) -# 2. Installs NVIDIA Container Toolkit (if GPU present) -# 3. Installs openshell CLI from GitHub release (binary, no Rust build) -# 4. Installs nemoclaw CLI and runs nemoclaw onboard - -set -euo pipefail - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' - -_ts() { date '+%H:%M:%S'; } -info() { echo -e "${GREEN}[$(_ts) brev]${NC} $1"; } -warn() { echo -e "${YELLOW}[$(_ts) brev]${NC} $1"; } -fail() { - echo -e "${RED}[$(_ts) brev]${NC} $1" - exit 1 -} - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" - -[ -n "${NVIDIA_API_KEY:-}" ] || fail "NVIDIA_API_KEY not set" - -# Suppress needrestart noise from apt (Scanning processes, No services need...) -export NEEDRESTART_MODE=a -export DEBIAN_FRONTEND=noninteractive - -# --- 0. Node.js (needed for services) --- -if ! command -v node >/dev/null 2>&1; then - info "Installing Node.js..." 
- NODESOURCE_URL="https://deb.nodesource.com/setup_22.x" - NODESOURCE_SHA256="575583bbac2fccc0b5edd0dbc03e222d9f9dc8d724da996d22754d6411104fd1" - ( - tmpdir="$(mktemp -d)" - trap 'rm -rf "$tmpdir"' EXIT - curl -fsSL "$NODESOURCE_URL" -o "$tmpdir/setup_node.sh" - if command -v sha256sum >/dev/null 2>&1; then - echo "$NODESOURCE_SHA256 $tmpdir/setup_node.sh" | sha256sum -c - >/dev/null \ - || fail "NodeSource installer checksum mismatch — expected $NODESOURCE_SHA256" - elif command -v shasum >/dev/null 2>&1; then - echo "$NODESOURCE_SHA256 $tmpdir/setup_node.sh" | shasum -a 256 -c - >/dev/null \ - || fail "NodeSource installer checksum mismatch — expected $NODESOURCE_SHA256" - else - fail "No SHA-256 verification tool found (need sha256sum or shasum)" - fi - sudo -E bash "$tmpdir/setup_node.sh" >/dev/null 2>&1 - ) - sudo apt-get install -y -qq nodejs >/dev/null 2>&1 - info "Node.js $(node --version) installed" -else - info "Node.js already installed: $(node --version)" -fi - -# --- 1. Docker --- -if ! command -v docker >/dev/null 2>&1; then - info "Installing Docker..." - sudo apt-get update -qq >/dev/null 2>&1 - sudo apt-get install -y -qq docker.io >/dev/null 2>&1 - sudo usermod -aG docker "$(whoami)" - info "Docker installed" -else - info "Docker already installed" -fi - -# --- 2. NVIDIA Container Toolkit (if GPU present) --- -if command -v nvidia-smi >/dev/null 2>&1; then - if ! dpkg -s nvidia-container-toolkit >/dev/null 2>&1; then - info "Installing NVIDIA Container Toolkit..." 
- curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \ - | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg - curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \ - | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \ - | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list >/dev/null - sudo apt-get update -qq >/dev/null 2>&1 - sudo apt-get install -y -qq nvidia-container-toolkit >/dev/null 2>&1 - sudo nvidia-ctk runtime configure --runtime=docker >/dev/null 2>&1 - sudo systemctl restart docker - info "NVIDIA Container Toolkit installed" - else - info "NVIDIA Container Toolkit already installed" - fi -fi - -# --- 3. openshell CLI (binary release, not pip) --- -if ! command -v openshell >/dev/null 2>&1; then - info "Installing openshell CLI from GitHub release..." - if ! command -v gh >/dev/null 2>&1; then - sudo apt-get update -qq >/dev/null 2>&1 - sudo apt-get install -y -qq gh >/dev/null 2>&1 - fi - ARCH="$(uname -m)" - case "$ARCH" in - x86_64 | amd64) ASSET="openshell-x86_64-unknown-linux-musl.tar.gz" ;; - aarch64 | arm64) ASSET="openshell-aarch64-unknown-linux-musl.tar.gz" ;; - *) fail "Unsupported architecture: $ARCH" ;; - esac - tmpdir="$(mktemp -d)" - GH_TOKEN="${GITHUB_TOKEN:-}" gh release download --repo NVIDIA/OpenShell \ - --pattern "$ASSET" --dir "$tmpdir" - tar xzf "$tmpdir/$ASSET" -C "$tmpdir" - sudo install -m 755 "$tmpdir/openshell" /usr/local/bin/openshell - rm -rf "$tmpdir" - info "openshell $(openshell --version) installed" -else - info "openshell already installed: $(openshell --version)" -fi - -# --- 3b. cloudflared (for public tunnel) --- -if ! command -v cloudflared >/dev/null 2>&1; then - info "Installing cloudflared..." 
- CF_ARCH="$(uname -m)" - case "$CF_ARCH" in - x86_64 | amd64) CF_ARCH="amd64" ;; - aarch64 | arm64) CF_ARCH="arm64" ;; - *) fail "Unsupported architecture for cloudflared: $CF_ARCH" ;; - esac - tmpdir=$(mktemp -d) - curl -fsSL "https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-${CF_ARCH}" -o "$tmpdir/cloudflared" - sudo install -m 755 "$tmpdir/cloudflared" /usr/local/bin/cloudflared - rm -rf "$tmpdir" - info "cloudflared $(cloudflared --version 2>&1 | head -1) installed" -else - info "cloudflared already installed" -fi - -# --- 4. vLLM (local inference, if GPU present) --- -VLLM_MODEL="nvidia/nemotron-3-nano-30b-a3b" -if [ "${SKIP_VLLM:-}" = "1" ]; then - info "Skipping vLLM install (SKIP_VLLM=1)" -elif command -v nvidia-smi >/dev/null 2>&1; then - if ! python3 -c "import vllm" 2>/dev/null; then - info "Installing vLLM..." - if ! command -v pip3 >/dev/null 2>&1; then - sudo apt-get install -y -qq python3-pip >/dev/null 2>&1 - fi - pip3 install --break-system-packages vllm 2>/dev/null || pip3 install vllm - info "vLLM installed" - else - info "vLLM already installed" - fi - - # Start vLLM if not already running - if curl -s http://localhost:8000/v1/models >/dev/null 2>&1; then - info "vLLM already running on :8000" - elif python3 -c "import vllm" 2>/dev/null; then - info "Starting vLLM with $VLLM_MODEL..." - nohup python3 -m vllm.entrypoints.openai.api_server \ - --model "$VLLM_MODEL" \ - --port 8000 \ - --host 0.0.0.0 \ - >/tmp/vllm-server.log 2>&1 & - VLLM_PID=$! - info "Waiting for vLLM to load model (this can take a few minutes)..." - for _ in $(seq 1 120); do - if curl -s http://localhost:8000/v1/models >/dev/null 2>&1; then - info "vLLM ready (PID $VLLM_PID)" - break - fi - if ! kill -0 "$VLLM_PID" 2>/dev/null; then - warn "vLLM exited. Check /tmp/vllm-server.log" - break - fi - sleep 2 - done - fi -fi - -# --- 5. Install nemoclaw CLI and run onboard --- -REPO_DIR="$(cd "$SCRIPT_DIR/.." 
&& pwd)" - -info "Installing nemoclaw CLI..." -export npm_config_prefix="$HOME/.local" -export PATH="$HOME/.local/bin:$PATH" -(cd "$REPO_DIR/nemoclaw" && npm install && npm run build) >/dev/null 2>&1 -(cd "$REPO_DIR" && npm install --ignore-scripts && npm link) >/dev/null 2>&1 -info "nemoclaw $(nemoclaw --version) installed" - -# Use sg docker to ensure docker group is active (usermod -aG doesn't -# take effect in the current session without re-login) - -# CHAT_UI_URL tells onboard which browser origin to allow in the gateway -# config. On Brev, the launchable config should set this to the public URL -# (e.g. https://openclaw0-.brevlab.com). Without it the dashboard -# rejects remote browsers with "origin not allowed". -# Ref: https://github.com/NVIDIA/NemoClaw/issues/795 -if [ -n "${CHAT_UI_URL:-}" ]; then - export CHAT_UI_URL - info "CHAT_UI_URL=${CHAT_UI_URL}" -elif [ -z "${DISPLAY:-}" ] && [ ! -e /tmp/.X11-unix ]; then - warn "CHAT_UI_URL is not set. Remote browser access will fail with" - warn "'origin not allowed' unless you set CHAT_UI_URL to the public URL" - warn "of this instance (e.g. https://openclaw0-.brevlab.com)." -fi - -info "Running nemoclaw onboard..." -export NVIDIA_API_KEY -exec sg docker -c "nemoclaw onboard --non-interactive" diff --git a/scripts/install.sh b/scripts/install.sh index 8bb5714c6..9bba08793 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -937,6 +937,81 @@ verify_nemoclaw() { # --------------------------------------------------------------------------- # 5. Onboard # --------------------------------------------------------------------------- +run_installer_host_preflight() { + local preflight_module="${NEMOCLAW_SOURCE_ROOT}/dist/lib/preflight.js" + if ! command_exists node || [[ ! 
-f "$preflight_module" ]]; then + return 0 + fi + + local output status + if output="$( + # shellcheck disable=SC2016 + node -e ' + const preflightPath = process.argv[1]; + try { + const { assessHost, planHostRemediation } = require(preflightPath); + const host = assessHost(); + const actions = planHostRemediation(host); + const blockingActions = actions.filter((action) => action && action.blocking); + const infoLines = []; + const actionLines = []; + if (host.runtime && host.runtime !== "unknown") { + infoLines.push(`Detected container runtime: ${host.runtime}`); + } + if (host.notes && host.notes.includes("Running under WSL")) { + infoLines.push("Running under WSL"); + } + for (const action of actions) { + actionLines.push(`- ${action.title}: ${action.reason}`); + for (const command of action.commands || []) { + actionLines.push(` ${command}`); + } + } + if (infoLines.length > 0) { + process.stdout.write(`__INFO__\n${infoLines.join("\n")}\n`); + } + if (actionLines.length > 0) { + process.stdout.write(`__ACTIONS__\n${actionLines.join("\n")}`); + } + process.exit(blockingActions.length > 0 ? 10 : 0); + } catch { + process.exit(0); + } + ' "$preflight_module" + )"; then + status=0 + else + status=$? + fi + + if [[ -n "$output" ]]; then + local info_output="" action_output="" + info_output="$(printf "%s\n" "$output" | awk 'BEGIN{mode=0} /^__INFO__$/ {mode=1; next} /^__ACTIONS__$/ {mode=0} mode {print}')" + action_output="$(printf "%s\n" "$output" | awk 'BEGIN{mode=0} /^__ACTIONS__$/ {mode=1; next} mode {print}')" + echo "" + if [[ -n "$info_output" ]]; then + while IFS= read -r line; do + [[ -n "$line" ]] && printf " %s\n" "$line" + done <<<"$info_output" + fi + if [[ "$status" -eq 10 ]]; then + warn "Host preflight found issues that will prevent onboarding right now." 
+ if [[ -n "$action_output" ]]; then + while IFS= read -r line; do + [[ -n "$line" ]] && printf " %s\n" "$line" + done <<<"$action_output" + fi + elif [[ -n "$action_output" ]]; then + warn "Host preflight found warnings." + while IFS= read -r line; do + [[ -n "$line" ]] && printf " %s\n" "$line" + done <<<"$action_output" + fi + fi + + [[ "$status" -ne 10 ]] +} + run_onboard() { show_usage_notice info "Running nemoclaw onboard…" @@ -1058,8 +1133,12 @@ main() { step 3 "Onboarding" if command_exists nemoclaw; then - run_onboard - ONBOARD_RAN=true + if run_installer_host_preflight; then + run_onboard + ONBOARD_RAN=true + else + warn "Skipping onboarding until the host prerequisites above are fixed." + fi else warn "Skipping onboarding — this shell still cannot resolve 'nemoclaw'." fi diff --git a/scripts/setup-spark.sh b/scripts/setup-spark.sh deleted file mode 100755 index 6e4204258..000000000 --- a/scripts/setup-spark.sh +++ /dev/null @@ -1,69 +0,0 @@ -#!/usr/bin/env bash -# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# NemoClaw setup for DGX Spark devices. -# -# Ensures the current user is in the docker group so NemoClaw can -# manage containers without sudo. -# -# Usage: -# sudo bash scripts/setup-spark.sh -# # or via curl: -# curl -fsSL https://raw.githubusercontent.com/NVIDIA/NemoClaw/main/scripts/setup-spark.sh | sudo bash -# -# What it does: -# 1. Adds current user to docker group (avoids sudo for everything else) - -set -euo pipefail - -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -NC='\033[0m' - -info() { echo -e "${GREEN}>>>${NC} $1"; } -warn() { echo -e "${YELLOW}>>>${NC} $1"; } -fail() { - echo -e "${RED}>>>${NC} $1" - exit 1 -} - -# ── Pre-flight checks ───────────────────────────────────────────── - -if [ "$(uname -s)" != "Linux" ]; then - fail "This script is for DGX Spark (Linux). Use 'nemoclaw setup' for macOS." 
-fi - -if [ "$(id -u)" -ne 0 ]; then - fail "Must run as root: sudo nemoclaw setup-spark" -fi - -# Detect the real user (not root) for docker group add -REAL_USER="${SUDO_USER:-$(logname 2>/dev/null || echo "")}" -if [ -z "$REAL_USER" ]; then - warn "Could not detect non-root user. Docker group will not be configured." -fi - -command -v docker >/dev/null || fail "Docker not found. DGX Spark should have Docker pre-installed." - -# ── 1. Docker group ─────────────────────────────────────────────── - -if [ -n "$REAL_USER" ]; then - if id -nG "$REAL_USER" | grep -qw docker; then - info "User '$REAL_USER' already in docker group" - else - info "Adding '$REAL_USER' to docker group..." - usermod -aG docker "$REAL_USER" - DOCKER_GROUP_ADDED=true - fi -fi - -# ── 2. Next steps ───────────────────────────────────────────────── - -echo "" -if [ "${DOCKER_GROUP_ADDED:-}" = true ]; then - warn "Docker group was just added. You must open a new terminal (or run 'newgrp docker') before continuing." -else - info "DGX Spark Docker configuration complete." -fi diff --git a/scripts/start-services.sh b/scripts/start-services.sh index 0c64d1341..ede47bee2 100755 --- a/scripts/start-services.sh +++ b/scripts/start-services.sh @@ -162,7 +162,7 @@ do_start() { start_service cloudflared \ cloudflared tunnel --url "http://localhost:$DASHBOARD_PORT" else - warn "cloudflared not found — no public URL. Install: brev-setup.sh or manually." + warn "cloudflared not found — no public URL. Install it separately if you need a public tunnel." fi # Wait for cloudflared to publish URL diff --git a/spark-install.md b/spark-install.md index 94cf265bf..f9fbbe7ca 100644 --- a/spark-install.md +++ b/spark-install.md @@ -2,7 +2,7 @@ > **WIP** — This page is actively being updated as we work through Spark installs. Expect changes. -This guide walks you through installing and running NemoClaw on an NVIDIA DGX Spark. 
DGX Spark ships with Ubuntu 24.04 and Docker pre-installed; the steps below handle the remaining Spark-specific configuration so you can get from zero to a working sandbox. +This guide walks you through installing and running NemoClaw on an NVIDIA DGX Spark. DGX Spark ships with Ubuntu 24.04 and Docker pre-installed, and current OpenShell releases no longer require a Spark-specific Docker cgroup workaround. ## Prerequisites @@ -10,16 +10,21 @@ Before starting, make sure you have: - **Docker** (pre-installed on DGX Spark, v28.x/29.x) - **Node.js 22** (installed automatically by the NemoClaw installer) -- **OpenShell CLI** (installed automatically by the NemoClaw installer) +- **OpenShell CLI** (installed automatically during NemoClaw onboarding if missing) - **API key** (cloud inference only) — the onboarding wizard prompts for a provider and key during setup. For example, an NVIDIA API key from [build.nvidia.com](https://build.nvidia.com) for NVIDIA Endpoints, or an OpenAI, Anthropic, or Gemini key for those providers. **If you plan to use local inference with Ollama instead, no API key is needed** — see [Local Inference with Ollama](#local-inference-with-ollama) to set up Ollama before installing NemoClaw. ## Quick Start ```bash -# Spark-specific setup (requires sudo) -curl -fsSL https://raw.githubusercontent.com/NVIDIA/NemoClaw/main/scripts/setup-spark.sh | sudo bash +# Clone NemoClaw: +git clone https://github.com/NVIDIA/NemoClaw.git +cd NemoClaw -# Install NemoClaw +# Install NemoClaw. The standard installer and onboarding flow handle the +# OpenShell CLI and current DGX Spark cgroup behavior automatically: +./install.sh + +# Alternatively, you can use the hosted install script: curl -fsSL https://www.nvidia.com/nemoclaw.sh | bash ``` @@ -146,8 +151,8 @@ openclaw agent --agent main --local -m "Which model and GPU are in use?" 
--sessi | Issue | Status | Workaround | |-------|--------|------------| -| cgroup v2 kills k3s in Docker | Fixed in recent OpenShell versions | OpenShell sets `cgroupns=host` on the gateway container directly | -| Docker permission denied | Fixed in `setup-spark` | `usermod -aG docker` | +| cgroup v2 kills k3s in Docker | Resolved in current OpenShell releases | Use the standard installer and onboard flow | +| Docker permission denied | Host-specific | Ensure your user can access the Docker daemon | | CoreDNS CrashLoop after setup | Fixed in `fix-coredns.sh` | Uses container gateway IP, not 127.0.0.11 | | Image pull failure (k3s can't find built image) | OpenShell bug | `openshell gateway destroy && openshell gateway start`, re-run setup | | GPU passthrough | Untested on Spark | Should work with `--gpu` flag if NVIDIA Container Toolkit is configured | @@ -155,16 +160,9 @@ openclaw agent --agent main --local -m "Which model and GPU are in use?" --sessi | Port 3000 conflict with AI Workbench | Known | AI Workbench Traefik proxy uses port 3000 (and 10000); use a different port for other services | | Network policy blocks NVIDIA cloud API | By design | Ensure `integrate.api.nvidia.com` is in the sandbox network policy if using cloud inference | -### Manual Setup (if setup-spark doesn't work) - -If `setup-spark.sh` fails, you can apply the fix it performs by hand: - -#### Fix Docker permissions +### Manual Setup -```bash -sudo usermod -aG docker $USER -newgrp docker # or log out and back in -``` +If onboarding reports that Docker is missing or unreachable, fix Docker access on the host and rerun `nemoclaw onboard`. ## Technical Reference @@ -186,7 +184,7 @@ Some NIM containers (e.g., Nemotron-3-Super-120B-A12B) ship native arm64 images ### What's Different on Spark -DGX Spark ships **Ubuntu 24.04 (Noble) + Docker 28.x/29.x** on **aarch64 (Grace CPU + GB10 GPU, 128 GB unified memory)** but no k8s/k3s. 
OpenShell embeds k3s inside a Docker container, which hits two problems on Spark: +DGX Spark ships **Ubuntu 24.04 (Noble) + Docker 28.x/29.x** on **aarch64 (Grace CPU + GB10 GPU, 128 GB unified memory)** but no k8s/k3s. OpenShell embeds k3s inside a Docker container, so the main Spark-specific concerns today are Docker access and using a current OpenShell release. #### Docker permissions @@ -196,7 +194,7 @@ Error in the hyper legacy client: client error (Connect) ``` **Cause**: Your user isn't in the `docker` group. -**Fix**: `setup-spark` runs `usermod -aG docker $USER`. You may need to log out and back in (or `newgrp docker`) for it to take effect. +**Fix**: Grant your user access to the Docker daemon, then rerun `nemoclaw onboard`. You may need to log out and back in (or `newgrp docker`) for group membership changes to take effect. #### cgroup v2 incompatibility (resolved) @@ -208,7 +206,7 @@ Failed to start ContainerManager: failed to initialize top level QOS containers **Cause**: Spark runs cgroup v2 (Ubuntu 24.04 default). OpenShell's gateway container starts k3s, which tries to create cgroup v1-style paths that don't exist without host cgroup namespace access. -**Fix**: Recent OpenShell versions set `cgroupns=host` on the gateway container directly ([OpenShell PR #329](https://github.com/NVIDIA/OpenShell/pull/329)). No `daemon.json` workaround is needed. If you are on an older OpenShell version, upgrade with: +**Fix**: Recent OpenShell versions set `cgroupns=host` on the gateway container directly ([OpenShell PR #329](https://github.com/NVIDIA/OpenShell/pull/329)). No `default-cgroupns-mode=host` or other `daemon.json` workaround is needed. The standard NemoClaw installer/onboarding flow installs the current OpenShell CLI automatically when it is missing. 
If you are on an older OpenShell version, upgrade with: ```bash curl -LsSf https://raw.githubusercontent.com/NVIDIA/OpenShell/main/install.sh | sh diff --git a/src/lib/deploy.test.ts b/src/lib/deploy.test.ts new file mode 100644 index 000000000..bcfa78ac6 --- /dev/null +++ b/src/lib/deploy.test.ts @@ -0,0 +1,116 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; +import { + buildDeployEnvLines, + findBrevInstanceStatus, + inferDeployProvider, + isBrevInstanceFailed, + isBrevInstanceReady, +} from "../../dist/lib/deploy"; + +describe("inferDeployProvider", () => { + it("prefers an explicit provider override", () => { + const provider = inferDeployProvider("openai", { + NVIDIA_API_KEY: "nvapi-test", + }); + + expect(provider).toBe("openai"); + }); + + it("infers the provider from a single matching credential", () => { + const provider = inferDeployProvider("", { + ANTHROPIC_API_KEY: "sk-ant-test", + }); + + expect(provider).toBe("anthropic"); + }); + + it("returns null when multiple provider credentials are present without an override", () => { + const provider = inferDeployProvider("", { + NVIDIA_API_KEY: "nvapi-test", + OPENAI_API_KEY: "sk-openai-test", + }); + + expect(provider).toBeNull(); + }); +}); + +describe("buildDeployEnvLines", () => { + it("includes standard non-interactive deploy env plus passthrough values", () => { + const envLines = buildDeployEnvLines({ + env: { + CHAT_UI_URL: "https://chat.example.com", + NEMOCLAW_POLICY_MODE: "suggested", + }, + sandboxName: "my-assistant", + provider: "build", + credentials: { + NVIDIA_API_KEY: "nvapi-test", + }, + shellQuote: (value: string) => `'${value}'`, + }); + + expect(envLines).toContain("NEMOCLAW_NON_INTERACTIVE=1"); + expect(envLines).toContain("NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1"); + expect(envLines).toContain("NEMOCLAW_SANDBOX_NAME='my-assistant'"); + 
expect(envLines).toContain("NEMOCLAW_PROVIDER='build'"); + expect(envLines).toContain("CHAT_UI_URL='https://chat.example.com'"); + expect(envLines).toContain("NEMOCLAW_POLICY_MODE='suggested'"); + expect(envLines).toContain("NVIDIA_API_KEY='nvapi-test'"); + }); +}); + +describe("Brev status helpers", () => { + it("finds the matching instance from brev ls json", () => { + const status = findBrevInstanceStatus( + JSON.stringify([ + { name: "other", status: "RUNNING" }, + { name: "target", status: "FAILURE", build_status: "PENDING", shell_status: "NOT READY" }, + ]), + "target", + ); + + expect(status).toMatchObject({ + name: "target", + status: "FAILURE", + build_status: "PENDING", + shell_status: "NOT READY", + }); + }); + + it("classifies Brev failure states", () => { + expect( + isBrevInstanceFailed({ + status: "FAILURE", + build_status: "PENDING", + shell_status: "NOT READY", + }), + ).toBe(true); + expect( + isBrevInstanceFailed({ + status: "RUNNING", + build_status: "COMPLETED", + shell_status: "READY", + }), + ).toBe(false); + }); + + it("only classifies Brev readiness when running, completed, and ready", () => { + expect( + isBrevInstanceReady({ + status: "RUNNING", + build_status: "COMPLETED", + shell_status: "READY", + }), + ).toBe(true); + expect( + isBrevInstanceReady({ + status: "RUNNING", + build_status: "BUILDING", + shell_status: "NOT READY", + }), + ).toBe(false); + }); +}); diff --git a/src/lib/deploy.ts b/src/lib/deploy.ts new file mode 100644 index 000000000..226fa31aa --- /dev/null +++ b/src/lib/deploy.ts @@ -0,0 +1,397 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +export interface DeployCredentials { + NVIDIA_API_KEY?: string | null; + OPENAI_API_KEY?: string | null; + ANTHROPIC_API_KEY?: string | null; + GEMINI_API_KEY?: string | null; + COMPATIBLE_API_KEY?: string | null; + COMPATIBLE_ANTHROPIC_API_KEY?: string | null; + GITHUB_TOKEN?: string | null; + TELEGRAM_BOT_TOKEN?: string | null; + DISCORD_BOT_TOKEN?: string | null; + SLACK_BOT_TOKEN?: string | null; +} + +export interface BrevInstanceStatus { + name?: string; + id?: string; + status?: string; + build_status?: string; + shell_status?: string; + health_status?: string; + instance_type?: string; + instance_kind?: string; + gpu?: string; +} + +export interface DeployExecutionOptions { + instanceName?: string; + env: NodeJS.ProcessEnv; + rootDir: string; + getCredential: (key: string) => string | null; + validateName: (value: string, label: string) => string; + shellQuote: (value: string) => string; + run: (command: string, opts?: { ignoreError?: boolean }) => void; + runInteractive: (command: string) => void; + execFileSync: (file: string, args: string[], opts?: Record) => string; + spawnSync: (file: string, args: string[], opts?: Record) => void; + log: (message?: string) => void; + error: (message?: string) => void; + stdoutWrite: (message: string) => void; + exit: (code: number) => never; +} + +const SSH_HOST_KEY_ARGS = ["-o", "StrictHostKeyChecking=accept-new", "-o", "LogLevel=ERROR"]; + +export function inferDeployProvider( + explicitProvider: string | undefined, + credentials: DeployCredentials, +): string | null { + const explicit = String(explicitProvider || "") + .trim() + .toLowerCase(); + if (explicit) return explicit; + + const providerByCredential: Array<[keyof DeployCredentials, string]> = [ + ["NVIDIA_API_KEY", "build"], + ["OPENAI_API_KEY", "openai"], + ["ANTHROPIC_API_KEY", "anthropic"], + ["GEMINI_API_KEY", "gemini"], + 
["COMPATIBLE_API_KEY", "custom"], + ["COMPATIBLE_ANTHROPIC_API_KEY", "anthropicCompatible"], + ]; + const matches = providerByCredential.filter(([key]) => credentials[key]); + if (matches.length === 1) return matches[0][1]; + return null; +} + +export function buildDeployEnvLines(opts: { + env: NodeJS.ProcessEnv; + sandboxName: string; + provider: string; + credentials: DeployCredentials; + shellQuote: (value: string) => string; +}): string[] { + const { env, sandboxName, provider, credentials, shellQuote } = opts; + const envLines = [ + "NEMOCLAW_NON_INTERACTIVE=1", + "NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1", + `NEMOCLAW_SANDBOX_NAME=${shellQuote(sandboxName)}`, + `NEMOCLAW_PROVIDER=${shellQuote(provider)}`, + ]; + + const passthroughVars = [ + "NEMOCLAW_MODEL", + "NEMOCLAW_ENDPOINT_URL", + "NEMOCLAW_POLICY_MODE", + "NEMOCLAW_POLICY_PRESETS", + "CHAT_UI_URL", + ] as const; + for (const key of passthroughVars) { + const value = env[key]; + if (value) envLines.push(`${key}=${shellQuote(value)}`); + } + + for (const [key, value] of Object.entries(credentials)) { + if (value) envLines.push(`${key}=${shellQuote(value)}`); + } + + return envLines; +} + +function outputHasExactLine(output: string | undefined, expected: string): boolean { + return String(output || "") + .split(/\r?\n/) + .some((line) => line.trim() === expected); +} + +export function findBrevInstanceStatus( + rawJson: string, + instanceName: string, +): BrevInstanceStatus | null { + try { + const items = JSON.parse(rawJson); + if (!Array.isArray(items)) return null; + return (items.find((item) => item && item.name === instanceName) as BrevInstanceStatus) || null; + } catch { + return null; + } +} + +export function isBrevInstanceFailed(status: BrevInstanceStatus | null): boolean { + if (!status) return false; + const overall = String(status.status || "").toUpperCase(); + const build = String(status.build_status || "").toUpperCase(); + return overall === "FAILURE" || build === "FAILURE"; +} + +export 
function isBrevInstanceReady(status: BrevInstanceStatus | null): boolean { + if (!status) return false; + const overall = String(status.status || "").toUpperCase(); + const build = String(status.build_status || "").toUpperCase(); + const shell = String(status.shell_status || "").toUpperCase(); + return overall === "RUNNING" && build === "COMPLETED" && shell === "READY"; +} + +function getBrevInstanceStatus( + instanceName: string, + execFileSync: DeployExecutionOptions["execFileSync"], +): BrevInstanceStatus | null { + try { + const raw = execFileSync("brev", ["ls", "--json"], { encoding: "utf-8" }); + return findBrevInstanceStatus(raw, instanceName); + } catch { + return null; + } +} + +function fail( + lines: string[], + error: DeployExecutionOptions["error"], + exit: DeployExecutionOptions["exit"], +): never { + for (const line of lines) error(line); + return exit(1); +} + +export async function executeDeploy(opts: DeployExecutionOptions): Promise { + const { + instanceName, + env, + rootDir, + getCredential, + validateName, + shellQuote, + run, + runInteractive, + execFileSync, + spawnSync, + log, + error, + stdoutWrite, + exit, + } = opts; + + log(""); + log(" ⚠ `nemoclaw deploy` is deprecated and will be removed in a future release."); + log( + " Prefer provisioning the remote host separately, then run the standard installer and `nemoclaw onboard` on that host.", + ); + log(""); + if (!instanceName) { + return fail( + [ + " Usage: nemoclaw deploy ", + "", + " Examples:", + " nemoclaw deploy my-gpu-box", + " nemoclaw deploy nemoclaw-prod", + " nemoclaw deploy nemoclaw-test", + ], + error, + exit, + ); + } + + const name = validateName(instanceName, "instance name"); + const qname = shellQuote(name); + const gpu = env.NEMOCLAW_GPU || "a2-highgpu-1g:nvidia-tesla-a100:1"; + const brevProvider = String(env.NEMOCLAW_BREV_PROVIDER || "gcp").trim().toLowerCase(); + const skipConnect = ["1", "true"].includes( + String(env.NEMOCLAW_DEPLOY_NO_CONNECT || 
"").toLowerCase(), + ); + const skipStartServices = ["1", "true"].includes( + String(env.NEMOCLAW_DEPLOY_NO_START_SERVICES || "").toLowerCase(), + ); + const sandboxName = validateName(env.NEMOCLAW_SANDBOX_NAME || "my-assistant", "sandbox name"); + const credentials: DeployCredentials = { + NVIDIA_API_KEY: getCredential("NVIDIA_API_KEY"), + OPENAI_API_KEY: getCredential("OPENAI_API_KEY"), + ANTHROPIC_API_KEY: getCredential("ANTHROPIC_API_KEY"), + GEMINI_API_KEY: getCredential("GEMINI_API_KEY"), + COMPATIBLE_API_KEY: getCredential("COMPATIBLE_API_KEY"), + COMPATIBLE_ANTHROPIC_API_KEY: getCredential("COMPATIBLE_ANTHROPIC_API_KEY"), + GITHUB_TOKEN: getCredential("GITHUB_TOKEN"), + TELEGRAM_BOT_TOKEN: getCredential("TELEGRAM_BOT_TOKEN"), + DISCORD_BOT_TOKEN: getCredential("DISCORD_BOT_TOKEN"), + SLACK_BOT_TOKEN: getCredential("SLACK_BOT_TOKEN"), + }; + const provider = inferDeployProvider(env.NEMOCLAW_PROVIDER, credentials); + if (!provider) { + return fail( + [ + " Could not determine which inference provider to configure for remote onboarding.", + " Set `NEMOCLAW_PROVIDER` explicitly or provide exactly one matching provider credential.", + " Supported provider credentials: NVIDIA_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY, GEMINI_API_KEY, COMPATIBLE_API_KEY, COMPATIBLE_ANTHROPIC_API_KEY.", + ], + error, + exit, + ); + } + + log(""); + log(` Deploying NemoClaw to Brev instance: ${name}`); + log(""); + + try { + execFileSync("which", ["brev"], { stdio: "ignore" }); + } catch { + return fail(["brev CLI not found. 
Install: https://brev.nvidia.com"], error, exit); + } + + let exists = false; + try { + const out = execFileSync("brev", ["ls"], { encoding: "utf-8" }); + exists = outputHasExactLine(out, name); + } catch (caught) { + const err = caught as { stdout?: string; stderr?: string }; + if (outputHasExactLine(err.stdout, name)) exists = true; + if (outputHasExactLine(err.stderr, name)) exists = true; + } + + if (!exists) { + log(` Creating Brev instance '${name}' (${gpu}, provider=${brevProvider})...`); + run(`brev create ${qname} --type ${shellQuote(gpu)} --provider ${shellQuote(brevProvider)}`); + } else { + log(` Brev instance '${name}' already exists.`); + } + + run("brev refresh", { ignoreError: true }); + + stdoutWrite(" Waiting for Brev instance readiness "); + for (let i = 0; i < 60; i++) { + const brevStatus = getBrevInstanceStatus(name, execFileSync); + if (isBrevInstanceFailed(brevStatus)) { + stdoutWrite("\n"); + error(` Brev instance '${name}' did not become ready.`); + error( + ` Brev status: status=${brevStatus?.status || "unknown"} build=${brevStatus?.build_status || "unknown"} shell=${brevStatus?.shell_status || "unknown"}`, + ); + if (brevStatus?.id) error(` Instance id: ${brevStatus.id}`); + return fail([` Try: brev reset ${name}`], error, exit); + } + if (isBrevInstanceReady(brevStatus)) { + stdoutWrite(" ✓\n"); + break; + } + + if (i === 59) { + stdoutWrite("\n"); + const finalBrevStatus = getBrevInstanceStatus(name, execFileSync); + if (finalBrevStatus) { + error( + ` Brev status at timeout: status=${finalBrevStatus.status || "unknown"} build=${finalBrevStatus.build_status || "unknown"} shell=${finalBrevStatus.shell_status || "unknown"}`, + ); + if (finalBrevStatus.id) error(` Instance id: ${finalBrevStatus.id}`); + } + return fail([` Timed out waiting for Brev instance readiness for ${name}`], error, exit); + } + stdoutWrite("."); + spawnSync("sleep", ["3"]); + } + + stdoutWrite(" Waiting for SSH "); + for (let i = 0; i < 30; i++) { + try { + 
execFileSync( + "ssh", + ["-o", "ConnectTimeout=5", ...SSH_HOST_KEY_ARGS, name, "echo", "ok"], + { encoding: "utf-8", stdio: "ignore" }, + ); + stdoutWrite(" ✓\n"); + break; + } catch { + if (i === 29) { + stdoutWrite("\n"); + return fail([` Timed out waiting for SSH to ${name}`], error, exit); + } + stdoutWrite("."); + spawnSync("sleep", ["3"]); + } + } + + const remoteHome = execFileSync( + "ssh", + [...SSH_HOST_KEY_ARGS, name, "echo", "$HOME"], + { encoding: "utf-8" }, + ).trim(); + const remoteDir = `${remoteHome}/nemoclaw`; + + log(" Syncing NemoClaw to VM..."); + run( + `ssh -o StrictHostKeyChecking=accept-new -o LogLevel=ERROR ${qname} 'mkdir -p ${shellQuote(remoteDir)}'`, + ); + run( + `rsync -az --delete --exclude node_modules --exclude .git --exclude dist --exclude .venv -e "ssh -o StrictHostKeyChecking=accept-new -o LogLevel=ERROR" "${rootDir}/" ${qname}:${shellQuote(`${remoteDir}/`)}`, + ); + + const envLines = buildDeployEnvLines({ + env, + sandboxName, + provider, + credentials, + shellQuote, + }); + const envDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-env-")); + const envTmp = path.join(envDir, "env"); + fs.writeFileSync(envTmp, envLines.join("\n") + "\n", { mode: 0o600 }); + try { + run( + `scp -q -o StrictHostKeyChecking=accept-new -o LogLevel=ERROR ${shellQuote(envTmp)} ${qname}:${shellQuote(`${remoteDir}/.env`)}`, + ); + run( + `ssh -q -o StrictHostKeyChecking=accept-new -o LogLevel=ERROR ${qname} 'chmod 600 ${shellQuote(`${remoteDir}/.env`)}'`, + ); + } finally { + try { + fs.unlinkSync(envTmp); + } catch { + /* ignored */ + } + try { + fs.rmdirSync(envDir); + } catch { + /* ignored */ + } + } + + log(" Running setup..."); + runInteractive( + `ssh -t -o StrictHostKeyChecking=accept-new -o LogLevel=ERROR ${qname} 'cd ${shellQuote(remoteDir)} && set -a && . 
.env && set +a && bash scripts/install.sh --non-interactive --yes-i-accept-third-party-software'`, + ); + + if ( + !skipStartServices && + (credentials.TELEGRAM_BOT_TOKEN || credentials.DISCORD_BOT_TOKEN || credentials.SLACK_BOT_TOKEN) + ) { + log(" Starting services..."); + run( + `ssh -o StrictHostKeyChecking=accept-new -o LogLevel=ERROR ${qname} 'cd ${shellQuote(remoteDir)} && set -a && . .env && set +a && bash scripts/start-services.sh'`, + ); + } + + if (skipStartServices) { + log(" Skipping service startup (NEMOCLAW_DEPLOY_NO_START_SERVICES=1)."); + } + + if (skipConnect) { + log(""); + log(" Skipping interactive sandbox connect (NEMOCLAW_DEPLOY_NO_CONNECT=1)."); + log(` Remote sandbox: ${sandboxName}`); + log(` Connect later with: ssh ${name} 'openshell sandbox connect ${sandboxName}'`); + return; + } + + log(""); + log(" Connecting to sandbox..."); + log(""); + runInteractive( + `ssh -t -o StrictHostKeyChecking=accept-new -o LogLevel=ERROR ${qname} 'cd ${shellQuote(remoteDir)} && set -a && . .env && set +a && openshell sandbox connect ${shellQuote(sandboxName)}'`, + ); +} diff --git a/src/lib/preflight.test.ts b/src/lib/preflight.test.ts index a14102c5e..fd87b5aaa 100644 --- a/src/lib/preflight.test.ts +++ b/src/lib/preflight.test.ts @@ -6,9 +6,11 @@ import { describe, expect, it } from "vitest"; // coverage is attributed to dist/lib/preflight.js, which is what the // ratchet measures. 
import { + assessHost, checkPortAvailable, getMemoryInfo, ensureSwap, + planHostRemediation, } from "../../dist/lib/preflight"; describe("checkPortAvailable", () => { @@ -262,6 +264,199 @@ describe("getMemoryInfo", () => { }); }); +describe("assessHost", () => { + it("detects podman as an unsupported runtime on macOS", () => { + const result = assessHost({ + platform: "darwin", + env: {}, + dockerInfoOutput: "Podman Engine", + commandExistsImpl: (name: string) => name === "docker", + }); + + expect(result.runtime).toBe("podman"); + expect(result.isUnsupportedRuntime).toBe(true); + expect(result.dockerReachable).toBe(true); + }); + + it("detects podman as an unsupported runtime on Linux", () => { + const result = assessHost({ + platform: "linux", + env: {}, + dockerInfoOutput: "Podman Engine", + commandExistsImpl: (name: string) => name === "docker", + }); + + expect(result.runtime).toBe("podman"); + expect(result.isUnsupportedRuntime).toBe(true); + expect(result.dockerReachable).toBe(true); + }); + + it("detects linux docker on cgroup v2 without requiring host cgroupns fix", () => { + const result = assessHost({ + platform: "linux", + env: {}, + dockerInfoOutput: JSON.stringify({ + ServerVersion: "29.3.1", + OperatingSystem: "Ubuntu 24.04", + CgroupVersion: "2", + }), + readFileImpl: () => '{"default-cgroupns-mode":"private"}', + commandExistsImpl: (name: string) => name === "docker" || name === "apt-get" || name === "systemctl", + runCaptureImpl: (command: string) => { + if (command === "command -v apt-get") return "/usr/bin/apt-get"; + if (command === "command -v systemctl") return "/usr/bin/systemctl"; + if (command === "systemctl is-active docker") return "active"; + if (command === "systemctl is-enabled docker") return "enabled"; + return ""; + }, + }); + + expect(result.runtime).toBe("docker"); + expect(result.packageManager).toBe("apt"); + expect(result.systemctlAvailable).toBe(true); + expect(result.dockerServiceActive).toBe(true); + 
expect(result.dockerServiceEnabled).toBe(true); + expect(result.dockerCgroupVersion).toBe("v2"); + expect(result.dockerDefaultCgroupnsMode).toBe("private"); + expect(result.requiresHostCgroupnsFix).toBe(false); + }); + + it("marks WSL in notes when the environment indicates it", () => { + const result = assessHost({ + platform: "linux", + env: { WSL_DISTRO_NAME: "Ubuntu" }, + dockerInfoOutput: "", + commandExistsImpl: () => false, + }); + + expect(result.isWsl).toBe(true); + expect(result.notes).toContain("Running under WSL"); + }); + + it("detects likely headless environments", () => { + const result = assessHost({ + platform: "linux", + env: {}, + dockerInfoOutput: "", + commandExistsImpl: () => false, + }); + + expect(result.isHeadlessLikely).toBe(true); + expect(result.notes).toContain("Headless environment likely"); + }); +}); + +describe("planHostRemediation", () => { + it("recommends starting docker when installed but unreachable", () => { + const actions = planHostRemediation({ + platform: "linux", + isWsl: false, + runtime: "unknown", + packageManager: "apt", + systemctlAvailable: true, + dockerServiceActive: false, + dockerServiceEnabled: true, + dockerInstalled: true, + dockerRunning: false, + dockerReachable: false, + nodeInstalled: true, + openshellInstalled: true, + dockerCgroupVersion: "unknown", + dockerDefaultCgroupnsMode: "unknown", + requiresHostCgroupnsFix: false, + isUnsupportedRuntime: false, + isHeadlessLikely: false, + hasNvidiaGpu: false, + notes: [], + }); + + expect(actions[0].id).toBe("start_docker"); + expect(actions[0].blocking).toBe(true); + expect(actions[0].commands).toContain("sudo systemctl start docker"); + }); + + it("warns that podman is unsupported on macOS without blocking onboarding", () => { + const actions = planHostRemediation({ + platform: "darwin", + isWsl: false, + runtime: "podman", + packageManager: "brew", + systemctlAvailable: false, + dockerServiceActive: null, + dockerServiceEnabled: null, + dockerInstalled: 
true, + dockerRunning: true, + dockerReachable: true, + nodeInstalled: true, + openshellInstalled: true, + dockerCgroupVersion: "unknown", + dockerDefaultCgroupnsMode: "unknown", + requiresHostCgroupnsFix: false, + isUnsupportedRuntime: true, + isHeadlessLikely: false, + hasNvidiaGpu: false, + notes: [], + }); + + const action = actions.find((entry: { id: string }) => entry.id === "unsupported_runtime_warning"); + expect(action).toBeTruthy(); + expect(action?.blocking).toBe(false); + }); + + it("recommends installing Docker with a generic Linux hint when it is missing", () => { + const actions = planHostRemediation({ + platform: "linux", + isWsl: false, + runtime: "unknown", + packageManager: "apt", + systemctlAvailable: true, + dockerServiceActive: null, + dockerServiceEnabled: null, + dockerInstalled: false, + dockerRunning: false, + dockerReachable: false, + nodeInstalled: true, + openshellInstalled: true, + dockerCgroupVersion: "unknown", + dockerDefaultCgroupnsMode: "unknown", + requiresHostCgroupnsFix: false, + isUnsupportedRuntime: false, + isHeadlessLikely: false, + hasNvidiaGpu: false, + notes: [], + }); + + expect(actions[0].id).toBe("install_docker"); + expect(actions[0].commands[0]).toContain("Install Docker Engine"); + }); + + it("recommends installing openshell when missing", () => { + const actions = planHostRemediation({ + platform: "linux", + isWsl: false, + runtime: "docker", + packageManager: "apt", + systemctlAvailable: true, + dockerServiceActive: true, + dockerServiceEnabled: true, + dockerInstalled: true, + dockerRunning: true, + dockerReachable: true, + nodeInstalled: true, + openshellInstalled: false, + dockerCgroupVersion: "v2", + dockerDefaultCgroupnsMode: "unknown", + requiresHostCgroupnsFix: false, + isUnsupportedRuntime: false, + isHeadlessLikely: false, + hasNvidiaGpu: false, + notes: [], + }); + + expect(actions.some((action: { id: string }) => action.id === "install_openshell")).toBe(true); + }); +}); + describe("ensureSwap", () => 
{ it("returns ok when total memory already exceeds threshold", () => { const result = ensureSwap(6144, { diff --git a/src/lib/preflight.ts b/src/lib/preflight.ts index 8f874e3b6..657cac38d 100644 --- a/src/lib/preflight.ts +++ b/src/lib/preflight.ts @@ -72,6 +72,352 @@ export interface EnsureSwapOpts { getMemoryInfoImpl?: (opts: GetMemoryInfoOpts) => MemoryInfo | null; } +export type ContainerRuntime = + | "docker" + | "docker-desktop" + | "colima" + | "podman" + | "unknown"; + +export type PackageManager = "apt" | "dnf" | "yum" | "brew" | "pacman" | "unknown"; + +export type RemediationKind = "info" | "manual" | "auto" | "sudo"; + +export interface HostAssessment { + platform: NodeJS.Platform | string; + isWsl: boolean; + runtime: ContainerRuntime; + packageManager?: PackageManager; + systemctlAvailable?: boolean; + dockerServiceActive?: boolean | null; + dockerServiceEnabled?: boolean | null; + dockerInstalled: boolean; + dockerRunning: boolean; + dockerReachable: boolean; + nodeInstalled: boolean; + openshellInstalled: boolean; + dockerInfoSummary?: string; + dockerCgroupVersion?: "v1" | "v2" | "unknown"; + dockerDefaultCgroupnsMode?: "host" | "private" | "unknown"; + requiresHostCgroupnsFix: boolean; + isUnsupportedRuntime: boolean; + isHeadlessLikely: boolean; + hasNvidiaGpu: boolean; + notes: string[]; +} + +export interface RemediationAction { + id: string; + title: string; + kind: RemediationKind; + reason: string; + commands: string[]; + blocking: boolean; +} + +export interface AssessHostOpts { + platform?: NodeJS.Platform; + env?: NodeJS.ProcessEnv; + release?: string; + procVersion?: string; + dockerInfoOutput?: string; + dockerInfoError?: string; + readFileImpl?: (filePath: string, encoding: BufferEncoding) => string; + runCaptureImpl?: (command: string, options?: { ignoreError?: boolean }) => string; + commandExistsImpl?: (commandName: string) => boolean; + gpuProbeImpl?: () => boolean; +} + +function commandExists( + commandName: string, + 
runCaptureImpl: (command: string, options?: { ignoreError?: boolean }) => string, +): boolean { + try { + const output = runCaptureImpl(`command -v ${commandName}`, { ignoreError: true }); + return Boolean(String(output || "").trim()); + } catch { + return false; + } +} + +function detectWsl(opts: { + platform: NodeJS.Platform | string; + env: NodeJS.ProcessEnv; + release: string; + procVersion: string; +}): boolean { + if (opts.platform !== "linux") return false; + + return ( + Boolean(opts.env.WSL_DISTRO_NAME) || + Boolean(opts.env.WSL_INTEROP) || + /microsoft/i.test(opts.release) || + /microsoft/i.test(opts.procVersion) + ); +} + +function inferContainerRuntime(info = ""): ContainerRuntime { + const normalized = String(info || "").toLowerCase(); + if (!normalized.trim()) return "unknown"; + if (normalized.includes("podman")) return "podman"; + if (normalized.includes("colima")) return "colima"; + if (normalized.includes("docker desktop")) return "docker-desktop"; + if (normalized.includes("docker")) return "docker"; + return "unknown"; +} + +function parseDockerCgroupVersion(info = ""): "v1" | "v2" | "unknown" { + if (/"CgroupVersion"\s*:\s*"2"/.test(info) || /CgroupVersion["=: ]+2/i.test(info)) { + return "v2"; + } + if (/"CgroupVersion"\s*:\s*"1"/.test(info) || /CgroupVersion["=: ]+1/i.test(info)) { + return "v1"; + } + return "unknown"; +} + +function parseDockerInfoSummary(info = ""): string | undefined { + const versionMatch = info.match(/"ServerVersion"\s*:\s*"([^"]+)"/); + const osMatch = info.match(/"OperatingSystem"\s*:\s*"([^"]+)"/); + const parts = [versionMatch?.[1], osMatch?.[1]].filter(Boolean); + return parts.length > 0 ? 
parts.join(" · ") : undefined; +} + +function readDockerDefaultCgroupnsMode( + readFileImpl: (filePath: string, encoding: BufferEncoding) => string, +): "host" | "private" | "unknown" { + try { + const raw = readFileImpl("/etc/docker/daemon.json", "utf-8"); + const parsed = JSON.parse(raw) as { ["default-cgroupns-mode"]?: unknown }; + const mode = parsed["default-cgroupns-mode"]; + return mode === "host" || mode === "private" ? mode : "unknown"; + } catch { + return "unknown"; + } +} + +function isHeadlessLikely(env: NodeJS.ProcessEnv): boolean { + return !env.DISPLAY && !env.WAYLAND_DISPLAY && !env.TERM_PROGRAM; +} + +function detectNvidiaGpu( + runCaptureImpl: (command: string, options?: { ignoreError?: boolean }) => string, +): boolean { + if (!commandExists("nvidia-smi", runCaptureImpl)) { + return false; + } + return Boolean(String(runCaptureImpl("nvidia-smi -L", { ignoreError: true }) || "").trim()); +} + +function detectPackageManager( + runCaptureImpl: (command: string, options?: { ignoreError?: boolean }) => string, +): PackageManager { + if (commandExists("apt-get", runCaptureImpl)) return "apt"; + if (commandExists("dnf", runCaptureImpl)) return "dnf"; + if (commandExists("yum", runCaptureImpl)) return "yum"; + if (commandExists("brew", runCaptureImpl)) return "brew"; + if (commandExists("pacman", runCaptureImpl)) return "pacman"; + return "unknown"; +} + +function parseSystemctlState(value = ""): boolean | null { + const normalized = String(value || "").trim().toLowerCase(); + if (!normalized) return null; + if (normalized === "active" || normalized === "enabled") return true; + if ( + normalized === "inactive" || + normalized === "failed" || + normalized === "disabled" || + normalized === "masked" + ) { + return false; + } + return null; +} + +export function assessHost(opts: AssessHostOpts = {}): HostAssessment { + const platform = opts.platform ?? process.platform; + const env = opts.env ?? 
process.env; + const runCaptureImpl = + opts.runCaptureImpl ?? + ((command: string, options?: { ignoreError?: boolean }) => + runCapture(command, { ignoreError: options?.ignoreError ?? false })); + const readFileImpl = opts.readFileImpl ?? fs.readFileSync; + const dockerInstalled = + opts.commandExistsImpl?.("docker") ?? commandExists("docker", runCaptureImpl); + const nodeInstalled = opts.commandExistsImpl?.("node") ?? commandExists("node", runCaptureImpl); + const openshellInstalled = + opts.commandExistsImpl?.("openshell") ?? commandExists("openshell", runCaptureImpl); + const hasNvidiaGpu = opts.gpuProbeImpl?.() ?? detectNvidiaGpu(runCaptureImpl); + const packageManager = detectPackageManager(runCaptureImpl); + const systemctlAvailable = commandExists("systemctl", runCaptureImpl); + + let dockerInfoOutput = opts.dockerInfoOutput; + let dockerReachable = false; + let dockerRunning = false; + if (dockerInstalled && dockerInfoOutput === undefined) { + dockerInfoOutput = runCaptureImpl("docker info --format '{{json .}}' 2>/dev/null", { + ignoreError: true, + }); + } + if (dockerInstalled && String(dockerInfoOutput || "").trim()) { + dockerReachable = true; + dockerRunning = true; + } + + const release = opts.release ?? os.release(); + const procVersion = + opts.procVersion ?? + (() => { + try { + return readFileImpl("/proc/version", "utf-8"); + } catch { + return ""; + } + })(); + let runtime = inferContainerRuntime(dockerInfoOutput); + if (dockerReachable && runtime === "unknown" && platform === "linux") { + runtime = "docker"; + } + const dockerCgroupVersion = dockerReachable + ? parseDockerCgroupVersion(dockerInfoOutput) + : "unknown"; + const dockerDefaultCgroupnsMode = readDockerDefaultCgroupnsMode(readFileImpl); + const dockerServiceActive = + platform === "linux" && systemctlAvailable && dockerInstalled + ? 
parseSystemctlState(runCaptureImpl("systemctl is-active docker", { ignoreError: true })) + : null; + const dockerServiceEnabled = + platform === "linux" && systemctlAvailable && dockerInstalled + ? parseSystemctlState(runCaptureImpl("systemctl is-enabled docker", { ignoreError: true })) + : null; + const assessment: HostAssessment = { + platform, + isWsl: detectWsl({ platform, env, release, procVersion }), + runtime, + packageManager, + systemctlAvailable, + dockerServiceActive, + dockerServiceEnabled, + dockerInstalled, + dockerRunning, + dockerReachable, + nodeInstalled, + openshellInstalled, + dockerInfoSummary: parseDockerInfoSummary(dockerInfoOutput), + dockerCgroupVersion, + dockerDefaultCgroupnsMode, + // Current OpenShell sets host cgroupns on its own cluster container. + requiresHostCgroupnsFix: false, + isUnsupportedRuntime: runtime === "podman", + isHeadlessLikely: isHeadlessLikely(env), + hasNvidiaGpu, + notes: [], + }; + + if (assessment.isWsl) { + assessment.notes.push("Running under WSL"); + } + if (assessment.isHeadlessLikely) { + assessment.notes.push("Headless environment likely"); + } + if (assessment.dockerInfoSummary) { + assessment.notes.push(`Docker: ${assessment.dockerInfoSummary}`); + } + + return assessment; +} + +export function planHostRemediation(assessment: HostAssessment): RemediationAction[] { + const actions: RemediationAction[] = []; + + if (!assessment.dockerInstalled) { + const installCommands: Record<PackageManager, string> = { + apt: "Install Docker Engine, then rerun `nemoclaw onboard`.", + dnf: "Install Docker Engine with your package manager, then rerun `nemoclaw onboard`.", + yum: "Install Docker Engine with your package manager, then rerun `nemoclaw onboard`.", + brew: "Install Docker Desktop or Colima, then rerun `nemoclaw onboard`.", + pacman: "Install Docker Engine with your package manager, then rerun `nemoclaw onboard`.", + unknown: "Install Docker, then rerun `nemoclaw onboard`.", + }; + actions.push({ + id: "install_docker", + title: 
"Install Docker", + kind: "manual", + reason: "Docker is required before onboarding can create a gateway or sandbox.", + commands: + assessment.platform === "darwin" + ? ["Install Docker Desktop or Colima, then rerun `nemoclaw onboard`."] + : [installCommands[assessment.packageManager ?? "unknown"]], + blocking: true, + }); + } else if (!assessment.dockerReachable) { + actions.push({ + id: "start_docker", + title: "Start Docker", + kind: "manual", + reason: "Docker is installed but NemoClaw could not talk to the Docker daemon.", + commands: + assessment.platform === "darwin" + ? ["Start Docker Desktop or Colima, then rerun `nemoclaw onboard`."] + : assessment.systemctlAvailable + ? ["sudo systemctl start docker", "nemoclaw onboard"] + : ["Start the Docker daemon, then rerun `nemoclaw onboard`."], + blocking: true, + }); + } + + if (assessment.isUnsupportedRuntime) { + actions.push({ + id: "unsupported_runtime_warning", + title: "Use a supported Docker runtime if problems appear", + kind: "manual", + reason: + "OpenShell officially documents Docker-based runtimes. Podman may work in some environments, but it is not a supported runtime and behavior may vary.", + commands: + assessment.platform === "darwin" + ? 
["If onboarding or sandbox lifecycle fails, switch to Docker Desktop or Colima."] + : ["If onboarding or sandbox lifecycle fails, switch to a Docker-supported runtime."], + blocking: false, + }); + } + + if (!assessment.nodeInstalled) { + actions.push({ + id: "install_nodejs", + title: "Install Node.js", + kind: "manual", + reason: "NemoClaw requires Node.js for its CLI and plugin build steps.", + commands: ["Run the NemoClaw installer to install Node.js automatically."], + blocking: false, + }); + } + + if (!assessment.openshellInstalled) { + actions.push({ + id: "install_openshell", + title: "Install OpenShell", + kind: "manual", + reason: "OpenShell is required before onboarding can create or manage a gateway.", + commands: ["Run the NemoClaw installer or `scripts/install-openshell.sh`."], + blocking: false, + }); + } + + if (assessment.isHeadlessLikely && !assessment.hasNvidiaGpu) { + actions.push({ + id: "headless_remote_hint", + title: "Review remote/headless UI settings", + kind: "info", + reason: "Headless Linux hosts often need explicit remote UI handling if you want browser access.", + commands: ["Set `CHAT_UI_URL` when remote browser access matters."], + blocking: false, + }); + } + + return actions; +} + // ── Port availability ──────────────────────────────────────────── export async function probePortAvailability( diff --git a/src/lib/services.ts b/src/lib/services.ts index 9582a5921..ba5b1134e 100644 --- a/src/lib/services.ts +++ b/src/lib/services.ts @@ -313,7 +313,7 @@ export async function startAll(opts: ServiceOptions = {}): Promise { `http://localhost:${String(dashboardPort)}`, ]); } catch { - warn("cloudflared not found — no public URL. Install: brev-setup.sh or manually."); + warn("cloudflared not found — no public URL. 
Install cloudflared manually if you need one."); } // Wait for cloudflared URL diff --git a/test/cli.test.js b/test/cli.test.js index b71cde1c4..736fb58f1 100644 --- a/test/cli.test.js +++ b/test/cli.test.js @@ -39,6 +39,7 @@ describe("CLI dispatch", () => { expect(r.out.includes("Getting Started")).toBeTruthy(); expect(r.out.includes("Sandbox Management")).toBeTruthy(); expect(r.out.includes("Policy Presets")).toBeTruthy(); + expect(r.out.includes("Compatibility Commands")).toBeTruthy(); }); it("--help exits 0", () => { @@ -147,6 +148,14 @@ describe("CLI dispatch", () => { expect(r.out.includes("No resumable onboarding session was found")).toBeTruthy(); }); + it("setup-spark is a deprecated compatibility alias for onboard", () => { + const r = run("setup-spark --resume --non-interactive --yes-i-accept-third-party-software"); + expect(r.code).toBe(1); + expect(r.out.includes("setup-spark` is deprecated")).toBeTruthy(); + expect(r.out.includes("Use `nemoclaw onboard` instead")).toBeTruthy(); + expect(r.out.includes("No resumable onboarding session was found")).toBeTruthy(); + }); + it("debug --help exits 0 and shows usage", () => { const r = run("debug --help"); expect(r.code).toBe(0); diff --git a/test/e2e/brev-e2e.test.js b/test/e2e/brev-e2e.test.js index ce67e8943..a7e4bd17a 100644 --- a/test/e2e/brev-e2e.test.js +++ b/test/e2e/brev-e2e.test.js @@ -4,21 +4,22 @@ /** * Ephemeral Brev E2E test suite. * - * Creates a fresh Brev instance (via launchable or bare CPU), bootstraps it, + * Creates a fresh Brev instance via the launchable bootstrap path, bootstraps it, * runs E2E tests remotely, then tears it down. * * Intended to be run from CI via: * npx vitest run --project e2e-brev * * Required env vars: - * BREV_API_TOKEN — Brev refresh token for headless auth * NVIDIA_API_KEY — passed to VM for inference config during onboarding * GITHUB_TOKEN — passed to VM for OpenShell binary download * INSTANCE_NAME — Brev instance name (e.g. 
pr-156-test) * + * Prerequisite: + * The local `brev` CLI must already be authenticated before this suite runs. + * * Optional env vars: - * TEST_SUITE — which test to run: full (default), credential-sanitization, telegram-injection, all - * USE_LAUNCHABLE — "1" (default) to use CI launchable, "0" for bare brev create + brev-setup.sh + * TEST_SUITE — which test to run: full (default), deploy-cli, credential-sanitization, telegram-injection, all * LAUNCHABLE_SETUP_SCRIPT — URL to setup script for launchable path (default: brev-launchable-ci-cpu.sh on main) * BREV_MIN_VCPU — Minimum vCPUs for CPU instance (default: 4) * BREV_MIN_RAM — Minimum RAM in GB for CPU instance (default: 16) @@ -28,8 +29,6 @@ import { describe, it, expect, beforeAll, afterAll } from "vitest"; import { execSync, execFileSync } from "node:child_process"; -import { mkdirSync, writeFileSync } from "node:fs"; -import { homedir } from "node:os"; import path from "node:path"; // Instance configuration @@ -40,6 +39,7 @@ const BREV_MIN_DISK = parseInt(process.env.BREV_MIN_DISK || "50", 10); const INSTANCE_NAME = process.env.INSTANCE_NAME; const TEST_SUITE = process.env.TEST_SUITE || "full"; const REPO_DIR = path.resolve(import.meta.dirname, "../.."); +const CLI_PATH = path.join(REPO_DIR, "bin", "nemoclaw.js"); // Launchable configuration // CI-Ready CPU setup script: pre-bakes Docker, Node.js, OpenShell CLI, npm deps, Docker images. @@ -49,8 +49,6 @@ const REPO_DIR = path.resolve(import.meta.dirname, "../.."); const DEFAULT_SETUP_SCRIPT_PATH = process.env.LAUNCHABLE_SETUP_SCRIPT || path.join(REPO_DIR, "scripts", "brev-launchable-ci-cpu.sh"); -const USE_LAUNCHABLE = !["0", "false"].includes(process.env.USE_LAUNCHABLE?.toLowerCase()); - // Sentinel file written by brev-launchable-ci-cpu.sh when setup is complete. // More reliable than grepping log files. 
const LAUNCHABLE_SENTINEL = "/var/run/nemoclaw-launchable-ready"; @@ -68,6 +66,53 @@ function brev(...args) { }).trim(); } +function sleep(seconds) { + execSync(`sleep ${seconds}`); +} + +function listBrevInstances() { + try { + return JSON.parse(brev("ls", "--json")); + } catch { + return []; + } +} + +function hasBrevInstance(instanceName) { + return listBrevInstances().some((instance) => instance.name === instanceName); +} + +function deleteBrevInstance(instanceName, { attempts = 5, intervalSeconds = 5 } = {}) { + for (let attempt = 1; attempt <= attempts; attempt += 1) { + if (!hasBrevInstance(instanceName)) { + return true; + } + + try { + brev("delete", instanceName); + } catch { + // Best-effort delete. We'll verify via ls below and retry if needed. + } + sleep(2); + + try { + brev("refresh"); + } catch { + // Ignore transient refresh failures and rely on the next existence check. + } + + if (!hasBrevInstance(instanceName)) { + return true; + } + + if (attempt < attempts) { + sleep(intervalSeconds); + } + } + + return !hasBrevInstance(instanceName); +} + function ssh(cmd, { timeout = 120_000, stream = false } = {}) { const escaped = cmd.replace(/'/g, "'\\''"); /** @type {import("child_process").StdioOptions} */ @@ -193,36 +238,67 @@ function runRemoteTest(scriptPath) { return ssh("cat /tmp/test-output.log", { timeout: 30_000 }); } +function runLocalDeploy(instanceName) { + const env = { + ...process.env, + NEMOCLAW_NON_INTERACTIVE: "1", + NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1", + NEMOCLAW_SANDBOX_NAME: "e2e-test", + NEMOCLAW_PROVIDER: process.env.NEMOCLAW_PROVIDER || "build", + NEMOCLAW_DEPLOY_NO_CONNECT: "1", + NEMOCLAW_DEPLOY_NO_START_SERVICES: "1", + }; + + execFileSync("node", [CLI_PATH, "deploy", instanceName], { + timeout: 2_700_000, + env, + stdio: "inherit", + }); +} + // --- suite ------------------------------------------------------------------ -const REQUIRED_VARS = ["BREV_API_TOKEN", "NVIDIA_API_KEY", "GITHUB_TOKEN", "INSTANCE_NAME"]; +const 
REQUIRED_VARS = ["NVIDIA_API_KEY", "GITHUB_TOKEN", "INSTANCE_NAME"]; const hasRequiredVars = REQUIRED_VARS.every((key) => process.env[key]); +const hasAuthenticatedBrev = (() => { + try { + brev("ls"); + return true; + } catch { + return false; + } +})(); -describe.runIf(hasRequiredVars)("Brev E2E", () => { +describe.runIf(hasRequiredVars && hasAuthenticatedBrev)("Brev E2E", () => { beforeAll(() => { const bootstrapStart = Date.now(); const elapsed = () => `${Math.round((Date.now() - bootstrapStart) / 1000)}s`; - // Authenticate with Brev - mkdirSync(path.join(homedir(), ".brev"), { recursive: true }); - writeFileSync( - path.join(homedir(), ".brev", "onboarding_step.json"), - '{"step":1,"hasRunBrevShell":true,"hasRunBrevOpen":true}', - ); - brev("login", "--token", process.env.BREV_API_TOKEN); - // Pre-cleanup: delete any leftover instance with the same name. // This can happen when a previous run's create succeeded on the backend // but the CLI got a network error (unexpected EOF) before confirming, // then the retry/fallback fails with "duplicate workspace". - try { - brev("delete", INSTANCE_NAME); + if (hasBrevInstance(INSTANCE_NAME)) { + if (!deleteBrevInstance(INSTANCE_NAME)) { + throw new Error(`Failed to delete leftover instance "${INSTANCE_NAME}"`); + } console.log(`[${elapsed()}] Deleted leftover instance "${INSTANCE_NAME}"`); - } catch { - // Expected — no leftover instance exists } - if (USE_LAUNCHABLE) { + if (TEST_SUITE === "deploy-cli") { + console.log(`[${elapsed()}] Running nemoclaw deploy end to end...`); + instanceCreated = true; + runLocalDeploy(INSTANCE_NAME); + try { + brev("refresh"); + } catch { + /* ignore */ + } + waitForSsh(); + console.log(`[${elapsed()}] SSH is up after deploy`); + const remoteHome = ssh("echo $HOME"); + remoteDir = `${remoteHome}/nemoclaw`; + } else { // ── Launchable path: pre-baked CI environment ────────────────── // Uses brev search cpu | brev create with --startup-script. 
// The script pre-installs Docker, Node.js, OpenShell CLI, npm deps, @@ -499,7 +575,7 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => { nimContainer: null, provider: null, gpuEnabled: false, - policies: [], + policies: ["pypi", "npm"], }, }, }, @@ -511,81 +587,6 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => { { timeout: 15_000 }, ); console.log(`[${elapsed()}] Registry written, onboard workaround complete`); - } else { - // ── Bare instance path: brev create + brev-setup.sh ──────────── - // Full bootstrap from scratch. Slower but doesn't require a launchable. - console.log(`[${elapsed()}] Creating bare CPU instance via brev search cpu | brev create...`); - console.log( - `[${elapsed()}] min-vcpu: ${BREV_MIN_VCPU}, min-ram: ${BREV_MIN_RAM}GB, disk: ${BREV_MIN_DISK}GB, provider: ${BREV_PROVIDER}`, - ); - try { - execSync( - `brev search cpu --min-vcpu ${BREV_MIN_VCPU} --min-ram ${BREV_MIN_RAM} --min-disk ${BREV_MIN_DISK} --provider ${BREV_PROVIDER} --sort price | ` + - `brev create ${INSTANCE_NAME} --detached`, - { encoding: "utf-8", timeout: 180_000, stdio: ["pipe", "inherit", "inherit"] }, - ); - } catch (createErr) { - console.log( - `[${elapsed()}] brev create exited with error — checking if instance was created anyway...`, - ); - try { - brev("refresh"); - } catch { - /* ignore */ - } - const lsOutput = execSync(`brev ls 2>&1 || true`, { encoding: "utf-8", timeout: 30_000 }); - if (!lsOutput.includes(INSTANCE_NAME)) { - throw new Error( - `brev create failed and instance "${INSTANCE_NAME}" not found in brev ls. 
` + - `Original error: ${createErr.message}`, - { cause: createErr }, - ); - } - console.log( - `[${elapsed()}] Instance "${INSTANCE_NAME}" found in brev ls despite create error — proceeding`, - ); - } - instanceCreated = true; - console.log(`[${elapsed()}] brev create returned (instance provisioning in background)`); - - // Wait for SSH - try { - brev("refresh"); - } catch { - /* ignore */ - } - waitForSsh(); - console.log(`[${elapsed()}] SSH is up`); - - // Sync code - const remoteHome = ssh("echo $HOME"); - remoteDir = `${remoteHome}/nemoclaw`; - ssh(`mkdir -p ${remoteDir}`); - execSync( - `rsync -az --delete --exclude node_modules --exclude .git --exclude dist --exclude .venv "${REPO_DIR}/" "${INSTANCE_NAME}:${remoteDir}/"`, - { encoding: "utf-8", timeout: 120_000 }, - ); - console.log(`[${elapsed()}] Code synced`); - - // Bootstrap VM — stream output to CI log so we can see progress - console.log(`[${elapsed()}] Running brev-setup.sh (bootstrap)...`); - sshEnv(`cd ${remoteDir} && SKIP_VLLM=1 bash scripts/brev-setup.sh`, { - timeout: 2_400_000, - stream: true, - }); - console.log(`[${elapsed()}] Bootstrap complete`); - - // Verify the CLI installed by brev-setup.sh is visible - console.log(`[${elapsed()}] Verifying nemoclaw CLI...`); - ssh( - [ - `export npm_config_prefix=$HOME/.local`, - `export PATH=$HOME/.local/bin:$PATH`, - `which nemoclaw && nemoclaw --version`, - ].join(" && "), - { timeout: 120_000 }, - ); - console.log(`[${elapsed()}] nemoclaw CLI verified`); } // Verify sandbox registry (common to both paths) @@ -597,7 +598,7 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => { expect(sandbox).toMatchObject({ name: "e2e-test", gpuEnabled: false, - policies: [], + policies: ["pypi", "npm"], }); console.log(`[${elapsed()}] Sandbox registry verified`); @@ -612,10 +613,8 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => { console.log(` To delete: brev delete ${INSTANCE_NAME}\n`); return; } - try { - brev("delete", INSTANCE_NAME); - } catch { - // 
Best-effort cleanup — instance may already be gone + if (!deleteBrevInstance(INSTANCE_NAME)) { + throw new Error(`Failed to delete Brev instance "${INSTANCE_NAME}" during test cleanup`); } }); @@ -652,4 +651,21 @@ describe.runIf(hasRequiredVars)("Brev E2E", () => { }, 600_000, ); + + it.runIf(TEST_SUITE === "deploy-cli")( + "deploy CLI provisions a remote sandbox end to end", + () => { + const sandboxList = ssh( + "export PATH=$HOME/.local/bin:$PATH && openshell sandbox list 2>/dev/null", + { timeout: 30_000 }, + ); + expect(sandboxList).toContain("e2e-test"); + expect(sandboxList).toContain("Ready"); + + const registry = JSON.parse(ssh("cat ~/.nemoclaw/sandboxes.json", { timeout: 10_000 })); + expect(registry.defaultSandbox).toBe("e2e-test"); + expect(registry.sandboxes).toHaveProperty("e2e-test"); + }, + 120_000, + ); }); diff --git a/test/e2e/test-spark-install.sh b/test/e2e/test-spark-install.sh index 9d7e2d2a3..e3588443b 100755 --- a/test/e2e/test-spark-install.sh +++ b/test/e2e/test-spark-install.sh @@ -2,19 +2,16 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # -# DGX Spark install smoke: setup-spark (Docker cgroupns) + install.sh — parity with -# test/integration/spark-install-cli.test.ts and spark-install.md Quick Start. +# DGX Spark install smoke: standard install.sh path on a Spark-class Linux host. # # Prerequisites: # - Linux (DGX Spark or similar); other OS exits immediately (fail) # - Docker running -# - sudo (for scripts/setup-spark.sh) unless NEMOCLAW_E2E_SPARK_SKIP_SETUP=1 # - Same env your non-interactive install needs (e.g. 
NEMOCLAW_NON_INTERACTIVE=1, NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1, API keys, …) # # Environment: # NEMOCLAW_NON_INTERACTIVE=1 — required (matches full-e2e install phase) # NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1 — required for non-interactive install/onboard -# NEMOCLAW_E2E_SPARK_SKIP_SETUP=1 — skip sudo setup-spark (host already configured) # NEMOCLAW_E2E_PUBLIC_INSTALL=1 — use curl|bash instead of repo install.sh # NEMOCLAW_INSTALL_SCRIPT_URL — URL when using public install (default: nemoclaw.sh) # INSTALL_LOG — log file (default: /tmp/nemoclaw-e2e-spark-install.log) @@ -55,7 +52,6 @@ else exit 1 fi -SETUP_SCRIPT="$REPO/scripts/setup-spark.sh" INSTALL_LOG="${INSTALL_LOG:-/tmp/nemoclaw-e2e-spark-install.log}" section "Phase 0: Platform" @@ -74,13 +70,6 @@ else exit 1 fi -if [ -f "$SETUP_SCRIPT" ]; then - pass "Found scripts/setup-spark.sh" -else - fail "Missing $SETUP_SCRIPT" - exit 1 -fi - if [ "${NEMOCLAW_NON_INTERACTIVE:-}" = "1" ]; then pass "NEMOCLAW_NON_INTERACTIVE=1" else @@ -95,24 +84,13 @@ else exit 1 fi -section "Phase 2: Spark Docker setup (sudo)" +section "Phase 2: Standard installer path" cd "$REPO" || { fail "cd to repo: $REPO" exit 1 } -if [ "${NEMOCLAW_E2E_SPARK_SKIP_SETUP:-0}" = "1" ]; then - info "Skipping sudo setup-spark (NEMOCLAW_E2E_SPARK_SKIP_SETUP=1)" - pass "setup-spark skipped" -else - info "Running: sudo bash scripts/setup-spark.sh" - if sudo bash "$SETUP_SCRIPT"; then - pass "setup-spark completed" - else - fail "setup-spark failed" - exit 1 - fi -fi +pass "Using generic installer flow without Spark-specific setup" section "Phase 3: Install NemoClaw (non-interactive)" info "Log: $INSTALL_LOG" diff --git a/test/install-preflight.test.js b/test/install-preflight.test.js index f468af45d..2769acea1 100644 --- a/test/install-preflight.test.js +++ b/test/install-preflight.test.js @@ -485,6 +485,26 @@ fi`, ); writeNodeStub(fakeBin); + writeExecutable( + path.join(fakeBin, "docker"), + `#!/usr/bin/env bash +if [ "$1" = "info" ]; then + echo 
'{"ServerVersion":"29.3.1","OperatingSystem":"Ubuntu 24.04","CgroupVersion":"2"}' + exit 0 +fi +exit 0 +`, + ); + writeExecutable( + path.join(fakeBin, "openshell"), + `#!/usr/bin/env bash +if [ "$1" = "--version" ]; then + echo "openshell 0.0.22" + exit 0 +fi +exit 0 +`, + ); writeNpmStub( fakeBin, `if [ "$1" = "pack" ]; then @@ -539,6 +559,153 @@ fi`, ); }); + it("skips onboarding when shared host preflight detects Docker is missing", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-install-missing-docker-")); + const fakeBin = path.join(tmp, "bin"); + const prefix = path.join(tmp, "prefix"); + const onboardLog = path.join(tmp, "onboard.log"); + fs.mkdirSync(fakeBin); + fs.mkdirSync(path.join(prefix, "bin"), { recursive: true }); + + writeNodeStub(fakeBin); + writeExecutable( + path.join(fakeBin, "openshell"), + `#!/usr/bin/env bash +if [ "$1" = "--version" ]; then + echo "openshell 0.0.22" + exit 0 +fi +exit 0 +`, + ); + writeExecutable( + path.join(fakeBin, "docker"), + `#!/usr/bin/env bash +if [ "$1" = "info" ]; then + exit 1 +fi +exit 0 +`, + ); + writeNpmStub( + fakeBin, + `if [ "$1" = "pack" ]; then + tmpdir="$4" + mkdir -p "$tmpdir/package" + tar -czf "$tmpdir/openclaw-2026.3.11.tgz" -C "$tmpdir" package + exit 0 +fi +if [ "$1" = "install" ]; then exit 0; fi +if [ "$1" = "run" ] && { [ "$2" = "build" ] || [ "$2" = "build:cli" ] || [ "$2" = "--if-present" ]; }; then exit 0; fi +if [ "$1" = "link" ]; then + cat > "$NPM_PREFIX/bin/nemoclaw" <<'EOS' +#!/usr/bin/env bash +printf '%s\\n' "$*" >> "$NEMOCLAW_ONBOARD_LOG" +exit 0 +EOS + chmod +x "$NPM_PREFIX/bin/nemoclaw" + exit 0 +fi`, + ); + + const result = spawnSync("bash", [INSTALLER], { + cwd: path.join(import.meta.dirname, ".."), + encoding: "utf-8", + env: { + ...process.env, + HOME: tmp, + PATH: `${fakeBin}:${TEST_SYSTEM_PATH}`, + NEMOCLAW_NON_INTERACTIVE: "1", + NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1", + NPM_PREFIX: prefix, + NEMOCLAW_ONBOARD_LOG: onboardLog, + }, + }); + + const 
output = `${result.stdout}${result.stderr}`; + expect(result.status).toBe(0); + expect(output).toMatch(/Host preflight found issues that will prevent onboarding right now\./); + expect(output).toMatch(/Start Docker/); + expect(output).toMatch(/Skipping onboarding until the host prerequisites above are fixed\./); + expect(fs.existsSync(onboardLog)).toBe(false); + }); + + it("warns on Podman but still runs onboarding", () => { + const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-install-podman-warning-")); + const fakeBin = path.join(tmp, "bin"); + const prefix = path.join(tmp, "prefix"); + const onboardLog = path.join(tmp, "onboard.log"); + fs.mkdirSync(fakeBin); + fs.mkdirSync(path.join(prefix, "bin"), { recursive: true }); + + writeNodeStub(fakeBin); + writeExecutable( + path.join(fakeBin, "openshell"), + `#!/usr/bin/env bash +if [ "$1" = "--version" ]; then + echo "openshell 0.0.22" + exit 0 +fi +exit 0 +`, + ); + writeNpmStub( + fakeBin, + `if [ "$1" = "pack" ]; then + tmpdir="$4" + mkdir -p "$tmpdir/package" + tar -czf "$tmpdir/openclaw-2026.3.11.tgz" -C "$tmpdir" package + exit 0 +fi +if [ "$1" = "install" ]; then exit 0; fi +if [ "$1" = "run" ] && { [ "$2" = "build" ] || [ "$2" = "build:cli" ] || [ "$2" = "--if-present" ]; }; then exit 0; fi +if [ "$1" = "link" ]; then + cat > "$NPM_PREFIX/bin/nemoclaw" <<'EOS' +#!/usr/bin/env bash +printf '%s\\n' "$*" >> "$NEMOCLAW_ONBOARD_LOG" +exit 0 +EOS + chmod +x "$NPM_PREFIX/bin/nemoclaw" + exit 0 +fi`, + ); + writeExecutable( + path.join(fakeBin, "docker"), + `#!/usr/bin/env bash +if [ "$1" = "info" ]; then + echo "Podman Engine" + exit 0 +fi +exit 0 +`, + ); + + const result = spawnSync("bash", [INSTALLER], { + cwd: path.join(import.meta.dirname, ".."), + encoding: "utf-8", + env: { + ...process.env, + HOME: tmp, + PATH: `${fakeBin}:${TEST_SYSTEM_PATH}`, + NEMOCLAW_NON_INTERACTIVE: "1", + NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "1", + NPM_PREFIX: prefix, + NEMOCLAW_ONBOARD_LOG: onboardLog, + }, + }); + + const 
output = `${result.stdout}${result.stderr}`; + expect(result.status).toBe(0); + expect(output).toMatch(/Host preflight found warnings\./); + expect(output).toMatch(/Detected container runtime: podman/); + expect(output).toMatch( + /Podman may work in some environments, but it is not a supported runtime/, + ); + expect(fs.readFileSync(onboardLog, "utf-8")).toMatch( + /^onboard --non-interactive --yes-i-accept-third-party-software$/m, + ); + }); + it("requires explicit terms acceptance in non-interactive install mode", () => { const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-install-terms-required-")); const fakeBin = path.join(tmp, "bin"); @@ -548,6 +715,26 @@ fi`, fs.mkdirSync(path.join(prefix, "bin"), { recursive: true }); writeNodeStub(fakeBin); + writeExecutable( + path.join(fakeBin, "docker"), + `#!/usr/bin/env bash +if [ "$1" = "info" ]; then + echo '{"ServerVersion":"29.3.1","OperatingSystem":"Ubuntu 24.04","CgroupVersion":"2"}' + exit 0 +fi +exit 0 +`, + ); + writeExecutable( + path.join(fakeBin, "openshell"), + `#!/usr/bin/env bash +if [ "$1" = "--version" ]; then + echo "openshell 0.0.22" + exit 0 +fi +exit 0 +`, + ); writeNpmStub( fakeBin, `if [ "$1" = "pack" ]; then @@ -576,13 +763,16 @@ fi`, ...process.env, HOME: tmp, PATH: `${fakeBin}:${TEST_SYSTEM_PATH}`, + NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE: "", NPM_PREFIX: prefix, NEMOCLAW_ONBOARD_LOG: onboardLog, }, }); - expect(result.status).not.toBe(0); - expect(`${result.stdout}${result.stderr}`).toMatch(/--yes-i-accept-third-party-software/); + expect(result.status).toBe(1); + expect(`${result.stdout}${result.stderr}`).toMatch( + /--yes-i-accept-third-party-software|NEMOCLAW_ACCEPT_THIRD_PARTY_SOFTWARE=1/, + ); expect(fs.existsSync(onboardLog)).toBe(false); }); @@ -595,6 +785,26 @@ fi`, fs.mkdirSync(path.join(prefix, "bin"), { recursive: true }); writeNodeStub(fakeBin); + writeExecutable( + path.join(fakeBin, "docker"), + `#!/usr/bin/env bash +if [ "$1" = "info" ]; then + echo 
'{"ServerVersion":"29.3.1","OperatingSystem":"Ubuntu 24.04","CgroupVersion":"2"}' + exit 0 +fi +exit 0 +`, + ); + writeExecutable( + path.join(fakeBin, "openshell"), + `#!/usr/bin/env bash +if [ "$1" = "--version" ]; then + echo "openshell 0.0.22" + exit 0 +fi +exit 0 +`, + ); writeNpmStub( fakeBin, `if [ "$1" = "pack" ]; then @@ -951,6 +1161,26 @@ exit 0 fs.writeFileSync(path.join(nvmDir, "nvm.sh"), "# stub nvm\n"); writeNodeStub(fakeBin); + writeExecutable( + path.join(fakeBin, "docker"), + `#!/usr/bin/env bash +if [ "$1" = "info" ]; then + echo '{"ServerVersion":"29.3.1","OperatingSystem":"Ubuntu 24.04","CgroupVersion":"2"}' + exit 0 +fi +exit 0 +`, + ); + writeExecutable( + path.join(fakeBin, "openshell"), + `#!/usr/bin/env bash +if [ "$1" = "--version" ]; then + echo "openshell 0.0.22" + exit 0 +fi +exit 0 +`, + ); writeNpmStub( fakeBin, `if [ "$1" = "pack" ]; then exit 1; fi diff --git a/test/runner.test.js b/test/runner.test.js index 53885210f..fc07849ff 100644 --- a/test/runner.test.js +++ b/test/runner.test.js @@ -487,19 +487,15 @@ describe("regression guards", () => { expect(src.includes("delete process.env.NVIDIA_API_KEY")).toBeTruthy(); }); - it("setupSpark does not pass NVIDIA_API_KEY to sudo", () => { + it("setupSpark is a compatibility alias that does not shell out to sudo", () => { const fs = require("fs"); const src = fs.readFileSync( path.join(import.meta.dirname, "..", "bin", "nemoclaw.js"), "utf-8", ); - // Find the run() call inside setupSpark — it should not contain the key - const sparkLines = src - .split("\n") - .filter((l) => l.includes("setup-spark") && l.includes("run(")); - for (const line of sparkLines) { - expect(line.includes("NVIDIA_API_KEY")).toBe(false); - } + expect(src).toContain("`nemoclaw setup-spark` is deprecated."); + expect(src).toContain("await onboard(args);"); + expect(src).not.toContain('sudo bash "${SCRIPTS}/setup-spark.sh"'); }); it("walkthrough.sh does not embed NVIDIA_API_KEY in tmux or sandbox commands", () => { 
@@ -644,12 +640,102 @@ describe("regression guards", () => { expect(findShellViolations(src)).toEqual([]); }); - it("scripts/brev-setup.sh does not pipe curl to shell", () => { + it("scripts/brev-setup.sh has been removed", () => { + expect(fs.existsSync(path.join(import.meta.dirname, "..", "scripts", "brev-setup.sh"))).toBe( + false, + ); + }); + + it("services no longer tell users to install brev-setup.sh", () => { const src = fs.readFileSync( - path.join(import.meta.dirname, "..", "scripts", "brev-setup.sh"), + path.join(import.meta.dirname, "..", "src", "lib", "services.ts"), "utf-8", ); - expect(findShellViolations(src)).toEqual([]); + expect(src).not.toContain("brev-setup.sh"); + }); + + it("deploy uses the standard installer and connects to the actual sandbox name", () => { + const tsSrc = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "deploy.ts"), + "utf-8", + ); + const src = fs.readFileSync( + path.join(import.meta.dirname, "..", "bin", "nemoclaw.js"), + "utf-8", + ); + expect(src).toContain('const { executeDeploy } = require("../dist/lib/deploy")'); + expect(tsSrc).toContain("export function inferDeployProvider("); + expect(tsSrc).toContain("export function buildDeployEnvLines("); + expect(tsSrc).toContain( + "bash scripts/install.sh --non-interactive --yes-i-accept-third-party-software", + ); + expect(tsSrc).not.toContain("sandbox connect nemoclaw"); + expect(tsSrc).toContain("openshell sandbox connect ${shellQuote(sandboxName)}"); + }); + + it("deploy syncs a complete buildable checkout instead of excluding src", () => { + const src = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "deploy.ts"), + "utf-8", + ); + expect(src).not.toContain("--exclude src"); + expect(src).toContain('"${rootDir}/"'); + expect(src).toContain("--exclude dist"); + expect(src).toContain('const brevProvider = String(env.NEMOCLAW_BREV_PROVIDER || "gcp")'); + expect(src).toContain("--provider ${shellQuote(brevProvider)}"); + }); + + 
it("deploy supports test-friendly non-interactive skip flags", () => { + const src = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "deploy.ts"), + "utf-8", + ); + expect(src).toContain("NEMOCLAW_DEPLOY_NO_CONNECT"); + expect(src).toContain("NEMOCLAW_DEPLOY_NO_START_SERVICES"); + expect(src).toContain("Skipping interactive sandbox connect"); + }); + + it("deploy reports Brev failure states before SSH timeout", () => { + const src = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "deploy.ts"), + "utf-8", + ); + expect(src).toContain("function getBrevInstanceStatus("); + expect(src).toContain('brev", ["ls", "--json"]'); + expect(src).toContain("Brev instance '${name}' did not become ready."); + expect(src).toContain("Try: brev reset"); + expect(src).toContain("Brev status at timeout:"); + }); + + it("brev e2e suite includes a deploy-cli mode", () => { + const src = fs.readFileSync( + path.join(import.meta.dirname, "..", "test", "e2e", "brev-e2e.test.js"), + "utf-8", + ); + expect(src).toContain('TEST_SUITE === "deploy-cli"'); + expect(src).toContain("deploy CLI provisions a remote sandbox end to end"); + expect(src).toContain('NEMOCLAW_DEPLOY_NO_CONNECT: "1"'); + }); + + it("brev e2e suite relies on an authenticated brev CLI instead of a Brev API token", () => { + const src = fs.readFileSync( + path.join(import.meta.dirname, "..", "test", "e2e", "brev-e2e.test.js"), + "utf-8", + ); + expect(src).toContain("const hasAuthenticatedBrev ="); + expect(src).toContain('brev("ls")'); + expect(src).not.toContain("BREV_API_TOKEN"); + expect(src).not.toContain('brev("login", "--token"'); + }); + + it("brev e2e suite no longer contains the old brev-setup compatibility path", () => { + const src = fs.readFileSync( + path.join(import.meta.dirname, "..", "test", "e2e", "brev-e2e.test.js"), + "utf-8", + ); + expect(src).not.toContain("scripts/brev-setup.sh"); + expect(src).not.toContain("USE_LAUNCHABLE"); + 
expect(src).not.toContain("SKIP_VLLM=1"); }); it("bin/nemoclaw.js does not pipe curl to shell", () => {