Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions .shellcheckrc
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# SC2059: Don't use variables in the printf format string.
# Our info/warn/ok/error helpers and banner intentionally embed ANSI color
# variables in printf format strings. This is safe because the variables
# contain only escape sequences, never user input.
#
# SC1091: Not following sourced file.
# Some scripts source external dependencies that are not in the repo
# (e.g. nvm.sh) at paths that shellcheck cannot resolve statically, so
# this check stays disabled as a fallback. For repo-local sources
# (runtime.sh), external-sources + source-path below let shellcheck
# follow them.
disable=SC2059,SC1091

# Follow source directives so shellcheck can analyze sourced functions.
external-sources=true

# Resolve source-path hints relative to the script's own directory,
# matching the runtime behavior of `SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"`.
source-path=SCRIPTDIR
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,8 @@ The sandbox image is approximately 2.4 GB compressed. During image push, the Doc

| Platform | Supported runtimes | Notes |
|----------|--------------------|-------|
| Linux | Docker | Primary supported path today |
| macOS (Apple Silicon) | Colima, Docker Desktop | Recommended runtimes for supported macOS setups |
| macOS | Podman | Not supported yet. NemoClaw currently depends on OpenShell support for Podman on macOS. |
| Linux | Docker, Podman | Primary supported path today |
| macOS (Apple Silicon) | Colima, Docker Desktop, Podman | Supported container runtimes on macOS |
| Windows WSL | Docker Desktop (WSL backend) | Supported target path |

> **💡 Tip**
Expand Down
10 changes: 1 addition & 9 deletions bin/lib/onboard.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ const {
} = require("./inference-config");
const {
inferContainerRuntime,
isUnsupportedMacosRuntime,
shouldPatchCoredns,
} = require("./platform");
const { resolveOpenshell } = require("./resolve-openshell");
Expand Down Expand Up @@ -1177,12 +1176,6 @@ async function preflight() {
console.log(" ✓ Docker is running");

const runtime = getContainerRuntime();
if (isUnsupportedMacosRuntime(runtime)) {
console.error(" Podman on macOS is not supported by NemoClaw at this time.");
console.error(" OpenShell currently depends on Docker host-gateway behavior that Podman on macOS does not provide.");
console.error(" Use Colima or Docker Desktop on macOS instead.");
process.exit(1);
}
if (runtime !== "unknown") {
console.log(` ✓ Container runtime: ${runtime}`);
}
Expand Down Expand Up @@ -1307,10 +1300,9 @@ async function startGateway(gpu) {
sleep(2);
}

// CoreDNS fix — applied only when shouldPatchCoredns(runtime) is true for the detected container runtime.
const runtime = getContainerRuntime();
if (shouldPatchCoredns(runtime)) {
console.log(" Patching CoreDNS for Colima...");
console.log(` Patching CoreDNS for ${runtime}...`);
run(`bash "${path.join(SCRIPTS, "fix-coredns.sh")}" ${GATEWAY_NAME} 2>&1 || true`, { ignoreError: true });
}
// Give DNS a moment to propagate
Expand Down
35 changes: 28 additions & 7 deletions bin/lib/platform.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,8 @@ function inferContainerRuntime(info = "") {
return "unknown";
}

/**
 * Report whether the runtime/platform pair is Podman running on macOS.
 *
 * @param {string} runtime - Runtime identifier; only "podman" can yield true.
 * @param {{ platform?: string }} [opts] - Optional override; when `platform`
 *   is null or undefined, `process.platform` is used instead.
 * @returns {boolean} True only when the platform is "darwin" and the runtime
 *   is "podman".
 */
function isUnsupportedMacosRuntime(runtime, opts = {}) {
  const effectivePlatform = opts.platform ?? process.platform;

  // Any non-macOS platform short-circuits to false, whatever the runtime is.
  if (effectivePlatform !== "darwin") {
    return false;
  }

  return runtime === "podman";
}

/**
 * Decide whether the CoreDNS patch should be applied for a container runtime.
 *
 * A single combined condition is used so that the Podman case is actually
 * reachable; an earlier Colima-only `return` would otherwise shadow it.
 *
 * @param {string} runtime - Runtime identifier (for example "colima" or "podman").
 * @returns {boolean} True for "colima" and "podman"; false for any other value.
 */
function shouldPatchCoredns(runtime) {
  return runtime === "colima" || runtime === "podman";
}

function getColimaDockerSocketCandidates(opts = {}) {
Expand All @@ -52,17 +47,43 @@ function findColimaDockerSocket(opts = {}) {
return getColimaDockerSocketCandidates(opts).find((socketPath) => existsSync(socketPath)) ?? null;
}

/**
 * List the filesystem paths at which a Podman API socket is looked for.
 *
 * @param {object} [opts] - Optional overrides.
 * @param {string} [opts.home] - Home directory; falls back to `$HOME`, then "/tmp".
 * @param {string} [opts.platform] - Platform name; falls back to `process.platform`.
 * @param {number} [opts.uid] - User id; falls back to `process.getuid()` when
 *   that function exists, then to 1000.
 * @returns {string[]} On "darwin", a single machine socket path under the home
 *   directory. On every other platform, `/run/user/<uid>/podman/podman.sock`
 *   followed by `/run/podman/podman.sock`.
 */
function getPodmanSocketCandidates(opts = {}) {
  const targetPlatform = opts.platform ?? process.platform;

  if (targetPlatform === "darwin") {
    const homeDir = opts.home ?? process.env.HOME ?? "/tmp";
    const machineSocket = path.join(
      homeDir,
      ".local/share/containers/podman/machine/podman.sock",
    );
    return [machineSocket];
  }

  // `??` (not `||`) so that an explicit uid of 0 is kept rather than replaced.
  const userId = opts.uid ?? process.getuid?.() ?? 1000;
  const userScopedSocket = `/run/user/${userId}/podman/podman.sock`;
  const sharedSocket = "/run/podman/podman.sock";

  return [userScopedSocket, sharedSocket];
}

/**
 * List, in order, the socket paths to try for a Docker-compatible API.
 *
 * @param {object} [opts] - Optional overrides.
 * @param {string} [opts.home] - Home directory; falls back to `$HOME`, then "/tmp".
 * @param {string} [opts.platform] - Platform name; falls back to `process.platform`.
 * @param {number} [opts.uid] - Forwarded to `getPodmanSocketCandidates` on "linux".
 * @returns {string[]} On "darwin": the Colima candidates, then the Podman
 *   candidates, then `<home>/.docker/run/docker.sock`. On "linux": the Podman
 *   candidates, then `/run/docker.sock` and `/var/run/docker.sock`. An empty
 *   array on any other platform.
 */
function getDockerSocketCandidates(opts = {}) {
  const homeDir = opts.home ?? process.env.HOME ?? "/tmp";
  const targetPlatform = opts.platform ?? process.platform;

  switch (targetPlatform) {
    case "darwin": {
      const colimaSockets = getColimaDockerSocketCandidates({ home: homeDir });
      const podmanSockets = getPodmanSocketCandidates({
        home: homeDir,
        platform: targetPlatform,
      });
      const homeDockerSocket = path.join(homeDir, ".docker/run/docker.sock");
      return [...colimaSockets, ...podmanSockets, homeDockerSocket];
    }

    case "linux": {
      const podmanSockets = getPodmanSocketCandidates({
        home: homeDir,
        platform: targetPlatform,
        uid: opts.uid,
      });
      return [...podmanSockets, "/run/docker.sock", "/var/run/docker.sock"];
    }

    default:
      // No candidates are listed for platforms other than darwin and linux.
      return [];
  }
}

Expand Down Expand Up @@ -95,8 +116,8 @@ module.exports = {
findColimaDockerSocket,
getColimaDockerSocketCandidates,
getDockerSocketCandidates,
getPodmanSocketCandidates,
inferContainerRuntime,
isUnsupportedMacosRuntime,
isWsl,
shouldPatchCoredns,
};
53 changes: 53 additions & 0 deletions docs/reference/troubleshooting.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,56 @@ $ nemoclaw <name> logs
```

Use `--follow` to stream logs in real time while debugging.

## Podman

### `open /dev/kmsg: operation not permitted`

This error appears when the Podman machine is running in rootless mode.
K3s kubelet requires `/dev/kmsg` access for its OOM watcher, which is not available in rootless containers.

Switch the Podman machine to rootful mode and restart:

```console
$ podman machine stop
$ podman machine set --rootful
$ podman machine start
```

Then destroy and recreate the gateway:

```console
$ openshell gateway destroy --name nemoclaw
$ nemoclaw onboard
```

### Image push timeout with Podman

When creating a sandbox, the 1.5 GB sandbox image push into K3s may time out through Podman's API socket.
This is a known limitation of the bollard Docker client's default timeout.

Manually push the image using the Docker CLI, which has no such timeout:

```console
$ docker images --format '{{.Repository}}:{{.Tag}}' | grep sandbox-from
$ docker save <IMAGE_NAME:TAG> | \
docker exec -i openshell-cluster-nemoclaw \
ctr -a /run/k3s/containerd/containerd.sock -n k8s.io images import -
```

After the import completes, create the sandbox manually:

```console
$ openshell sandbox create --name my-assistant --from <IMAGE_NAME:TAG>
```

### Podman machine resources

The default Podman machine has 2 GB RAM, which is insufficient for the sandbox image push and K3s cluster overhead.
Allocate at least 8 GB RAM and 4 CPUs; the example below sets 8 GB RAM and 6 CPUs:

```console
$ podman machine stop
$ podman machine set --cpus 6 --memory 8192
$ podman machine start
```
30 changes: 20 additions & 10 deletions scripts/fix-coredns.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Fix CoreDNS on local OpenShell gateways running under Colima.
# Fix CoreDNS on local OpenShell gateways running under Colima or Podman.
#
# Problem: k3s CoreDNS forwards to /etc/resolv.conf which inside the
# CoreDNS pod resolves to 127.0.0.11 (Docker/Podman's embedded DNS).
# That address is NOT reachable from k3s pods, causing DNS to fail and
# CoreDNS to CrashLoop.
#
# Fix: forward CoreDNS to the container's default gateway IP, which
# is reachable from pods and routes DNS through Docker to the host.
# is reachable from pods and routes DNS through Docker/Podman to the host.
#
# Run this after `openshell gateway start` on Colima setups.
# Run this after `openshell gateway start` on Colima or Podman setups.
#
# Usage: ./scripts/fix-coredns.sh [gateway-name]

Expand All @@ -23,15 +23,25 @@ SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# shellcheck source=./lib/runtime.sh
. "$SCRIPT_DIR/lib/runtime.sh"

COLIMA_SOCKET="$(find_colima_docker_socket || true)"
DETECTED_RUNTIME="unknown"

if [ -z "${DOCKER_HOST:-}" ]; then
COLIMA_SOCKET="$(find_colima_docker_socket || true)"
if [ -n "$COLIMA_SOCKET" ]; then
export DOCKER_HOST="unix://$COLIMA_SOCKET"
DETECTED_RUNTIME="colima"
else
echo "Skipping CoreDNS patch: Colima socket not found."
exit 0
PODMAN_SOCKET="$(find_podman_socket || true)"
if [ -n "$PODMAN_SOCKET" ]; then
export DOCKER_HOST="unix://$PODMAN_SOCKET"
DETECTED_RUNTIME="podman"
else
echo "Skipping CoreDNS patch: no Colima or Podman socket found."
exit 0
fi
fi
else
DETECTED_RUNTIME="$(docker_host_runtime "$DOCKER_HOST" || echo "custom")"
fi

# Find the cluster container
Expand All @@ -48,10 +58,10 @@ fi

CONTAINER_RESOLV_CONF="$(docker exec "$CLUSTER" cat /etc/resolv.conf 2>/dev/null || true)"
HOST_RESOLV_CONF="$(cat /etc/resolv.conf 2>/dev/null || true)"
UPSTREAM_DNS="$(resolve_coredns_upstream "$CONTAINER_RESOLV_CONF" "$HOST_RESOLV_CONF" "colima" || true)"
UPSTREAM_DNS="$(resolve_coredns_upstream "$CONTAINER_RESOLV_CONF" "$HOST_RESOLV_CONF" "$DETECTED_RUNTIME" || true)"

if [ -z "$UPSTREAM_DNS" ]; then
echo "ERROR: Could not determine a non-loopback DNS upstream for Colima."
echo "ERROR: Could not determine a non-loopback DNS upstream for $DETECTED_RUNTIME."
exit 1
fi

Expand Down
36 changes: 32 additions & 4 deletions scripts/lib/runtime.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ socket_exists() {
case ":$NEMOCLAW_TEST_SOCKET_PATHS:" in
*":$socket_path:"*) return 0 ;;
esac
return 1
fi

[ -S "$socket_path" ]
Expand Down Expand Up @@ -56,6 +57,11 @@ detect_docker_host() {
return 0
fi

if socket_path="$(find_podman_socket "$home_dir")"; then
printf 'unix://%s\n' "$socket_path"
return 0
fi

if socket_path="$(find_docker_desktop_socket "$home_dir")"; then
printf 'unix://%s\n' "$socket_path"
return 0
Expand All @@ -71,6 +77,9 @@ docker_host_runtime() {
unix://*"/.colima/default/docker.sock" | unix://*"/.config/colima/default/docker.sock")
printf 'colima\n'
;;
unix://*"/podman/machine/podman.sock" | unix://*"/podman/podman.sock")
printf 'podman\n'
;;
unix://*"/.docker/run/docker.sock")
printf 'docker-desktop\n'
;;
Expand Down Expand Up @@ -103,11 +112,30 @@ infer_container_runtime_from_info() {
fi
}

is_unsupported_macos_runtime() {
local platform="${1:-$(uname -s)}"
local runtime="${2:-unknown}"
find_podman_socket() {
local home_dir="${1:-${HOME:-/tmp}}"
local socket_path

if [ "$(uname -s)" = "Darwin" ]; then
socket_path="$home_dir/.local/share/containers/podman/machine/podman.sock"
if socket_exists "$socket_path"; then
printf '%s\n' "$socket_path"
return 0
fi
else
local uid
uid="$(id -u 2>/dev/null || echo 1000)"
for socket_path in \
"/run/user/$uid/podman/podman.sock" \
"/run/podman/podman.sock"; do
if socket_exists "$socket_path"; then
printf '%s\n' "$socket_path"
return 0
fi
done
fi

[ "$platform" = "Darwin" ] && [ "$runtime" = "podman" ]
return 1
}

is_loopback_ip() {
Expand Down
9 changes: 3 additions & 6 deletions scripts/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,6 @@ command -v docker >/dev/null || fail "docker not found"
[ -n "${NVIDIA_API_KEY:-}" ] || fail "NVIDIA_API_KEY not set. Get one from build.nvidia.com"

CONTAINER_RUNTIME="$(infer_container_runtime_from_info "$(docker info 2>/dev/null || true)")"
if is_unsupported_macos_runtime "$(uname -s)" "$CONTAINER_RUNTIME"; then
fail "Podman on macOS is not supported yet. NemoClaw currently depends on OpenShell support for Podman on macOS. Use Colima or Docker Desktop instead."
fi
if [ "$CONTAINER_RUNTIME" != "unknown" ]; then
info "Container runtime: $CONTAINER_RUNTIME"
fi
Expand Down Expand Up @@ -123,9 +120,9 @@ for i in 1 2 3 4 5; do
done
info "Gateway is healthy"

# 2. CoreDNS fix (Colima only)
if [ "$CONTAINER_RUNTIME" = "colima" ]; then
info "Patching CoreDNS for Colima..."
# 2. CoreDNS fix (Colima and Podman — nested K3s DNS is broken with these runtimes)
if [ "$CONTAINER_RUNTIME" = "colima" ] || [ "$CONTAINER_RUNTIME" = "podman" ]; then
info "Patching CoreDNS for $CONTAINER_RUNTIME..."
bash "$SCRIPT_DIR/fix-coredns.sh" nemoclaw 2>&1 || warn "CoreDNS patch failed (may not be needed)"
fi

Expand Down
11 changes: 9 additions & 2 deletions scripts/smoke-macos-install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Usage: ./scripts/smoke-macos-install.sh [options]
Options:
--sandbox-name <name> Sandbox name to feed into install.sh
--log-dir <dir> Directory for install/uninstall logs
--runtime <name> Select runtime: colima or docker-desktop
--runtime <name> Select runtime: colima, podman, or docker-desktop
--allow-existing-state Allow running even if NemoClaw/OpenShell state already exists
--keep-logs Preserve log files after success
--remove-openshell Allow uninstall.sh to remove openshell
Expand Down Expand Up @@ -132,6 +132,13 @@ select_runtime() {
export DOCKER_HOST="unix://$socket_path"
info "Using runtime 'colima' via $socket_path"
;;
podman)
local socket_path
socket_path="$(find_podman_socket || true)"
[ -n "$socket_path" ] || fail "Requested runtime 'podman', but no Podman socket was found."
export DOCKER_HOST="unix://$socket_path"
info "Using runtime 'podman' via $socket_path"
;;
docker-desktop)
local socket_path
socket_path="$(find_docker_desktop_socket || true)"
Expand All @@ -140,7 +147,7 @@ select_runtime() {
info "Using runtime 'docker-desktop' via $socket_path"
;;
*)
fail "Unsupported runtime '$RUNTIME'. Use 'colima' or 'docker-desktop'."
fail "Unsupported runtime '$RUNTIME'. Use 'colima', 'podman', or 'docker-desktop'."
;;
esac
}
Expand Down
Loading
Loading