diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 24e41bcd3..849de1be9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,6 +34,35 @@ jobs: - name: Lint Python surfaces touched by lucebox tooling run: uv run --frozen --extra dev ruff check . + - name: Install shellcheck (for bash test runner) + # ubuntu-latest typically ships shellcheck pre-installed, but pin + # the dependency explicitly so the bash test runner can always rely + # on `command -v shellcheck` succeeding. + run: | + if ! command -v shellcheck >/dev/null 2>&1; then + sudo apt-get update + sudo apt-get install -y shellcheck + fi + shellcheck --version | head -3 + + - name: Typecheck lucebox CLI + run: uv run --frozen --extra dev python -m mypy --package lucebox + + - name: Unit-test lucebox CLI + # The fast workspace sync above is enough: the suite mocks the + # docker / HTTP surfaces, so no torch wheel or GPU is needed. + # Keeps the lucebox Python honest on every push. + run: uv run --frozen --extra dev pytest lucebox -q + + - name: Smoke-test lucebox.sh wrapper + # Catches `set -u` regressions, syntax errors, and stale dispatch + # handlers in the host-side wrapper + the in-container entrypoint. + # Runs shellcheck --severity=error across every shipped .sh file, + # exercises every subcommand dispatch under `set -u`, and drives the + # entrypoint's draft-resolution block through every family-glob + # branch — all on the bare runner without docker/nvidia/systemd. + run: bash scripts/test_lucebox_sh.sh + build: name: Build (cmake + uv sync --extra megakernel) runs-on: ubuntu-latest diff --git a/install.sh b/install.sh new file mode 100755 index 000000000..cff54a02e --- /dev/null +++ b/install.sh @@ -0,0 +1,138 @@ +#!/usr/bin/env bash +# install.sh — Bootstrap installer for the lucebox host wrapper. 
#
# Canonical install (Luce-Org main, stable channel):
#
#   curl -fsSL https://raw.githubusercontent.com/Luce-Org/lucebox-hub/main/install.sh | bash
#
# Install from a different fork / branch (dev channel). Note the env var
# is on the `bash` side of the pipe — `VAR=val curl … | bash` would attach
# it to the `curl` process, leaving `bash` with the canonical default:
#
#   curl -fsSL https://raw.githubusercontent.com/easel/lucebox-hub/feat/lucebox-docker/install.sh | \
#     LUCEBOX_INSTALL_URL=https://raw.githubusercontent.com/easel/lucebox-hub/feat/lucebox-docker/lucebox.sh bash
#
# The installer bakes the source URL into the installed `lucebox.sh` as
# `LUCEBOX_INSTALLED_FROM=...`, so `lucebox update` later re-pulls from the
# same channel without the user having to remember which fork they used.
#
# Override the install destination via $LUCEBOX_INSTALL_DEST (default
# $HOME/.local/bin/lucebox). This is what `lucebox update` uses to replace
# the file in place.

set -euo pipefail

LUCEBOX_INSTALL_URL="${LUCEBOX_INSTALL_URL:-https://raw.githubusercontent.com/Luce-Org/lucebox-hub/main/lucebox.sh}"
DEST="${LUCEBOX_INSTALL_DEST:-$HOME/.local/bin/lucebox}"

# ── helpers ───────────────────────────────────────────────────────────────
# Colour codes are blanked when stdout is not a terminal or NO_COLOR is set.
C_OK=$'\033[1;32m' ; C_ERR=$'\033[1;31m' ; C_DIM=$'\033[2m' ; C_RST=$'\033[0m'
if [ ! -t 1 ] || [ "${NO_COLOR:-}" ]; then
    C_OK="" ; C_ERR="" ; C_DIM="" ; C_RST=""
fi
# info/ok write to stdout; die writes to stderr and exits 1.
info() { printf '%s[install]%s %s\n' "$C_DIM" "$C_RST" "$*"; }
ok() { printf '%s[install] ✓%s %s\n' "$C_OK" "$C_RST" "$*"; }
die() { printf '%s[install] ✗%s %s\n' "$C_ERR" "$C_RST" "$*" >&2; exit 1; }

command -v curl >/dev/null 2>&1 || die "curl is required (apt-get install curl)"

# ── fetch ─────────────────────────────────────────────────────────────────
tmp=$(mktemp -t lucebox.XXXXXX) || die "couldn't create temp file"
# Clean up both scratch files on any exit. The bake step further down
# writes its intermediate output to "$tmp.baked", so that is the name the
# trap has to remove — otherwise a failure between the sed and the mv
# leaves the half-baked copy behind in the temp directory.
# shellcheck disable=SC2064 # we want $tmp expanded now, not at trap time
trap "rm -f '$tmp' '$tmp.baked'" EXIT
info "fetching $LUCEBOX_INSTALL_URL"
curl -fsSL "$LUCEBOX_INSTALL_URL" -o "$tmp" \
    || die "download failed from $LUCEBOX_INSTALL_URL"

# ── sanity check ──────────────────────────────────────────────────────────
# Refuse to install something that isn't recognizably lucebox.sh. Catches
# 404 pages, redirects to HTML, and accidental URL typos.
head -1 "$tmp" | grep -q '^#!/usr/bin/env bash$' \
    || die "downloaded file does not look like a bash script (got: $(head -1 "$tmp"))"
grep -q '^VERSION=' "$tmp" \
    || die "downloaded file is missing VERSION marker — not lucebox.sh?"

# ── decide what gets baked in as the persisted channel ───────────────────
# `lucebox update` reads LUCEBOX_INSTALLED_FROM from the installed copy and
# re-fetches from it. Persisting a SHA-pinned URL is a footgun — every
# future update would re-install the same frozen SHA forever, defeating
# the point of `update`. So:
#
#   1. If $LUCEBOX_INSTALL_CHANNEL is set, that's the persisted URL
#      (caller takes responsibility for picking a real branch URL).
#   2. Else if LUCEBOX_INSTALL_URL has a 40-char hex SHA segment, refuse
#      to persist it — tell the user to set LUCEBOX_INSTALL_CHANNEL.
#      Common case: someone curl'd from a SHA-pinned raw URL to bypass a
#      stale CDN cache during dev; they meant for updates to track the
#      branch.
#   3.
Else persist LUCEBOX_INSTALL_URL as-is (branch or canonical main). +channel_url="${LUCEBOX_INSTALL_CHANNEL:-}" +if [ -z "$channel_url" ]; then + # Match a full 40-char hex SHA in the URL path, not the broader + # {7,40} range — a 7-39 char hex segment is more likely a branch + # name shaped like a short SHA (e.g. `feat/abc1234-hotfix`) than an + # actual SHA-pin. Keeping the gate at exactly 40 chars matches what + # `git rev-parse HEAD` emits and what `/raw//` URLs from + # GitHub's CDN actually carry. + if [[ "$LUCEBOX_INSTALL_URL" =~ /[0-9a-fA-F]{40}/[^/]+\.sh$ ]]; then + die "$(cat </install.sh | \\ + LUCEBOX_INSTALL_URL=/lucebox.sh \\ + LUCEBOX_INSTALL_CHANNEL=https://raw.githubusercontent.com////lucebox.sh \\ + bash +EOM +)" + fi + channel_url="$LUCEBOX_INSTALL_URL" +fi + +# Bake the channel URL into the file. Use a `|` delimiter since URLs +# contain `/`. The line is expected to exist in lucebox.sh with a `:-` +# default; we rewrite the whole assignment. +# +# The URL ends up inside a bash double-quoted literal in the installed +# script, so any of $ ` " \ in `channel_url` would break the installed +# file (or worse, allow command substitution to run at next sourcing). +# Validate that the URL is plain http(s)+ASCII-URL-safe characters; we +# don't expect arbitrary content here, only an upstream raw.github URL +# (or a forked equivalent). Escape the sed metachars (\&|) separately so +# the substitution itself round-trips. 
+case "$channel_url" in + *['"$`\']*) die "channel URL contains unsafe characters: $channel_url" ;; +esac +escaped_url=$(printf '%s' "$channel_url" | sed 's/[\\&|]/\\&/g') +sed "s|^LUCEBOX_INSTALLED_FROM=.*|LUCEBOX_INSTALLED_FROM=\"$escaped_url\"|" "$tmp" > "$tmp.baked" +mv "$tmp.baked" "$tmp" +grep -q "^LUCEBOX_INSTALLED_FROM=\"$escaped_url\"$" "$tmp" \ + || die "failed to bake install source into the downloaded script" + +# ── install ─────────────────────────────────────────────────────────────── +mkdir -p "$(dirname "$DEST")" +chmod +x "$tmp" +mv "$tmp" "$DEST" +trap - EXIT +ok "installed lucebox → $DEST" +info " fetched from: $LUCEBOX_INSTALL_URL" +info " update channel: $channel_url" +if [ "$LUCEBOX_INSTALL_URL" != "$channel_url" ]; then + info " (lucebox update will track the channel URL, not the fetch URL)" +fi + +# ── PATH hint ───────────────────────────────────────────────────────────── +case ":${PATH:-}:" in + *":$(dirname "$DEST"):"*) ;; + *) info " hint: add $(dirname "$DEST") to PATH so 'lucebox' is on the path" ;; +esac + +cat </dev/null || realpath "$0" 2>/dev/null || echo "$0")" +SCRIPT_NAME="$(basename "$SCRIPT_PATH")" + +# ── tunables / env overrides ─────────────────────────────────────────────── +# Host-side scalars (image registry+variant, port, container name, models +# dir). Resolution order, applied uniformly via _lucebox_resolve below: +# 1. $LUCEBOX_ per-invocation env override +# 2. config.toml
#      . persisted user choice (system of record)
#   3. derived / canonical default
# This keeps the wrapper and the in-container Python CLI agreeing on
# effective values — config.toml is the single source of truth, both
# sides read it.
UNIT_NAME="lucebox.service"
UNIT_PATH="${XDG_CONFIG_HOME:-$HOME/.config}/systemd/user/$UNIT_NAME"

# CUDA driver floor for the prebuilt CUDA 12 image.
# shellcheck disable=SC2034
MIN_DRIVER_CUDA12=525

# Canonical source of `lucebox.sh`. The bootstrap installer (`install.sh`)
# rewrites this line at install time to record which URL the user actually
# installed from — `lucebox update` then re-pulls from the same channel
# without losing track of forks. Falls back to the Luce-Org main branch
# when nothing was baked in (e.g. someone curl'd the script directly).
# NOTE: the installer matches this assignment with `^LUCEBOX_INSTALLED_FROM=`,
# so it must stay on a single line starting at column 0.
LUCEBOX_INSTALLED_FROM="${LUCEBOX_INSTALLED_FROM:-https://raw.githubusercontent.com/Luce-Org/lucebox-hub/main/lucebox.sh}"

# Print the path of the persisted config.toml: $LUCEBOX_HOME/config.toml
# when LUCEBOX_HOME is set and non-empty, else $HOME/.lucebox/config.toml.
# Mirrors lucebox.config.default_config_path. Read-only from this wrapper —
# the Python CLI is the writer.
_lucebox_config_path() {
    if [ -n "${LUCEBOX_HOME:-}" ]; then
        printf '%s/config.toml' "$LUCEBOX_HOME"
        return
    fi
    printf '%s/.lucebox/config.toml' "$HOME"
}

# Read a `section.key` value from config.toml and print it on stdout.
#
#   $1  dotted name; everything before the last dot is the section, the
#       rest is the key. A name without a dot addresses a key that appears
#       before any [section] header.
#
# Prints nothing (and still returns 0) if the file is missing, the
# section/key is absent, or the value is empty. Only the first matching
# key is reported.
#
# Handles the subset of TOML that lucebox writes:
#   [section]
#   key = "string"   # surrounding double-quotes are stripped
#   key = 8080       # bare scalars passed through verbatim
#   key = true       # same
# Inline `# comment` is honored after a value and after a [section]
# header. A `#` INSIDE a double-quoted value is part of the value, not a
# comment. Backslash escapes inside a quoted value are skipped over when
# looking for the closing quote but are not decoded. Arrays / inline
# tables / multi-line strings aren't written by the Python persister, so
# we don't parse them.
_lucebox_config_get() {
    local dotted="$1" cfg
    cfg="$(_lucebox_config_path)"
    [ -f "$cfg" ] || return 0
    local section="${dotted%.*}"
    local key="${dotted##*.}"
    # No dot in the name: "${dotted%.*}" returned the input unchanged.
    [ "$section" = "$dotted" ] && section=""
    awk -v want_section="$section" -v want_key="$key" '
        BEGIN { current = "" }
        /^[[:space:]]*\[/ {
            t = $0
            sub(/^[[:space:]]*\[[[:space:]]*/, "", t)
            # Drop the closing bracket and anything after it, so that a
            # header followed by an inline comment is still recognised.
            sub(/[[:space:]]*\].*$/, "", t)
            current = t
            next
        }
        /^[[:space:]]*#/ { next }
        /=/ {
            if (current != want_section) next
            eq = index($0, "=")
            k = substr($0, 1, eq - 1)
            # A "#" ahead of the first "=" means the "=" is inside a comment.
            if (index(k, "#") > 0) next
            gsub(/^[[:space:]]+|[[:space:]]+$/, "", k)
            if (k != want_key) next
            v = substr($0, eq + 1)
            sub(/^[[:space:]]+/, "", v)
            if (substr(v, 1, 1) == "\"") {
                # Quoted value: scan for the closing quote, stepping over
                # backslash escapes, so a "#" between the quotes survives.
                n = length(v)
                i = 2
                while (i <= n) {
                    c = substr(v, i, 1)
                    if (c == "\\") { i += 2; continue }
                    if (c == "\"") break
                    i++
                }
                if (i <= n) {
                    print substr(v, 2, i - 2)
                    exit
                }
                # Unterminated quote: fall through to the bare-scalar path.
            }
            sub(/#.*$/, "", v)
            gsub(/^[[:space:]]+|[[:space:]]+$/, "", v)
            if (length(v) >= 2 && substr(v, 1, 1) == "\"" && substr(v, length(v), 1) == "\"")
                v = substr(v, 2, length(v) - 2)
            print v
            exit
        }
    ' "$cfg"
}

# Resolve a scalar through the precedence ladder and print the winner:
#   $1  env_value  per-invocation override; typically "${LUCEBOX_FOO:-}"
#                  (the `:-` matters under `set -u`)
#   $2  toml_key   dotted config.toml name handed to _lucebox_config_get
#   $3  default    value used when both of the above are empty
_lucebox_resolve() {
    local env_value="$1" toml_key="$2" default="$3" v
    if [ -n "$env_value" ]; then
        printf '%s' "$env_value"
        return
    fi
    v="$(_lucebox_config_get "$toml_key")"
    if [ -n "$v" ]; then
        printf '%s' "$v"
        return
    fi
    printf '%s' "$default"
}

# Derive the default image URL from the install source so a fork install
# (e.g. easel/lucebox-hub) gets the fork's GHCR image automatically when
# config.toml hasn't pinned one yet.
# Pattern:
#   https://raw.githubusercontent.com/ORG/REPO/REF/lucebox.sh
#     → ghcr.io/org/repo
# Image references must be all-lowercase in the repository path, so BOTH
# the org and the repo segment are lowercased (a fork whose repo name
# carries capitals would otherwise yield a reference docker refuses to
# parse). Falls back to the canonical Luce-Org image when the URL doesn't
# match the raw.githubusercontent.com pattern.
#
#   $1  install-source URL (normally $LUCEBOX_INSTALLED_FROM)
# Prints the image base (no tag) on stdout, without a trailing newline.
_lucebox_derive_image() {
    # The ref segment can contain slashes (e.g. `feat/lucebox-docker`), so
    # the middle `.+` greedily eats everything up to the trailing
    # `/lucebox.sh`. The first two `[^/]+` capture org + repo, which are
    # never slash-containing on GitHub.
    local url="$1"
    if [[ "$url" =~ ^https?://raw\.githubusercontent\.com/([^/]+)/([^/]+)/.+/lucebox\.sh$ ]]; then
        # Lowercase the assembled path in one pass; the registry host is
        # already lowercase so it is unaffected.
        printf 'ghcr.io/%s/%s' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}" \
            | tr '[:upper:]' '[:lower:]'
        return
    fi
    printf 'ghcr.io/luce-org/lucebox-hub'
}

# Effective scalars, env > config.toml > default.
CONTAINER_NAME=$(_lucebox_resolve "${LUCEBOX_CONTAINER:-}" runtime.container_name "lucebox")
DEFAULT_PORT=$(_lucebox_resolve "${LUCEBOX_PORT:-}" runtime.port "8080")
DEFAULT_MODELS_DIR=$(_lucebox_resolve "${LUCEBOX_MODELS:-}" paths.models "${XDG_DATA_HOME:-$HOME/.local/share}/lucebox/models")
IMAGE_BASE=$(_lucebox_resolve "${LUCEBOX_IMAGE:-}" image.registry "$(_lucebox_derive_image "$LUCEBOX_INSTALLED_FROM")")

# ── LUCEBOX_HOST_* safe defaults (belt-and-suspenders) ────────────────────
# `set -u` makes any unbound LUCEBOX_HOST_* read fatal. Historically this has
# been the #1 source of regressions in this wrapper: someone adds a code path
# that touches a LUCEBOX_HOST_* var before probe_host has run, the call sites
# that DO pre-probe still work, and the bug ships. To make the bug literally
# unrepresentable we seed every LUCEBOX_HOST_* with an explicit safe default
# at script-load time (these mirror probe_host's "nothing detected" state).
# probe_host then overwrites them with real values.
# Any future read — pre- or post-probe — is now well-defined.
#
# Each "${VAR:=default}" below assigns only when VAR is unset or empty;
# `:` is the no-op that carries the expansions. One `:` per theme.

# CPU / memory.
: "${LUCEBOX_HOST_NPROC:=1}" "${LUCEBOX_HOST_RAM_GB:=0}"

# GPU 0 summary and driver.
: "${LUCEBOX_HOST_GPU_VENDOR:=none}" "${LUCEBOX_HOST_GPU_NAME:=}" \
  "${LUCEBOX_HOST_GPU_COUNT:=0}" "${LUCEBOX_HOST_VRAM_GB:=0}" \
  "${LUCEBOX_HOST_GPU_SM:=}"
: "${LUCEBOX_HOST_DRIVER_VERSION:=}" "${LUCEBOX_HOST_DRIVER_MAJOR:=0}"

# Platform services.
: "${LUCEBOX_HOST_HAS_SYSTEMD:=0}" "${LUCEBOX_HOST_IS_WSL:=0}"

# Docker and the NVIDIA Container Toolkit wiring.
: "${LUCEBOX_HOST_HAS_DOCKER:=0}" "${LUCEBOX_HOST_DOCKER_VERSION:=}" \
  "${LUCEBOX_HOST_HAS_CTK:=none}"

# Host-identity facts (item 1 — host-identity capture). These ride along
# the existing LUCEBOX_HOST_* convoy into the container so
# /opt/lucebox-hub/HOST_INFO can be written without re-probing inside the
# container (where /proc and nvidia-smi see the container's view, not the
# rig's).
: "${LUCEBOX_HOST_OS_PRETTY:=}" "${LUCEBOX_HOST_KERNEL:=}" \
  "${LUCEBOX_HOST_WSL_VERSION:=}" "${LUCEBOX_HOST_NVIDIA_CTK_VERSION:=}" \
  "${LUCEBOX_HOST_CPU_MODEL:=}" "${LUCEBOX_HOST_GPU_LIST_CSV:=}" \
  "${LUCEBOX_HOST_CUDA_VISIBLE_DEVICES:=}"

# Tracks whether probe_host has actually run; pieces of the code that need
# fresh host facts (e.g. cmd_check, cmd_serve) gate on this. Default 0.
: "${_LUCEBOX_HOST_PROBED:=0}"

# ── output helpers ────────────────────────────────────────────────────────
# Colour only when stdout is a terminal and NO_COLOR is unset/empty. The
# codes are stored as backslash escapes and rendered by printf's %b.
if [ -t 1 ] && [ -z "${NO_COLOR:-}" ]; then
    C_INFO='\033[1;34m'; C_OK='\033[1;32m'; C_WARN='\033[1;33m'
    C_ERR='\033[1;31m'; C_DIM='\033[2m'; C_RST='\033[0m'
else
    C_INFO=''; C_OK=''; C_WARN=''; C_ERR=''; C_DIM=''; C_RST=''
fi

# info/ok/warn/hint print to stdout, err to stderr; die = err + exit 1.
info() { printf '%b[INFO]%b %s\n' "$C_INFO" "$C_RST" "$*"; }
ok() { printf '%b[OK]%b %s\n' "$C_OK" "$C_RST" "$*"; }
warn() { printf '%b[WARN]%b %s\n' "$C_WARN" "$C_RST" "$*"; }
err() { printf '%b[ERROR]%b %s\n' "$C_ERR" "$C_RST" "$*" >&2; }
hint() { printf ' %b%s%b\n' "$C_DIM" "$*" "$C_RST"; }
die() { err "$*"; exit 1; }

# ── host probing ──────────────────────────────────────────────────────────
# Sets the LUCEBOX_HOST_* variables consumed by the in-container Python CLI
# (passed through with -e). The Python side trusts these and doesn't reprobe
# — it can't see the host's /proc anyway, only the container's.

# Probe the host and (re)assign + export every LUCEBOX_HOST_* fact, then
# mark _LUCEBOX_HOST_PROBED=1. Takes no arguments. Every individual probe
# is best-effort: a missing tool or an unparsable answer leaves that fact
# at its "nothing detected" value instead of aborting the wrapper.
probe_host() {
    # All scratch variables are function-local so nothing leaks into the
    # wrapper's global namespace.
    local mem_bytes q mem_mib cc

    LUCEBOX_HOST_NPROC=$(nproc 2>/dev/null || echo 1)
    # RAM: try Linux /proc/meminfo first, then macOS/BSD sysctl, else 0.
    LUCEBOX_HOST_RAM_GB=0
    if [ -r /proc/meminfo ]; then
        LUCEBOX_HOST_RAM_GB=$(awk '/MemTotal/{printf "%.0f", $2/1024/1024}' /proc/meminfo 2>/dev/null || echo 0)
    elif command -v sysctl &>/dev/null; then
        mem_bytes=$(sysctl -n hw.memsize 2>/dev/null || echo 0)
        # Only do arithmetic on a plain decimal number; an empty or odd
        # answer would otherwise be a fatal arithmetic syntax error.
        case "$mem_bytes" in
            ''|*[!0-9]*) ;;
            *) LUCEBOX_HOST_RAM_GB=$(( 10#$mem_bytes / 1024 / 1024 / 1024 )) ;;
        esac
    fi
    LUCEBOX_HOST_GPU_VENDOR="none"
    LUCEBOX_HOST_GPU_NAME=""
    LUCEBOX_HOST_GPU_COUNT=0
    LUCEBOX_HOST_VRAM_GB=0
    LUCEBOX_HOST_GPU_SM=""
    LUCEBOX_HOST_DRIVER_VERSION=""
    LUCEBOX_HOST_DRIVER_MAJOR=0
    # Reset alongside the other GPU facts so a value inherited from the
    # caller's environment cannot survive on a host without nvidia-smi.
    LUCEBOX_HOST_GPU_LIST_CSV=""

    if command -v nvidia-smi &>/dev/null; then
        if q=$(nvidia-smi --query-gpu=name,memory.total,driver_version,compute_cap \
                --format=csv,noheader,nounits 2>/dev/null) && [ -n "$q" ]; then
            LUCEBOX_HOST_GPU_VENDOR="nvidia"
            LUCEBOX_HOST_GPU_NAME=$(printf '%s\n' "$q" | head -1 | awk -F', ' '{print $1}')
            mem_mib=$(printf '%s\n' "$q" | head -1 | awk -F', ' '{print $2}')
            # memory.total can come back as a non-number (e.g. "[N/A]");
            # keep VRAM at 0 in that case rather than feeding it to $(( )).
            case "$mem_mib" in
                ''|*[!0-9]*) ;;
                *) LUCEBOX_HOST_VRAM_GB=$(( 10#$mem_mib / 1024 )) ;;
            esac
            LUCEBOX_HOST_DRIVER_VERSION=$(printf '%s\n' "$q" | head -1 | awk -F', ' '{print $3}')
            LUCEBOX_HOST_DRIVER_MAJOR=${LUCEBOX_HOST_DRIVER_VERSION%%.*}
            cc=$(printf '%s\n' "$q" | head -1 | awk -F', ' '{print $4}')
            # "8.6" → "86"
            LUCEBOX_HOST_GPU_SM="${cc//./}"
            LUCEBOX_HOST_GPU_COUNT=$(printf '%s\n' "$q" | wc -l)
        fi
        # Multi-GPU enumeration for /props.host. The single-GPU vars
        # above (GPU_NAME / GPU_SM / VRAM_GB / DRIVER_VERSION) keep
        # describing GPU 0 for back-compat with cmd_check + autotune;
        # the full per-GPU CSV rides along separately so HOST_INFO can
        # emit the whole array.
        LUCEBOX_HOST_GPU_LIST_CSV=$(nvidia-smi \
            --query-gpu=index,uuid,pci.bus_id,name,compute_cap,memory.total,power.limit \
            --format=csv,noheader 2>/dev/null || echo "")
    fi
    # CUDA_VISIBLE_DEVICES from the caller's env (empty default = "all GPUs").
    LUCEBOX_HOST_CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-}"

    # OS / kernel identity. /etc/os-release is the freedesktop spec for
    # "what distro is this?" and we keep PRETTY_NAME verbatim (it already
    # includes the version, e.g. "Ubuntu 22.04.3 LTS"). Sourced inside a
    # command substitution so its variables never reach this shell.
    LUCEBOX_HOST_OS_PRETTY=""
    if [ -r /etc/os-release ]; then
        # shellcheck source=/dev/null
        LUCEBOX_HOST_OS_PRETTY=$(. /etc/os-release 2>/dev/null && printf '%s' "${PRETTY_NAME:-}")
    fi
    LUCEBOX_HOST_KERNEL=$(uname -r 2>/dev/null || echo "")

    # WSL version detection. "wsl2" matches the kernel-side string the
    # MS-shipped WSL2 kernel embeds; "wsl1" is what the legacy translation
    # layer writes. Anything else stays empty (= not WSL).
    LUCEBOX_HOST_WSL_VERSION=""
    if [ -r /proc/version ]; then
        if grep -q "microsoft-standard-WSL2" /proc/version 2>/dev/null; then
            LUCEBOX_HOST_WSL_VERSION="wsl2"
        elif grep -qi "Microsoft" /proc/version 2>/dev/null; then
            LUCEBOX_HOST_WSL_VERSION="wsl1"
        fi
    fi

    # CPU model — first "model name" hit in /proc/cpuinfo. Cheaper than
    # lscpu and keeps the bash side dep-free.
    LUCEBOX_HOST_CPU_MODEL=""
    if [ -r /proc/cpuinfo ]; then
        LUCEBOX_HOST_CPU_MODEL=$(awk -F': ' '/^model name/{print $2; exit}' /proc/cpuinfo 2>/dev/null || echo "")
    fi

    # A *user* systemd instance must answer, not merely be installed.
    LUCEBOX_HOST_HAS_SYSTEMD=0
    if command -v systemctl &>/dev/null && systemctl --user show-environment &>/dev/null; then
        LUCEBOX_HOST_HAS_SYSTEMD=1
    fi

    LUCEBOX_HOST_IS_WSL=0
    if grep -qi microsoft /proc/version 2>/dev/null \
        || [ -e /proc/sys/fs/binfmt_misc/WSLInterop ]; then
        LUCEBOX_HOST_IS_WSL=1
    fi

    # "Has docker" means the daemon answers `docker ps`, not just that the
    # client binary is on PATH.
    LUCEBOX_HOST_HAS_DOCKER=0
    LUCEBOX_HOST_DOCKER_VERSION=""
    if command -v docker &>/dev/null && docker ps &>/dev/null; then
        LUCEBOX_HOST_HAS_DOCKER=1
        LUCEBOX_HOST_DOCKER_VERSION=$(timeout 5 docker version --format '{{.Server.Version}}' 2>/dev/null || echo "")
    fi

    # Container Toolkit wiring: runtime > cdi > installed-unwired > none.
    LUCEBOX_HOST_HAS_CTK="none"
    if [ "$LUCEBOX_HOST_HAS_DOCKER" = "1" ]; then
        if command -v nvidia-container-runtime &>/dev/null; then
            LUCEBOX_HOST_HAS_CTK="runtime"
        elif command -v nvidia-ctk &>/dev/null \
            && nvidia-ctk cdi list 2>/dev/null | grep -q 'nvidia.com/gpu'; then
            LUCEBOX_HOST_HAS_CTK="cdi"
        elif command -v nvidia-ctk &>/dev/null; then
            LUCEBOX_HOST_HAS_CTK="installed-unwired"
        fi
    fi

    # NVIDIA Container Toolkit version (best-effort; empty when nvidia-ctk
    # is not installed). nvidia-ctk --version prints "NVIDIA Container
    # Toolkit CLI version 1.16.2" on a single line — extract the trailing
    # token so the host-info JSON carries just the version, not the banner.
    LUCEBOX_HOST_NVIDIA_CTK_VERSION=""
    if command -v nvidia-ctk &>/dev/null; then
        LUCEBOX_HOST_NVIDIA_CTK_VERSION=$(nvidia-ctk --version 2>/dev/null \
            | awk '/version/{print $NF; exit}' \
            || echo "")
    fi

    export LUCEBOX_HOST_NPROC LUCEBOX_HOST_RAM_GB LUCEBOX_HOST_GPU_VENDOR
    export LUCEBOX_HOST_GPU_NAME LUCEBOX_HOST_GPU_COUNT LUCEBOX_HOST_VRAM_GB
    export LUCEBOX_HOST_GPU_SM LUCEBOX_HOST_DRIVER_VERSION LUCEBOX_HOST_DRIVER_MAJOR
    export LUCEBOX_HOST_HAS_SYSTEMD LUCEBOX_HOST_IS_WSL
    export LUCEBOX_HOST_HAS_DOCKER LUCEBOX_HOST_DOCKER_VERSION
    export LUCEBOX_HOST_HAS_CTK
    export LUCEBOX_HOST_OS_PRETTY LUCEBOX_HOST_KERNEL LUCEBOX_HOST_WSL_VERSION
    export LUCEBOX_HOST_NVIDIA_CTK_VERSION LUCEBOX_HOST_CPU_MODEL
    export LUCEBOX_HOST_GPU_LIST_CSV LUCEBOX_HOST_CUDA_VISIBLE_DEVICES
    _LUCEBOX_HOST_PROBED=1
}

# Cheap idempotency wrapper. Anything that needs real host facts (vs the safe
# defaults seeded at script-load) calls this. Subcommands that go straight to
# `systemctl`/`journalctl` no longer need to remember to call probe_host.
ensure_probed() {
    [ "$_LUCEBOX_HOST_PROBED" = "1" ] || probe_host
}

# Print the effective image variant (tag) on stdout.
pick_variant() {
    # CUDA 12.8 is the supported image variant for this branch. Effective
    # value goes through the same env > config.toml > default ladder as
    # everything else so `config set image.variant=...` propagates.
    _lucebox_resolve "${LUCEBOX_VARIANT:-}" image.variant "cuda12"
}

# ── prereq checks (host-only) ─────────────────────────────────────────────
# Print-and-exit on anything that needs root to install.
# The Python CLI does the richer reporting; this is the bare minimum to
# make `docker run` viable.

# Verify docker (client + reachable daemon) and a working NVIDIA driver.
# Reports every problem found, then exits 1 if there was at least one.
require_host_prereqs() {
    local missing=0
    if ! command -v docker &>/dev/null; then
        err "docker is not installed"
        hint "Install: https://docs.docker.com/engine/install/"
        missing=1
    elif ! docker ps &>/dev/null; then
        err "docker daemon not reachable"
        hint "sudo systemctl start docker (or: add your user to the 'docker' group, then re-login)"
        missing=1
    fi

    if ! command -v nvidia-smi &>/dev/null; then
        err "nvidia-smi not found — no NVIDIA driver detected"
        hint "Install the NVIDIA driver: https://www.nvidia.com/Download/index.aspx"
        missing=1
    elif ! nvidia-smi --query-gpu=name --format=csv,noheader &>/dev/null; then
        err "nvidia-smi present but NVML calls fail — likely a driver/library mismatch"
        hint "Reboot, or reinstall the matching NVIDIA driver package"
        missing=1
    fi

    [ "$missing" = "0" ] || exit 1
}

# Require a usable NVIDIA Container Toolkit. Returns 0 when
# LUCEBOX_HOST_HAS_CTK is "runtime" or "cdi"; otherwise prints remediation
# hints and exits 1.
require_ctk() {
    # Same defence as require_systemd: the script-load seed for
    # LUCEBOX_HOST_HAS_CTK is "none", so judging it before probe_host has
    # run would wrongly report the toolkit as not installed. ensure_probed
    # is a no-op when the caller has already probed.
    ensure_probed
    case "$LUCEBOX_HOST_HAS_CTK" in
        runtime|cdi) return 0 ;;
        installed-unwired)
            err "NVIDIA Container Toolkit installed but not wired into docker"
            hint "sudo nvidia-ctk runtime configure --runtime=docker && sudo systemctl restart docker"
            hint " or generate a CDI spec: sudo nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml"
            exit 1 ;;
        none|*)
            err "NVIDIA Container Toolkit not installed"
            hint "Install: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html"
            hint "Then register with docker:"
            hint " sudo nvidia-ctk runtime configure --runtime=docker && sudo systemctl restart docker"
            exit 1 ;;
    esac
}

# Require a working user systemd instance; exits 1 with hints otherwise.
#   $1  human-readable name of the operation, used in the error message
require_systemd() {
    # Earlier versions of this wrapper had `start`/`stop`/`logs`/etc. drop
    # straight into cmd_systemctl_passthrough without probing first, which
    # tripped `set -u` on the reference below. Two layers of defence now:
    #   1) top-of-script seeds LUCEBOX_HOST_HAS_SYSTEMD=0 unconditionally, so
    #      no read can be unbound even if probe_host is bypassed entirely.
    #   2) ensure_probed runs probe_host on first call so we still get the
    #      real answer for the require_systemd error path.
    ensure_probed
    if [ "$LUCEBOX_HOST_HAS_SYSTEMD" != "1" ]; then
        err "user systemd is not available — required for $1"
        hint "On WSL: set 'systemd=true' under [boot] in /etc/wsl.conf, then 'wsl --shutdown'."
        hint "Otherwise: install systemd, or run '$SCRIPT_NAME serve' to run in the foreground without systemd."
        exit 1
    fi
}

# ── docker run construction ───────────────────────────────────────────────
# All the Python-CLI subcommands share the same docker run incantation:
# mount the host docker socket (so the in-container CLI can spawn server /
# bench containers on the host daemon), mount $HOME at the same path (so
# paths look identical in and out), and pass host facts via env. When an
# NVIDIA GPU is detected we also pass --gpus all so the orchestrator can
# call nvidia-smi during profile snapshot export; without it nvidia_smi_csv (and
# any downstream power/utilization fields) come back empty.

# Socket path candidate: $DOCKER_HOST with a leading "unix://" stripped,
# or the conventional /var/run/docker.sock when DOCKER_HOST is unset/empty.
DOCKER_SOCK_PATH="${DOCKER_HOST:-/var/run/docker.sock}"
DOCKER_SOCK_PATH="${DOCKER_SOCK_PATH#unix://}"

# Append `-e LUCEBOX_HOST_NAME=value` for every exported host fact onto the
# named docker-argv array (bash 4.3+ nameref). The Python side reads these
# instead of reprobing — see build_orchestrator_argv / cmd_exec_in_container.
#   $1  NAME of the caller's array variable (not its contents)
# Only EXPORTED variables are listed by `compgen -e`, so facts that were
# seeded at script load but never exported by probe_host are not forwarded.
_append_host_env() {
    # shellcheck disable=SC2178 # nameref to a caller's array, not a string
    local -n _arr="$1"
    local var
    # `|| true`: grep exits 1 when nothing matches, which is not an error.
    for var in $(compgen -e | grep '^LUCEBOX_HOST_' || true); do
        _arr+=(-e "$var=${!var}")
    done
}

# Append the LUCEBOX_* scalar overrides (image/variant/port/container/models)
# plus the optional HF_TOKEN guard onto the named docker-argv array.
Shared +# by the docker-run (build_orchestrator_argv) and docker-exec +# (cmd_exec_in_container) paths so both forward an identical env subset. +_append_scalar_env() { + # shellcheck disable=SC2178 # nameref to a caller's array, not a string + local -n _arr="$1" + local variant="$2" + _arr+=(-e "LUCEBOX_IMAGE=$IMAGE_BASE") + _arr+=(-e "LUCEBOX_VARIANT=$variant") + _arr+=(-e "LUCEBOX_PORT=$DEFAULT_PORT") + _arr+=(-e "LUCEBOX_CONTAINER=$CONTAINER_NAME") + _arr+=(-e "LUCEBOX_MODELS=$DEFAULT_MODELS_DIR") + [ -n "${HF_TOKEN:-}" ] && _arr+=(-e "HF_TOKEN=$HF_TOKEN") + return 0 +} + +# Pick docker's interactive flags: -it on a real tty, -i otherwise. +# Writes into a caller-supplied array via nameref. This MUST run in the +# caller's scope (not a subshell or `< <(...)` process substitution): the +# `[ -t 1 ]` test inspects fd 1, and inside a process substitution fd 1 is +# the pipe to the consumer, not the terminal — which would force -i even on +# a real tty and break the interactive client TUIs (lucebox claude, etc.). +_set_tty_flags() { # usage: _set_tty_flags arrayname + # shellcheck disable=SC2178 + local -n _a="$1" + if [ -t 0 ] && [ -t 1 ]; then + _a=(-it) + else + _a=(-i) + fi +} + +build_orchestrator_argv() { + local variant="$1"; shift + local tty=() + _set_tty_flags tty + local argv=(docker run --rm "${tty[@]}") + if [ "${LUCEBOX_HOST_GPU_VENDOR:-none}" = "nvidia" ]; then + argv+=(--gpus all) + fi + argv+=(--name "${CONTAINER_NAME}-cli-$$") + argv+=(--user "$(id -u):$(id -g)") + # Only bind-mount the docker socket when DOCKER_HOST actually points + # at a unix socket on this host. With DOCKER_HOST=tcp://… or ssh://… + # the path we'd construct is `tcp` or empty, and `docker run -v` would + # bark with an "invalid mount" error before the orchestrator even + # starts. The orchestrator-in-container relies on docker access only + # when actually needed; pulling that mount when the host talks to + # docker over TCP/SSH is fine. 
+ if [ -S "$DOCKER_SOCK_PATH" ]; then + argv+=(--group-add "$(stat -c '%g' "$DOCKER_SOCK_PATH")") + argv+=(-v "$DOCKER_SOCK_PATH:/var/run/docker.sock") + fi + argv+=(-v "$HOME:$HOME") + # Bind-mount the XDG models dir explicitly (host = container path) so + # paths line up in/out. The $HOME mount above already covers it when + # XDG_DATA_HOME is unset, but an explicit -v is required when the user + # points XDG_DATA_HOME outside $HOME. + mkdir -p "$DEFAULT_MODELS_DIR" + argv+=(-v "$DEFAULT_MODELS_DIR:$DEFAULT_MODELS_DIR") + argv+=(-w "$PWD") + argv+=(-e "HOME=$HOME") + # Host facts — Python side reads these instead of reprobing. + _append_host_env argv + # User overrides for image/port/container/models scalars + HF_TOKEN. + # Always exports the resolved models dir so the in-container CLI sees + # the same path the wrapper mounts (the XDG default flows through too). + _append_scalar_env argv "$variant" + + argv+=("${IMAGE_BASE}:${variant}") + # `lucebox` is the entrypoint subcommand handled by server/scripts/entrypoint.sh + # — it execs `python -m lucebox` with whatever args we pass on. + argv+=(lucebox "$@") + printf '%s\n' "${argv[@]}" +} + +# ── subcommand implementations ──────────────────────────────────────────── + +cmd_serve() { + # Long-running foreground server. Also what systemd's ExecStart= calls. + # + # Two-stage so config.toml takes effect: + # 1. Run an ephemeral orchestrator container that emits the canonical + # server docker-run argv from .lucebox/config.toml (one arg per + # line on stdout). + # 2. Exec that argv. + # + # If stage 1 fails (image not pulled yet, no config), fall back to a + # conservative docker run — the container's own VRAM-tiered autotune + # picks reasonable defaults from there. + require_host_prereqs + ensure_probed + require_ctk + local variant + variant=$(pick_variant) + + # Pre-flight: refuse to stomp on something that's already serving this + # slot. 
Three states to distinguish, because silently `docker rm -f`-ing + # whatever is there hides real bugs (e.g. the user forgot they had a + # systemd unit up, and we'd happily race two servers on the same port): + # + # 1. systemd unit active → refuse, redirect to `logs`/`stop` + # 2. container running (no systemd)→ refuse, redirect to `docker logs` + # 3. container present but stopped → orphan from a SIGKILLed previous + # run (docker run --rm only cleans up on clean exit). Remove it, + # but TELL the user — they need to know their last run died dirty. + # CRITICAL: when systemd invokes US as the unit's ExecStart, is-active + # returns true *because of us* — refusing here would deadlock the unit + # in a restart loop (and historically did — commit a30dbe5 shipped this + # bug). systemd sets $INVOCATION_ID in every service exec, so its + # presence is the unambiguous "I am running as the systemd ExecStart" + # signal. Skip the unit-active check in that case; the container-state + # check below still catches a stale container holding the slot. + if [ -z "${INVOCATION_ID:-}" ] \ + && systemctl --user is-active --quiet "$UNIT_NAME" 2>/dev/null; then + err "${UNIT_NAME} is already running under systemd." + hint " $SCRIPT_NAME logs # follow the journal" + hint " $SCRIPT_NAME restart # bounce the service" + hint " $SCRIPT_NAME stop # stop the service" + exit 1 + fi + local container_state + container_state=$(docker inspect --format '{{.State.Status}}' "$CONTAINER_NAME" 2>/dev/null || echo absent) + case "$container_state" in + absent) + ;; + running|restarting) + err "Container '$CONTAINER_NAME' is already running (outside systemd)." 
+ hint " docker logs -f $CONTAINER_NAME # follow output" + hint " $SCRIPT_NAME stop # stop it" + exit 1 + ;; + exited|created|paused|dead) + info "Removing stale '$CONTAINER_NAME' container (state=$container_state, likely from a previous unclean exit)" + docker rm -f "$CONTAINER_NAME" >/dev/null + ;; + *) + warn "Container '$CONTAINER_NAME' is in unexpected state '$container_state' — removing" + docker rm -f "$CONTAINER_NAME" >/dev/null + ;; + esac + + local orch_argv server_argv + mapfile -t orch_argv < <(build_orchestrator_argv "$variant" print-serve-argv) + + if mapfile -t server_argv < <("${orch_argv[@]}" 2>/dev/null) \ + && [ "${#server_argv[@]}" -gt 0 ] \ + && [ "${server_argv[0]}" = "docker" ]; then + info "Starting lucebox server (variant=$variant, from config.toml)" + _serve_and_track "${server_argv[@]}" + return $? + fi + + warn "Couldn't fetch server argv from container (image not pulled?) — using fallback" + info "Starting lucebox server (variant=$variant, port=$DEFAULT_PORT, defaults only)" + local fallback_models="$DEFAULT_MODELS_DIR" + mkdir -p "$fallback_models" + # Forward host facts even on the fallback path so the in-container + # entrypoint can still write /opt/lucebox-hub/HOST_INFO from the host's + # view of the rig. Matches the orchestrator path (see + # build_orchestrator_argv) — without it, HOST_INFO would be written + # with "source: unknown" any time print-serve-argv fails. 
# Foreground server runner with controlling-process lifetime semantics:
# the docker daemon owns containers independently of the CLI, so a bare
# `exec docker run` leaves the container alive after the wrapper's parent
# (a terminal, a systemd unit, anything) goes away. `docker run --rm` only
# cleans up on the container's own clean exit, not on our death.
#
# Fix: run docker as a child, install signal traps that issue `docker stop`
# before exiting. Now `lucebox serve` behaves like a normal foreground
# program — close the terminal, kill the wrapper, send SIGTERM from
# systemd, the container goes down with it.
#
# Stops also from EXIT so even a `set -e` propagation cleans up.
#
# Arguments: the complete command line to run (callers in this file pass a
#            `docker run ...` argv).
# Reads:     $CONTAINER_NAME — the container that `docker stop` targets.
# Returns:   the exit status reported by `wait` for the child command.
_serve_and_track() {
    # Run the command as a background child so this shell stays free to
    # react to signals while the server is up.
    "$@" &
    local docker_pid=$!
    # shellcheck disable=SC2317  # called via trap, not "unreachable"
    _serve_stop() {
        # Disarm every trap first so the handler cannot run a second time
        # (e.g. once for TERM and again for the EXIT that follows).
        trap - HUP INT TERM EXIT
        # Best-effort: container may already be exiting / never started.
        # `docker stop` blocks up to -t seconds for graceful shutdown
        # (server handles SIGTERM), then SIGKILLs. 10s is enough for the
        # in-flight request to finish on a typical decode.
        docker stop -t 10 "$CONTAINER_NAME" >/dev/null 2>&1 || true
        # Reap the child; its status is deliberately ignored on this path.
        wait "$docker_pid" 2>/dev/null || true
    }
    trap _serve_stop HUP INT TERM EXIT
    # Block until the child exits. A trapped signal makes `wait` return
    # early, after which bash runs _serve_stop.
    wait "$docker_pid"
    local rc=$?
    # Normal completion: remove the traps so they do not fire later in the
    # caller's context.
    trap - HUP INT TERM EXIT
    return $rc
}
# Run one systemd verb (start/stop/restart/enable/disable/status) against
# the installed user unit.
#
#   $1  the verb; anything outside the list above is rejected via die.
#
# stop/enable/disable/status replace this process with systemctl.
# start/restart are supervised: after systemctl returns we poll is-active,
# because for Type=exec units systemctl reports success as soon as execve()
# completes — even if the wrapper exits a moment later. On failure the unit
# status and recent journal lines are printed so the real cause is visible.
cmd_systemctl_passthrough() {
    local action="$1"
    require_systemd "$action"
    [ -f "$UNIT_PATH" ] || {
        err "$UNIT_NAME is not installed — run '$SCRIPT_NAME install' first"
        exit 1
    }

    case "$action" in
        status)
            exec systemctl --user status "$UNIT_NAME" --no-pager ;;
        stop|enable|disable)
            exec systemctl --user "$action" "$UNIT_NAME" ;;
        start|restart)
            local before
            before=$(systemctl --user is-active "$UNIT_NAME" 2>/dev/null || true)

            # Only `start` short-circuits on the pre-existing state;
            # `restart` always goes through.
            if [ "$action" = "start" ]; then
                case "$before" in
                    active)
                        # systemctl would return 0 silently here — polite
                        # for scripts, confusing for humans. Say so.
                        ok "$UNIT_NAME is already active"
                        hint "logs: $SCRIPT_NAME logs"
                        hint "smoke: curl -s http://localhost:$DEFAULT_PORT/v1/models"
                        hint "(use \`$SCRIPT_NAME restart\` to bounce, \`$SCRIPT_NAME stop\` to halt)"
                        return 0
                        ;;
                    activating)
                        # A unit stuck in "activating" is the symptom of a
                        # broken ExecStart; starting it again would only
                        # wait for a state that never arrives.
                        err "$UNIT_NAME is in restart-loop (state=activating)"
                        hint "the unit is failing and being auto-restarted by systemd"
                        hint " $SCRIPT_NAME stop # halt the loop first"
                        hint " $SCRIPT_NAME logs # find the exit reason"
                        exit 1
                        ;;
                esac
            fi

            info "$action $UNIT_NAME"
            systemctl --user "$action" "$UNIT_NAME" || {
                err "systemctl --user $action $UNIT_NAME failed"
                systemctl --user status "$UNIT_NAME" --no-pager -n 30 || true
                exit 1
            }

            # Poll up to ten times, one second apart, but only while the
            # unit is still "activating". Any other state (active, failed,
            # inactive, ...) ends the wait immediately.
            local polls_left=10 unit_state
            while [ "$polls_left" -gt 0 ]; do
                unit_state=$(systemctl --user is-active "$UNIT_NAME" 2>/dev/null || true)
                [ "$unit_state" = "activating" ] || break
                sleep 1
                polls_left=$((polls_left - 1))
            done

            # Final verdict comes from a fresh query, not the last poll.
            unit_state=$(systemctl --user is-active "$UNIT_NAME" 2>/dev/null || true)
            if [ "$unit_state" = "active" ]; then
                ok "$UNIT_NAME is active"
                hint "logs: $SCRIPT_NAME logs"
                hint "smoke: curl -s http://localhost:$DEFAULT_PORT/v1/models"
            else
                err "$UNIT_NAME did not reach active state (current: ${unit_state:-unknown})"
                if [ "$unit_state" = "activating" ]; then
                    hint "the unit is in a restart loop — \`$SCRIPT_NAME stop\` to halt it"
                fi
                hint "status:"
                systemctl --user status "$UNIT_NAME" --no-pager -n 30 || true
                hint "recent journal:"
                journalctl --user -u "$UNIT_NAME" -n 30 --no-pager || true
                exit 1
            fi
            ;;
        *)
            die "unknown systemctl passthrough: $action" ;;
    esac
}
# Self-update: re-run the bootstrap installer against the channel this copy
# was installed from.
#
# The installer is the source of truth for "how do you install lucebox
# correctly" — chmod, atomic mv, validation, baking the source URL back into
# the new copy so the channel is preserved across upgrades. Keeping the
# logic in install.sh means it can evolve independently (sha verify,
# signature check, etc.) and the installed `lucebox update` picks those
# changes up on the next run.
#
# The installer URL is derived from LUCEBOX_INSTALLED_FROM by swapping
# `lucebox.sh` → `install.sh` in the same directory, so forks don't need a
# separate registration. Override the source channel via
# $LUCEBOX_INSTALL_URL (e.g. to switch from canonical to a dev fork).
#
# Reads:  $LUCEBOX_INSTALL_URL (optional), $LUCEBOX_INSTALLED_FROM,
#         $SCRIPT_PATH.
# Exits:  via die when the source URL is malformed, the installer cannot
#         be downloaded, or the installer itself fails.
cmd_update() {
    local source_url installer_url target installer_script
    source_url="${LUCEBOX_INSTALL_URL:-$LUCEBOX_INSTALLED_FROM}"
    if [[ "$source_url" != */lucebox.sh ]]; then
        die "LUCEBOX_INSTALLED_FROM doesn't end in /lucebox.sh: $source_url"
    fi
    installer_url="${source_url%/lucebox.sh}/install.sh"
    # realpath so a symlinked install replaces the actual file behind the
    # link rather than the link itself.
    target=$(realpath "$SCRIPT_PATH")

    info "Updating lucebox via $installer_url"
    info " source: $source_url"
    info " target: $target"

    # Download first, run second. Inlining the fetch as
    # `bash -c "$(curl ...)"` discards curl's exit status: on a 404 or a
    # network error the substitution is empty, `bash -c ""` exits 0, and
    # the update would "succeed" without touching anything.
    installer_script=$(curl -fsSL "$installer_url") \
        || die "update failed (could not download $installer_url)"
    [ -n "$installer_script" ] \
        || die "update failed (empty installer at $installer_url)"

    # Pass the URLs through to install.sh via env. The installer reads
    # $LUCEBOX_INSTALL_URL (which we set to source_url) and
    # $LUCEBOX_INSTALL_DEST (the realpath of *this* file).
    LUCEBOX_INSTALL_URL="$source_url" \
    LUCEBOX_INSTALL_DEST="$target" \
    bash -c "$installer_script" \
        || die "update failed (installer exited non-zero)"
}
Usage: + # + # # bash (in ~/.bashrc): + # source <(lucebox completion bash) + # + # # zsh (in ~/.zshrc, before `compinit`): + # source <(lucebox completion zsh) + # + # # fish: + # lucebox completion fish | source + # + # Keep this in sync with the dispatch table in main() and the sub-app + # verbs (config get/set/unset, models list/download). Adding a new + # top-level command means adding it here too. + local shell="${1:-}" + case "$shell" in + bash) + cat <<'BASH' +# lucebox bash completion. Source from ~/.bashrc: +# source <(lucebox completion bash) +_lucebox_complete() { + local cur prev cmds config_verbs models_verbs completion_shells + COMPREPLY=() + cur="${COMP_WORDS[COMP_CWORD]}" + prev="${COMP_WORDS[COMP_CWORD-1]}" + cmds="install uninstall start stop restart enable disable status logs \ + serve pull update check completion config models \ + print-run help version" + config_verbs="get set unset" + models_verbs="list download" + completion_shells="bash zsh fish" + + # Sub-app verbs / shell args. + case "$prev" in + config) COMPREPLY=( $(compgen -W "$config_verbs" -- "$cur") ); return ;; + models) COMPREPLY=( $(compgen -W "$models_verbs" -- "$cur") ); return ;; + completion) COMPREPLY=( $(compgen -W "$completion_shells" -- "$cur") ); return ;; + esac + + # Top-level command. + if [ "$COMP_CWORD" = 1 ]; then + COMPREPLY=( $(compgen -W "$cmds" -- "$cur") ) + return + fi +} +complete -F _lucebox_complete lucebox lucebox.sh +BASH + ;; + zsh) + # Bash-compat shim: zsh sources our bash completion through + # bashcompinit. Users who prefer native zsh _arguments-style + # completion can write their own; this gets `` working + # in two lines for free. + cat <<'ZSH' +# lucebox zsh completion. Source from ~/.zshrc (after compinit): +# source <(lucebox completion zsh) +autoload -Uz compinit bashcompinit +compinit +bashcompinit +ZSH + cmd_completion bash + ;; + fish) + cat <<'FISH' +# lucebox fish completion. 
Source from ~/.config/fish/config.fish: +# lucebox completion fish | source +complete -c lucebox -f +set -l __lucebox_cmds install uninstall start stop restart enable disable \ + status logs serve pull update check completion config models \ + print-run help version +for cmd in $__lucebox_cmds + complete -c lucebox -n "not __fish_seen_subcommand_from $__lucebox_cmds" -a $cmd +end +complete -c lucebox -n "__fish_seen_subcommand_from config" -a "get set unset" +complete -c lucebox -n "__fish_seen_subcommand_from models" -a "list download" +complete -c lucebox -n "__fish_seen_subcommand_from completion" -a "bash zsh fish" +FISH + ;; + ""|--help|-h) + cat </dev/null; then + _row 0 "docker daemon" "installed but unreachable — start the daemon or add user to 'docker' group" + else + _row 0 "docker daemon" "not installed — https://docs.docker.com/engine/install/" + fi + + # nvidia container toolkit + case "$LUCEBOX_HOST_HAS_CTK" in + runtime) _row 1 "nvidia ctk" "wired into docker (runtime)" ;; + cdi) _row 1 "nvidia ctk" "wired via CDI (nvidia.com/gpu)" ;; + installed-unwired) _row warn "nvidia ctk" "installed but not registered with docker — sudo nvidia-ctk runtime configure --runtime=docker && sudo systemctl restart docker" ;; + none|*) _row 0 "nvidia ctk" "not installed — https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html" ;; + esac + + # nvidia-smi + driver + if [ "$LUCEBOX_HOST_GPU_VENDOR" = "nvidia" ]; then + if [ "$LUCEBOX_HOST_DRIVER_MAJOR" -ge "$MIN_DRIVER_CUDA12" ]; then + _row 1 "nvidia driver" "$LUCEBOX_HOST_DRIVER_VERSION (≥ $MIN_DRIVER_CUDA12 required for cuda12)" + else + _row 0 "nvidia driver" "$LUCEBOX_HOST_DRIVER_VERSION (< $MIN_DRIVER_CUDA12 — cuda12 image will fail)" + fi + elif command -v nvidia-smi &>/dev/null; then + _row 0 "nvidia driver" "nvidia-smi present but NVML calls fail — driver/library mismatch, try reboot" + else + _row 0 "nvidia driver" "nvidia-smi not found — install the NVIDIA driver" + fi + + # 
# Generic dispatcher: anything that isn't a systemd action goes here.
# Runs the in-container Python CLI with the supplied argv by replacing this
# process with the command line that build_orchestrator_argv prints (one
# token per line).
#
#   $@  subcommand and arguments forwarded to the in-container CLI.
#
# Does not return on success (exec). Exits via die when no command line
# could be built.
cmd_in_container() {
    require_host_prereqs
    ensure_probed
    # CTK isn't strictly required for every subcommand (e.g. `config get`
    # or `autotune` only touch local files), but the server-spawning
    # subcommands need it.
    # Letting docker error its own way is fine for the no-CTK case.
    local variant
    variant=$(pick_variant)
    local argv=()
    mapfile -t argv < <(build_orchestrator_argv "$variant" "$@")
    # A failure inside the process substitution is invisible to this shell:
    # argv simply ends up empty. `exec` with no command is a successful
    # no-op, so without this guard the wrapper would fall through and
    # report exit status 0 for a command that never ran.
    if [ "${#argv[@]}" -eq 0 ]; then
        die "could not build the docker command line for '${1:-}'"
    fi
    exec "${argv[@]}"
}
# `docker exec` counterpart of cmd_in_container: run the in-container CLI
# inside the already-running $CONTAINER_NAME container.
#
#   $@  subcommand and arguments forwarded to the in-container CLI.
#
# Does not return on success — the shell is replaced by `docker exec`.
cmd_exec_in_container() {
    require_host_prereqs
    ensure_probed

    local variant
    variant=$(pick_variant)

    # TTY flags are filled in by name by _set_tty_flags.
    local tty=()
    _set_tty_flags tty

    # Run as the invoking user, from the caller's working directory.
    # NOTE(review): `-w "$PWD"` presumably relies on that path existing
    # inside the container — confirm against the mounts set up at launch.
    local argv=(
        docker exec "${tty[@]}"
        --user "$(id -u):$(id -g)"
        -w "$PWD"
        -e "HOME=$HOME"
    )

    # Same env subset the docker-run path forwards (appended by name).
    _append_host_env argv
    _append_scalar_env argv "$variant"

    # The image has no top-level `lucebox` binary on PATH — that name only
    # works as the first argument to the entrypoint script. docker exec
    # bypasses the image's ENTRYPOINT, so the script is invoked explicitly
    # with `lucebox` as its subcommand, followed by the user's arguments.
    argv+=(
        "$CONTAINER_NAME"
        /opt/lucebox-hub/server/scripts/entrypoint.sh
        lucebox
        "$@"
    )
    exec "${argv[@]}"
}
container is not running → run (fall back so first-run / pre-install +# flows still work without a live service). +cmd_route_to_container() { + local cmd="$1"; shift + if [ "${LUCEBOX_NO_EXEC:-0}" = "1" ]; then + cmd_in_container "$cmd" "$@" + return + fi + if _lucebox_prefer_exec "$cmd" "$@" && _lucebox_container_running; then + cmd_exec_in_container "$cmd" "$@" + return + fi + cmd_in_container "$cmd" "$@" +} + +usage() { + cat < print shell completion script (bash / zsh / fish) + models list / download / activate model presets + config read / write keys in .lucebox/config.toml + print-run print the docker-run command for the server + +Misc: + help, --help, -h this message + version, --version print version + +Environment overrides: + LUCEBOX_IMAGE image name without tag (default: ghcr.io/luce-org/lucebox-hub) + LUCEBOX_VARIANT image tag to pull/run (default: cuda12) + LUCEBOX_PORT host port for the server (default: 8080) + LUCEBOX_CONTAINER server container name (default: lucebox) + LUCEBOX_MODELS host model directory (default: \$XDG_DATA_HOME/lucebox/models + LUCEBOX_NO_EXEC=1 force docker-run for in-container subcommands even + when the container is up (equivalent to --no-exec) + HF_TOKEN propagated to \`models download\` for gated HF repos + +Container routing: + When the long-running '$CONTAINER_NAME' container is up, steady-state + subcommands (config, models, check, print-run, print-serve-argv) + 'docker exec' into it instead of starting a fresh container. This avoids + the ~1-3s docker-run cold-start AND shares the live server's network + namespace so localhost:\$LUCEBOX_PORT reaches the server. Service-restarting + commands (serve, pull, update, install, etc.) stay on the host-side / + docker-run path. Pass --no-exec (or LUCEBOX_NO_EXEC=1) to force docker-run. 
# ── dispatch ──────────────────────────────────────────────────────────────

# Entry point: strip global flags, then route the first remaining word to
# its handler.
#
#   $@  the wrapper's command line. With no subcommand, behaves as `help`.
main() {
    # Global flag pass: `--no-exec` anywhere in argv forces the docker-run
    # path even if the container is up. Equivalent to
    # `LUCEBOX_NO_EXEC=1 lucebox ...`. We pop it out of argv up-front so the
    # rest of dispatch doesn't have to know about it.
    local args=()
    while [ $# -gt 0 ]; do
        case "$1" in
            --no-exec) export LUCEBOX_NO_EXEC=1; shift ;;
            *)         args+=("$1"); shift ;;
        esac
    done
    # The `${arr[@]+...}` guard keeps a bare `lucebox` (empty args) working
    # under `set -u` on bash releases before 4.4, which treat expanding an
    # empty array as an unbound-variable error. On newer bash it is
    # equivalent to a plain "${args[@]}".
    set -- ${args[@]+"${args[@]}"}

    local cmd="${1:-help}"
    [ $# -gt 0 ] && shift
    case "$cmd" in
        # Systemd surface
        install)    cmd_systemd_install "$@" ;;
        uninstall)  cmd_systemd_uninstall "$@" ;;
        start|stop|restart|enable|disable|status)
                    cmd_systemctl_passthrough "$cmd" "$@" ;;
        logs)       cmd_logs "$@" ;;

        # Direct server
        serve)      cmd_serve "$@" ;;
        pull)       cmd_pull "$@" ;;

        # Self-update — re-runs the bootstrap installer against the channel
        # this script was installed from (LUCEBOX_INSTALLED_FROM).
        update)     cmd_update "$@" ;;

        # Host-only readiness check — pure shell, never enters the container.
        check)      cmd_check "$@" ;;

        # Shell completion — print a script the user sources into their rc
        # file. Bash and zsh share the bash-style emitter (zsh users add a
        # `bashcompinit; complete` shim); fish is native.
        completion) cmd_completion "$@" ;;

        # Help / version
        help|--help|-h)     usage ;;
        version|--version)  printf '%s\n' "$VERSION" ;;

        # Everything else → in-container Python CLI. cmd_route_to_container
        # picks between `docker exec` into the live container (cheap, shares
        # the running server's network namespace) and `docker run` (cold,
        # isolated) based on container state + the safe-to-exec command set.
        *)          cmd_route_to_container "$cmd" "$@" ;;
    esac
}

main "$@"
+dynamic = ["version"] +description = "Host-side CLI for the lucebox-hub container: launch, config, model download" +readme = "README.md" +requires-python = ">=3.11" +authors = [{ name = "Lucebox" }] +license = { text = "Apache-2.0" } + +# Kept intentionally narrow. typer pulls click+rich; tomli-w gives us TOML +# writes (stdlib tomllib only reads). httpx for the smoke + readiness probes. +# huggingface_hub for download-models — used directly (not via subprocess) +# so we can drive a Rich progress bar + verify sha256 against the repo +# metadata before re-fetching multi-GB GGUFs. +dependencies = [ + "typer>=0.12", + "rich>=13", + "httpx>=0.27", + "tomli-w>=1.0", + "huggingface_hub>=0.27", + # luce-bench is consumed lazily by the autotune sweep scorer + # (agent_replay_pass_rate in sweep.py does a function-local + # `from lucebench.areas.agent_recorded import ...` wrapped in try/except). + # It's deliberately NOT a hard dep here because the workspace can't lock + # against it until #337 (luce-bench in-tree) lands. Install with + # `uv pip install luce-bench` on the host running the scorer. +] + +[project.scripts] +lucebox = "lucebox.cli:app" + +[build-system] +requires = ["hatchling", "hatch-vcs"] +build-backend = "hatchling.build" + +[tool.hatch.version] +source = "vcs" +# Untagged checkouts (e.g. fresh clone before tagging lucebox-v0.2.1) +# resolve to this rather than 0.0.0.dev0. +fallback-version = "0.2.1.dev0" +raw-options.tag_regex = '''^lucebox-v(?P\d+\.\d+\.\d+)$''' + +[tool.hatch.build.hooks.vcs] +# Build hook writes the resolved version into src/lucebox/_version.py +# so `__init__.py` can `from lucebox._version import __version__`. +# Generated file — see lucebox/.gitignore. 
+version-file = "src/lucebox/_version.py" + +[tool.hatch.build.targets.wheel] +packages = ["src/lucebox"] diff --git a/lucebox/src/lucebox/__init__.py b/lucebox/src/lucebox/__init__.py new file mode 100644 index 000000000..8ca821024 --- /dev/null +++ b/lucebox/src/lucebox/__init__.py @@ -0,0 +1,16 @@ +"""lucebox — host-side CLI for the lucebox-hub container. + +Runs inside the container; the host wrapper at ../lucebox.sh handles `docker +run` plumbing and systemd integration. This package owns: TOML config, the +host-derived DFLASH_* serve heuristic, docker daemon calls (via the mounted +socket), and model download. The empirical autotune sweep, profiling, and +agent-client launchers land in follow-up PRs. +""" + +# Version is generated by hatch-vcs at build time into _version.py. +# Fresh source-tree checkouts before any build will not yet have the +# file — fall back to a dev marker so imports don't break. +try: + from lucebox._version import __version__ +except ImportError: + __version__ = "0.0.0.dev0+unbuilt" diff --git a/lucebox/src/lucebox/__main__.py b/lucebox/src/lucebox/__main__.py new file mode 100644 index 000000000..128e2ca87 --- /dev/null +++ b/lucebox/src/lucebox/__main__.py @@ -0,0 +1,6 @@ +"""Entry point for `python -m lucebox`.""" + +from lucebox.cli import app + +if __name__ == "__main__": + app() diff --git a/lucebox/src/lucebox/autotune.py b/lucebox/src/lucebox/autotune.py new file mode 100644 index 000000000..51b4f8424 --- /dev/null +++ b/lucebox/src/lucebox/autotune.py @@ -0,0 +1,80 @@ +"""Heuristic autotune: VRAM tier → DflashRuntime defaults. + +The recommended runtime is computed from HostFacts (VRAM, is_wsl) — stateless: +it takes HostFacts in and returns a fresh DflashRuntime. ``config.live_config`` +applies it so ``lucebox print-serve-argv`` / ``docker_run`` bake conservative +DFLASH_* defaults into the serve command for the detected VRAM tier. 
def runtime_from_host(host: HostFacts) -> DflashRuntime:
    """Map the host's VRAM tier to a conservative DflashRuntime.

    Baseline is Qwen3.6-27B Q4_K_M (~18 GB total) on NVIDIA:

        vram_gb <= 0  no VRAM signal; class defaults are returned untouched.
        < 12          too small for 27B; minimum ctx as a floor so a
                      fallback start gets an error from the daemon rather
                      than a silent OOM.
        12-21         fits but tight; ctx capped at 32 K.
        22-31         24 GB-class cards. 98 K ctx with tq3_0 KV (~2 GB KV +
                      ~18 GB model); q8_0 KV at 98 K does not fit. WSL hosts
                      additionally get budget=16.
        >= 32         full 128 K ctx.

    Prefix cache is left at the class default: it stays an explicit sweep
    tunable because enabling prefix slots regressed agent_recorded (see
    docs/experiments/qwen3.6-27b-prefix-cache-regression-bragi-2026-05-31.md).

    ``lazy`` is likewise left at the class default. The C++ server ignores
    ``--lazy-draft`` unless both ``--prefill-drafter`` and ``--draft`` are
    set, and this heuristic never sets ``prefill_drafter``.
    """
    vram = host.vram_gb
    if vram <= 0:
        return DflashRuntime()  # no VRAM signal — stick with class defaults

    if vram < 12:
        ctx = 4096
    elif vram < 22:
        ctx = 32768
    elif vram < 32:
        # tq3_0 KV is what makes 98K fit next to an ~18-19 GB model on
        # these cards; q8_0 KV at the same ctx runs out of memory.
        compact_kv = "tq3_0"
        if host.is_wsl:
            # WSL keeps the 98K ctx (raised from 65536 after the
            # coding-agent-loop sweep, see
            # docs/experiments/gemma4-26b-coding-agent-loop-sweep-2026-05-30.md)
            # and additionally pins budget=16.
            return DflashRuntime(
                budget=16,
                max_ctx=98304,
                cache_type_k=compact_kv,
                cache_type_v=compact_kv,
            )
        return DflashRuntime(
            max_ctx=98304,
            cache_type_k=compact_kv,
            cache_type_v=compact_kv,
        )
    else:
        # 32-47 GB and >= 48 GB currently resolve to the same settings.
        ctx = 131072
    return DflashRuntime(max_ctx=ctx)
+ +Subcommand inventory: + check — readiness report + config get/set/unset — read / write a single key in config.toml + pull — docker pull the cuda12 image + print-run — emit the docker-run command for the server + print-serve-argv — same, raw argv lines (consumed by `lucebox serve`) + models — list / download presets, activate one +""" + +from __future__ import annotations + +import os +import sys +from dataclasses import replace +from pathlib import Path +from typing import Annotated + +import typer +from rich.console import Console +from rich.table import Table + +import lucebox.config as config_mod +import lucebox.docker_run as docker_run +import lucebox.download as download_mod +import lucebox.host_check as host_check +from lucebox import __version__ +from lucebox.config import config_get, config_set, config_unset, live_config +from lucebox.host_facts import from_env + +app = typer.Typer( + name="lucebox", + help="Host CLI for the lucebox-hub container. Invoked by lucebox.sh.", + no_args_is_help=True, + add_completion=False, +) +console = Console() + + +# ── helpers ──────────────────────────────────────────────────────────────── + + +def _load_or_build() -> config_mod.Config: # type: ignore[name-defined] + """env > config.toml > dataclass defaults — the canonical precedence. + + Without the env-overlay step below, `config_mod.load()` returned the + persisted config verbatim and `LUCEBOX_IMAGE` / `LUCEBOX_VARIANT` / + `LUCEBOX_PORT` / `LUCEBOX_CONTAINER` / `LUCEBOX_MODELS` from the + systemd unit's `Environment=` (or any one-shot shell export) were + silently dropped. That contradicted the precedence lucebox.sh + documents and applies — and bit sindri when its config.toml had + `[image]` without `registry`, so the dataclass default + `ghcr.io/luce-org/lucebox-hub` won over the unit's + `LUCEBOX_IMAGE=ghcr.io/easel/lucebox-hub`. + + Fix: overlay env on top of the loaded config (or the live_config + fallback when config.toml is absent). 
# ── subcommands ────────────────────────────────────────────────────────────


@app.command()
def check() -> None:
    """Print a readiness report (driver, docker, CTK, RAM, VRAM, systemd)."""
    # Probe once; the same facts feed both the checks and the renderer.
    facts = from_env()
    outcome = host_check.run_checks(facts)
    # The renderer's return value is compared against "fail" to pick the
    # process exit status.
    if host_check.render(console, facts, outcome) == "fail":
        raise typer.Exit(code=1)


@app.command()
def pull() -> None:
    """`docker pull` the image variant from config.toml."""
    effective = _load_or_build()
    image_ref = f"{effective.image}:{effective.variant}"
    console.print(f"[bold]Pulling {image_ref}[/bold] (~14 GB; takes a while)…")
    status = docker_run.docker_pull(image_ref)
    # Propagate docker's own exit code so callers can distinguish failures.
    if status != 0:
        raise typer.Exit(code=status)


@app.command("print-run")
def print_run() -> None:
    """Print the docker-run command for the server (copy-pasteable)."""
    # Plain print (not the rich console) so the output is emitted verbatim.
    print(docker_run.server_run_spec(_load_or_build()).printable())


@app.command("print-serve-argv")
def print_serve_argv() -> None:
    """Emit the server docker-run argv, one token per line.

    Consumed by lucebox.sh's `serve` subcommand and the systemd unit. Kept as
    a separate command from `print-run` so the bash side has a guaranteed
    machine-readable contract that's independent of the pretty formatter.
    """
    tokens = docker_run.server_run_spec(_load_or_build()).argv()
    for token in tokens:
        print(token)
# ── config sub-app ─────────────────────────────────────────────────────────


config_app = typer.Typer(no_args_is_help=True, help="Read/write keys in config.toml.")
app.add_typer(config_app, name="config")


@config_app.command("get")
def config_get_cmd(
    key: Annotated[str, typer.Argument(help="Dotted key (omit to list every key).")] = "",
) -> None:
    """Print a single key (or every reachable key) with its origin annotation."""
    try:
        # An empty argument means "list everything", which config_get spells None.
        found = config_get(key or None)
    except KeyError as exc:
        console.print(f"[red]{exc}[/red]")
        raise typer.Exit(code=2) from exc
    for name, pair in found.items():
        value, origin = pair
        console.print(f"{name} = {value!r} ([dim]from {origin}[/dim])")


@config_app.command("set")
def config_set_cmd(
    kv: Annotated[str, typer.Argument(help='"key=value" pair (e.g. "model.preset=qwen3.6-27b")')],
) -> None:
    """Set one dotted key. Auto-creates config.toml when missing.

    Only the named key is written — other on-disk keys are preserved
    untouched, unset keys stay implicit. Use `lucebox config unset` to
    remove a key (next read falls back to the live default).
    """
    # Split on the FIRST '=' only, so values may themselves contain '='.
    name, separator, raw_value = kv.partition("=")
    if not separator:
        console.print("[red]argument must be key=value[/red]")
        raise typer.Exit(code=2)
    name = name.strip()
    raw_value = raw_value.strip()
    try:
        config_set(name, raw_value)
    except (KeyError, ValueError) as exc:
        # Unknown key or uncoercible value: report and exit with usage status.
        console.print(f"[red]{exc}[/red]")
        raise typer.Exit(code=2) from exc
    console.print(f"[green]Set[/green] {name} = {raw_value}")
@config_app.command("unset")
def config_unset_cmd(
    key: Annotated[str, typer.Argument(help="Dotted key to remove from config.toml.")],
) -> None:
    """Remove a key from config.toml. Next read uses the live default."""
    try:
        removed = config_unset(key)
    except KeyError as exc:
        console.print(f"[red]{exc}[/red]")
        raise typer.Exit(code=2) from exc
    # config_unset reports whether the file actually changed; pick the message
    # accordingly so a no-op is visibly a no-op.
    if not removed:
        console.print(f"[dim]{key} was not in config.toml; nothing to do[/dim]")
        return
    console.print(f"[green]Unset[/green] {key}")


# ── models sub-app ─────────────────────────────────────────────────────────


models_app = typer.Typer(
    no_args_is_help=False, help="Manage local model presets (list, download, activate)."
)
app.add_typer(models_app, name="models")
def _print_installed_presets() -> None:
    """Print a table of installed presets and their total disk usage.

    The active preset (``cfg.model.preset``) is marked with ``*``. When
    nothing is installed a hint is printed instead of an empty table.
    """
    cfg = _load_or_build()
    installed = download_mod.installed_presets(cfg)
    active = cfg.model.preset
    console.print(f"Models dir: [bold]{cfg.models_dir}[/bold]")
    if not installed:
        console.print("[dim]No presets installed yet — try `lucebox models download`.[/dim]")
        return

    # Measure each preset exactly once; the same figures feed both the table
    # rows and the grand total.
    sizes = [download_mod.installed_size_gb(cfg, pres) for pres in installed]

    table = Table()
    table.add_column("preset")
    table.add_column("status")
    table.add_column("size (GB)")
    for pres, size_gb in zip(installed, sizes):
        marker = "* " if pres.name == active else " "
        table.add_row(f"{marker}{pres.name}", "installed", f"{size_gb:.1f}")
    console.print(table)
    console.print(f"[dim]Total disk usage: {sum(sizes):.1f} GB[/dim]")


@models_app.callback(invoke_without_command=True)
def models_default(ctx: typer.Context) -> None:
    """Default action: list installed presets, mark active with `*`."""
    # Only act when `lucebox models` was invoked bare; subcommands handle
    # themselves.
    if ctx.invoked_subcommand is None:
        _print_installed_presets()


@models_app.command("list")
def models_list() -> None:
    """Show every registered preset (installed or not) with status + size."""
    cfg = _load_or_build()
    active = cfg.model.preset
    table = Table()
    for heading in ("preset", "status", "size (GB)", "description"):
        table.add_column(heading)
    for name in sorted(download_mod.PRESETS):
        pres = download_mod.PRESETS[name]
        marker = "* " if name == active else " "
        status = download_mod.installed_status(cfg, pres)
        size = download_mod.installed_size_gb(cfg, pres)
        # Nothing on disk yet: fall back to the preset's approximate size,
        # flagged with a trailing '*'.
        size_text = f"{size:.1f}" if size > 0 else f"~{pres.approx_total_gb}*"
        table.add_row(f"{marker}{name}", status, size_text, pres.description or "")
    console.print(table)


@models_app.command("download")
def models_download(
    preset: Annotated[str, typer.Argument(help="Preset name (empty = recommend)")] = "",
    activate: Annotated[
        bool, typer.Option("--activate", help="Also set as active preset (model.preset).")
    ] = False,
) -> None:
    """Fetch a preset's GGUFs into the models dir.

    With no argument and no preset configured, recommends one for this
    host's VRAM tier and auto-activates it (the first-install path).
    Otherwise the named preset is downloaded; pass ``--activate`` to
    also flip `model.preset` to it.
    """
    cfg = _load_or_build()
    if not preset:
        # Bare invocation is only meaningful on first install; refuse to guess
        # when a preset is already active.
        if cfg.model.preset:
            console.print(
                "[yellow]No preset specified and one is already active. "
                "Pass an explicit preset name (or use --activate to switch).[/yellow]"
            )
            raise typer.Exit(code=2)
        recommended = download_mod.recommend_preset(cfg.host)
        if recommended is None:
            console.print(
                "[red]Cannot recommend a preset for this host. "
                "Run `lucebox models list` and pick one explicitly.[/red]"
            )
            raise typer.Exit(code=2)
        preset = recommended
        activate = True
        console.print(
            f"[bold]Recommended preset: {preset}[/bold] "
            "(no preset configured; auto-activating after download)"
        )

    try:
        pres = download_mod.resolve_preset(preset)
    except KeyError as exc:
        console.print(f"[red]{exc}[/red]")
        raise typer.Exit(code=2) from exc

    current = download_mod.status(cfg, pres)
    console.print(f"Models dir: [bold]{cfg.models_dir}[/bold]")
    console.print(f"Preset: [bold]{pres.name}[/bold]")
    console.print(
        f" target ({pres.target_repo}/{pres.target_file}):"
        f" {'present' if current['target_present'] else 'will download'}"
    )
    if pres.has_draft:
        console.print(
            f" draft ({pres.draft_repo}/{pres.draft_file}):"
            f" {'present' if current['draft_present'] else 'will download'}"
        )
    else:
        console.print(" draft [dim](none — target-only preset)[/dim]")

    if current["target_present"] and current["draft_present"]:
        console.print("[green]Already present.[/green]")
    else:
        console.print(f"[bold]Downloading[/bold] (~{pres.approx_total_gb} GB total)…")
        rc = download_mod.download_preset(cfg, pres)
        if rc != 0:
            raise typer.Exit(code=rc)
        console.print("[green]Done.[/green]")

    if activate:
        config_set("model.preset", preset)
        if pres.target_file:
            config_set("model.target_file", pres.target_file)
        if pres.has_draft and pres.draft_file:
            config_set("model.draft_file", pres.draft_file)
        else:
            # Drop any stale draft_file from a previous activation; the
            # active preset has no draft.
            config_unset("model.draft_file")
        console.print(f"[green]Activated:[/green] model.preset = {preset}")
@app.command()
def version() -> None:
    """Print lucebox version."""
    print(__version__)


def main() -> None:
    """Run the Typer app as the module entrypoint (`python -m lucebox`).

    A Ctrl-C is reported briefly and turned into exit status 130 instead of
    a traceback.
    """
    try:
        app()
    except KeyboardInterrupt:
        console.print("\n[dim]interrupted[/dim]")
        sys.exit(130)


if __name__ == "__main__":
    main()
+""" + +from __future__ import annotations + +import os +import re +import tomllib +from collections.abc import Callable +from dataclasses import replace +from pathlib import Path +from typing import Any + +import tomli_w + +from lucebox.types import ( + Config, + DflashRuntime, + HostFacts, + ModelMeta, + Variant, + default_models_dir, +) + + +def default_config_path() -> Path: + """Where .lucebox/config.toml lives. + + Convention: under $LUCEBOX_HOME if set, otherwise $HOME/.lucebox. Lives in + the bind-mounted host home dir so the config survives container teardown + and is editable from the host. + """ + base = os.environ.get("LUCEBOX_HOME") + if base: + return Path(base) / "config.toml" + return Path.home() / ".lucebox" / "config.toml" + + +# ── dotted-key registry ──────────────────────────────────────────────────── + +def _cast_prefill_mode(v: Any) -> str: + s = str(v) + if s not in {"off", "auto", "always"}: + raise ValueError(f"prefill_mode must be off/auto/always, got {s!r}") + return s + + +def _cast_bool(v: Any) -> bool: + """Strict-ish boolean coercion for config values. + + - Native booleans pass through. + - Strings: 1/true/yes/on → True; 0/false/no/off/"" → False (case-insensitive). + - Anything else raises ``ValueError`` rather than silently coercing, + because that's what bit ``dflash.debug_thinking_logits`` — the + built-in ``bool`` caster turned ``"false"`` into ``True``. + """ + if isinstance(v, bool): + return v + if isinstance(v, str): + s = v.strip().lower() + if s in ("1", "true", "yes", "on"): + return True + if s in ("0", "false", "no", "off", ""): + return False + raise ValueError(f"cannot parse boolean: {v!r}") + if isinstance(v, int): + return bool(v) + raise ValueError(f"cannot parse boolean: {v!r}") + + +# Each entry: dotted-key → (toml_path, type_caster, default_getter). +# ``toml_path`` is the (section, field) pair on disk; ``"_root"`` means the +# key lives at the top level (no [section]). 
``default_getter`` returns the +# in-memory default so ``config get`` can annotate origin. +KEY_REGISTRY: dict[str, tuple[tuple[str, str], Callable[[Any], Any]]] = { + "variant": (("image", "variant"), str), + "image": (("image", "registry"), str), + "container_name": (("runtime", "container_name"), str), + "port": (("runtime", "port"), int), + "models_dir": (("paths", "models"), str), + "model.preset": (("model", "preset"), str), + "model.target_file": (("model", "target_file"), str), + "model.draft_file": (("model", "draft_file"), str), + "dflash.budget": (("dflash", "budget"), int), + "dflash.max_ctx": (("dflash", "max_ctx"), int), + "dflash.lazy": (("dflash", "lazy"), _cast_bool), + "dflash.prefix_cache_slots": (("dflash", "prefix_cache_slots"), int), + "dflash.prefill_cache_slots": (("dflash", "prefill_cache_slots"), int), + "dflash.cache_type_k": (("dflash", "cache_type_k"), str), + "dflash.cache_type_v": (("dflash", "cache_type_v"), str), + "dflash.prefill_mode": (("dflash", "prefill_mode"), _cast_prefill_mode), + "dflash.prefill_keep_ratio": (("dflash", "prefill_keep_ratio"), float), + "dflash.prefill_threshold": (("dflash", "prefill_threshold"), int), + "dflash.prefill_drafter": (("dflash", "prefill_drafter"), str), + "dflash.think_max": (("dflash", "think_max"), int), + "dflash.fa_window": (("dflash", "fa_window"), int), + "dflash.think_soft_close_min_ratio": ( + ("dflash", "think_soft_close_min_ratio"), float), + "dflash.debug_thinking_logits": ( + ("dflash", "debug_thinking_logits"), _cast_bool), +} + + +def _doc_get(doc: dict[str, Any], section: str, field: str) -> Any: + if section == "_root": + return doc.get(field) + sub = doc.get(section) + if isinstance(sub, dict): + return sub.get(field) + return None + + +def _doc_set(doc: dict[str, Any], section: str, field: str, value: Any) -> None: + if section == "_root": + doc[field] = value + return + doc.setdefault(section, {})[field] = value + + +def _doc_unset(doc: dict[str, Any], section: str, field: 
str) -> bool: + """Remove a dotted key from the doc. Returns True iff something was removed.""" + if section == "_root": + if field in doc: + del doc[field] + return True + return False + sub = doc.get(section) + if isinstance(sub, dict) and field in sub: + del sub[field] + if not sub: + del doc[section] + return True + return False + + +# ── load ─────────────────────────────────────────────────────────────────── + + +def load(path: Path | None = None) -> Config | None: + """Load config.toml, or return None if missing. + + If a legacy `.env` sits next to it (or in place of it), migrate that + first and write back as TOML. + """ + path = path or default_config_path() + if path.exists(): + return _load_toml(path) + + legacy = path.with_suffix(".env") + if legacy.exists(): + cfg, doc = _load_legacy_env(legacy) + save(cfg, path, doc=doc) + return cfg + + return None + + +def _load_toml(path: Path) -> Config: + raw = tomllib.loads(path.read_text()) + return _from_dict(raw) + + +def load_doc(path: Path | None = None) -> dict[str, Any]: + """Return the raw TOML doc (a dict). 
# Legacy env-file key → (TOML section, TOML field, caster).
_LEGACY_KEY_MAP: dict[str, tuple[str, str, Callable[[str], Any]]] = {
    "DFLASH_BUDGET": ("dflash", "budget", int),
    "DFLASH_MAX_CTX": ("dflash", "max_ctx", int),
    "DFLASH_LAZY": ("dflash", "lazy",
                    lambda v: str(v).strip().lower() in ("1", "true", "yes", "on")),
    "DFLASH_PREFIX_CACHE_SLOTS": ("dflash", "prefix_cache_slots", int),
    "DFLASH_PORT": ("runtime", "port", int),
    "LUCEBOX_VARIANT": ("image", "variant", str),
    "LUCEBOX_IMAGE": ("image", "registry", str),
    "LUCEBOX_MODELS": ("paths", "models", str),
}


def _load_legacy_env(path: Path) -> tuple[Config, dict[str, Any]]:
    """Best-effort migration from the bash-era .lucebox/config.env.

    Reads ``KEY=value`` lines (upper-case keys only), ignoring blank lines,
    ``#`` comments, unknown keys, and values their caster rejects. Surrounding
    single/double quotes are stripped from values.

    Returns:
        ``(config, doc)`` where ``doc`` is the sparse TOML mapping holding only
        the migrated keys and ``config`` is that doc expanded with defaults.
    """
    assignment = re.compile(r"^([A-Z_][A-Z0-9_]*)=(.*)$")
    doc: dict[str, Any] = {}
    for raw_line in path.read_text().splitlines():
        text = raw_line.strip()
        if not text or text.startswith("#"):
            continue
        match = assignment.match(text)
        if match is None:
            continue
        key = match.group(1)
        if key not in _LEGACY_KEY_MAP:
            continue
        value = match.group(2).strip().strip('"').strip("'")
        section, field, cast_fn = _LEGACY_KEY_MAP[key]
        try:
            doc.setdefault(section, {})[field] = cast_fn(value)
        except (TypeError, ValueError):
            # Best effort: an unparsable legacy value is dropped, not fatal.
            continue
    return _from_dict(doc), doc


def _from_dict(raw: dict[str, Any]) -> Config:
    """Expand a raw TOML mapping into a full Config, filling in defaults.

    Boolean dflash flags go through ``_cast_bool`` rather than built-in
    ``bool()``, so a hand-edited string such as ``lazy = "false"`` loads as
    ``False`` instead of the truthy-non-empty-string ``True``.

    Raises:
        ValueError: when a numeric or boolean field holds a value that cannot
            be converted.
    """
    img = raw.get("image", {})
    variant: Variant = str(img.get("variant", "cuda12"))
    registry = img.get("registry", "ghcr.io/luce-org/lucebox-hub")

    runtime = raw.get("runtime", {})
    port = int(runtime.get("port", 8080))
    container_name = str(runtime.get("container_name", "lucebox"))

    paths = raw.get("paths", {})
    models_dir = Path(paths.get("models", str(default_models_dir())))

    df = raw.get("dflash", {})
    dflash = DflashRuntime(
        budget=int(df.get("budget", 22)),
        max_ctx=int(df.get("max_ctx", 16384)),
        lazy=_cast_bool(df.get("lazy", False)),
        prefix_cache_slots=int(df.get("prefix_cache_slots", 0)),
        prefill_cache_slots=int(df.get("prefill_cache_slots", 0)),
        cache_type_k=str(df.get("cache_type_k", "")),
        cache_type_v=str(df.get("cache_type_v", "")),
        prefill_mode=df.get("prefill_mode", "off"),
        prefill_keep_ratio=float(df.get("prefill_keep_ratio", 0.05)),
        prefill_threshold=int(df.get("prefill_threshold", 32000)),
        prefill_drafter=str(df.get("prefill_drafter", "")),
        think_max=int(df.get("think_max", 15488)),
        fa_window=int(df.get("fa_window", 0)),
        think_soft_close_min_ratio=float(
            df.get("think_soft_close_min_ratio", 0.0)),
        debug_thinking_logits=_cast_bool(df.get("debug_thinking_logits", False)),
    )

    host_raw = raw.get("host", {})
    host = HostFacts(
        nproc=int(host_raw.get("nproc", 0)),
        ram_gb=int(host_raw.get("ram_gb", 0)),
        gpu_vendor=host_raw.get("gpu_vendor", "none"),
        gpu_name=str(host_raw.get("gpu_name", "")),
        gpu_count=int(host_raw.get("gpu_count", 0)),
        vram_gb=int(host_raw.get("vram_gb", 0)),
        gpu_sm=str(host_raw.get("gpu_sm", "")),
        driver_version=str(host_raw.get("driver_version", "")),
        driver_major=int(host_raw.get("driver_major", 0)),
        has_systemd=bool(host_raw.get("has_systemd", False)),
        is_wsl=bool(host_raw.get("is_wsl", False)),
        has_docker=bool(host_raw.get("has_docker", False)),
        docker_version=str(host_raw.get("docker_version", "")),
        ctk=host_raw.get("ctk", "none"),
    )

    # `[model]` is optional — configs without it keep working unchanged. If
    # `preset` is set but `target_file` / `draft_file` isn't, derive the
    # missing ones from the registry so users only have to write one key.
    mdl = raw.get("model", {})
    preset_name = str(mdl.get("preset", ""))
    target_file = str(mdl.get("target_file", ""))
    draft_file = str(mdl.get("draft_file", ""))
    if preset_name and (not target_file or not draft_file):
        from lucebox.download import PRESETS

        if preset_name in PRESETS:
            pres = PRESETS[preset_name]
            if not target_file:
                target_file = pres.target_file
            if not draft_file and pres.has_draft and pres.draft_file:
                draft_file = pres.draft_file
    model = ModelMeta(preset=preset_name, target_file=target_file, draft_file=draft_file)

    return Config(
        variant=variant,
        image=registry,
        container_name=container_name,
        port=port,
        models_dir=models_dir,
        dflash=dflash,
        host=host,
        model=model,
    )
# ── save ───────────────────────────────────────────────────────────────────


def _atomic_write_doc(path: Path, doc: dict[str, Any]) -> None:
    """Serialize ``doc`` to TOML and write it to ``path`` atomically.

    The payload goes to a sibling ``.toml.tmp`` file first and is then moved
    over ``path`` with ``Path.replace``, so an interrupted write never leaves
    a truncated config.toml. The caller must ensure ``path.parent`` exists.
    """
    payload = tomli_w.dumps(doc).encode("utf-8")
    scratch = path.with_suffix(".toml.tmp")
    scratch.write_bytes(payload)
    scratch.replace(path)


def save(cfg: Config, path: Path | None = None, *, doc: dict[str, Any] | None = None) -> Path:
    """Persist the sparse TOML ``doc`` to ``path`` and return that path.

    Only the keys present in ``doc`` are written. With ``doc=None`` the
    current on-disk doc is re-used (an empty doc if the file does not exist
    yet). ``cfg`` is accepted for call-site symmetry but is never consulted:
    the sparse write does not re-derive keys from a Config object.
    """
    # Deliberately unused — see the docstring.
    del cfg
    target = path or default_config_path()
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = load_doc(target) if doc is None else doc
    _atomic_write_doc(target, payload)
    return target
+ """ + path = path or default_config_path() + path.parent.mkdir(parents=True, exist_ok=True) + if doc is None: + doc = load_doc(path) + _atomic_write_doc(path, doc) + # Silence unused-arg: cfg is the on-disk representation's source of + # truth for callers that want to round-trip through a Config object, + # but the sparse write never re-derives keys from it. + del cfg + return path + + +# ── dotted-key API ───────────────────────────────────────────────────────── + + +def _value_to_toml(value: Any) -> Any: + """Make a Python value safe for tomli_w (no None, Path→str).""" + if isinstance(value, Path): + return str(value) + return value + + +def _live_default(key: str) -> Any: + """Return the in-memory default for ``key`` (from a fresh Config()).""" + cfg = Config() + section_field = KEY_REGISTRY[key][0] + section, field = section_field + if section == "image": + return {"variant": cfg.variant, "registry": cfg.image}[field] + if section == "runtime": + return {"port": cfg.port, "container_name": cfg.container_name}[field] + if section == "paths": + return str(cfg.models_dir) if field == "models" else None + if section == "dflash": + return getattr(cfg.dflash, field) + if section == "model": + return getattr(cfg.model, field) + return None + + +def config_set(key: str, value: Any, *, path: Path | None = None) -> None: + """Set one dotted key and write the file. 
def config_unset(key: str, *, path: Path | None = None) -> bool:
    """Remove a dotted key from the file. Returns True if something changed.

    A missing file counts as "nothing to remove" and yields ``False``.

    Raises:
        KeyError: ``key`` is not in ``KEY_REGISTRY``.
    """
    if key not in KEY_REGISTRY:
        raise KeyError(f"unknown config key {key!r}; known: {sorted(KEY_REGISTRY)}")
    section, field = KEY_REGISTRY[key][0]
    target = path or default_config_path()
    if not target.exists():
        return False
    doc = load_doc(target)
    if not _doc_unset(doc, section, field):
        return False
    # The file is rewritten even when the doc ends up empty; it is never
    # deleted here.
    _atomic_write_doc(target, doc)
    return True


def config_get(key: str | None = None, *, path: Path | None = None) -> dict[str, tuple[Any, str]]:
    """Return ``{key: (value, origin)}``. ``origin`` is ``"file"`` or ``"default"``.

    With ``key`` None or empty every registered key is reported, in registry
    order; otherwise the result is a single-item dict for that key.

    Raises:
        KeyError: ``key`` is not in ``KEY_REGISTRY``.
    """
    target = path or default_config_path()
    doc = load_doc(target) if target.exists() else {}
    wanted = [key] if key else list(KEY_REGISTRY)
    report: dict[str, tuple[Any, str]] = {}
    for name in wanted:
        if name not in KEY_REGISTRY:
            raise KeyError(f"unknown config key {name!r}; known: {sorted(KEY_REGISTRY)}")
        section, field = KEY_REGISTRY[name][0]
        stored = _doc_get(doc, section, field)
        if stored is None:
            report[name] = (_live_default(name), "default")
        else:
            report[name] = (stored, "file")
    return report
def live_config() -> Config:
    """Build a Config from probed host facts, env overrides and defaults.

    Starts from ``Config()``, applies the ``LUCEBOX_VARIANT`` /
    ``LUCEBOX_IMAGE`` / ``LUCEBOX_CONTAINER`` / ``LUCEBOX_PORT`` /
    ``LUCEBOX_MODELS`` environment overrides, and derives the dflash runtime
    from the probed host via ``autotune.runtime_from_host``.

    Raises:
        ValueError: if ``LUCEBOX_PORT`` is set to a non-integer string.
    """
    # Imported inside the function (not at module scope) to keep the
    # autotune ↔ config import cycle from biting at import time.
    import lucebox.autotune as autotune_mod
    from lucebox.host_facts import from_env

    env = os.environ
    facts = from_env()
    base = Config()
    return replace(
        base,
        variant=env.get("LUCEBOX_VARIANT", "cuda12"),
        image=env.get("LUCEBOX_IMAGE", base.image),
        container_name=env.get("LUCEBOX_CONTAINER", base.container_name),
        port=int(env.get("LUCEBOX_PORT", str(base.port))),
        models_dir=Path(env.get("LUCEBOX_MODELS", str(base.models_dir))),
        dflash=autotune_mod.runtime_from_host(facts),
        host=facts,
    )
+ +We shell out to the `docker` CLI rather than using the docker SDK because +(a) the CLI is the user-visible contract — errors look the same whether +issued by lucebox or the user; (b) zero import cost; (c) trivially mockable +via subprocess in tests. Wrap everything in one module so swapping to the +SDK later is a single-file change. +""" + +from __future__ import annotations + +import os +import shlex +import subprocess +from dataclasses import dataclass +from pathlib import Path + +from lucebox.types import Config + + +def _host_facts_env() -> list[tuple[str, str]]: + """Forward LUCEBOX_HOST_* from the orchestrator's env into the server. + + lucebox.sh's probe_host() exports every host-identity fact (OS, + kernel, GPU list CSV, CTK version, …) before invoking ``docker run`` + on the orchestrator. The orchestrator inherits them and we pass + them through verbatim so the server entrypoint can write + /opt/lucebox-hub/HOST_INFO without re-probing inside the container + (where /proc and nvidia-smi see the container's view, not the + rig's). See entrypoint.sh::write_host_info and http_server.cpp's + /props.host block. + """ + out: list[tuple[str, str]] = [] + for key, value in sorted(os.environ.items()): + if key.startswith("LUCEBOX_HOST_"): + out.append((key, value)) + return out + + +def _resolve_model_files(cfg: Config) -> tuple[str, str, str]: + """Return (target_file, draft_file, draft_dir) for DFLASH_TARGET / DFLASH_DRAFT. + + Resolution order — first non-empty wins per field: + 1. cfg.model.target_file / draft_file (explicit override in config.toml) + 2. PRESETS[cfg.model.preset].target_file / draft_file / speculator_dir (registry) + 3. "" (entrypoint autodetect path runs unchanged). + + ``draft_dir`` is a directory name under ``models/draft/`` holding a + safetensors speculator (e.g. ``laguna-xs2-speculator``). It is only set + when the preset declares one AND the directory exists on disk; otherwise + it is empty. 
When non-empty, docker_run_spec uses it as DFLASH_DRAFT + (a directory path) instead of the GGUF-file path, allowing the entrypoint + to discover the safetensors file inside it. + + Imported lazily to avoid the lucebox.types ↔ lucebox.download circular + import that surfaces when this module is imported from ``__init__``. + """ + target = cfg.model.target_file + draft = cfg.model.draft_file + draft_dir = "" + if (not target or not draft) and cfg.model.preset: + from lucebox.download import PRESETS + + pres = PRESETS.get(cfg.model.preset) + if pres is not None: + if not target: + target = pres.target_file + if not draft and pres.has_draft and pres.draft_file: + draft = pres.draft_file + if not draft and pres.speculator_dir: + spec_path = cfg.models_dir / "draft" / pres.speculator_dir + if spec_path.is_dir(): + draft_dir = pres.speculator_dir + return target, draft, draft_dir + + +def _runtime_volumes(cfg: Config) -> tuple[tuple[str, str], ...]: + """Mount models plus $HOME so absolute symlink targets remain valid.""" + home = str(Path.home()) + models = str(cfg.models_dir) + volumes = [(models, "/opt/lucebox-hub/server/models")] + if home != models: + volumes.append((home, home)) + return tuple(volumes) + + +@dataclass(frozen=True, slots=True) +class DockerRunSpec: + """Pre-render of a docker-run command. Render via `argv()` or `printable()`.""" + + image: str + name: str + gpus: bool = True + detach: bool = False + remove: bool = True + port_publish: tuple[int, int] | None = None # (host, container) + volumes: tuple[tuple[str, str], ...] = () + env: tuple[tuple[str, str], ...] = () + entrypoint_args: tuple[str, ...] = () + extra: tuple[str, ...] 
# ── server argv from Config ────────────────────────────────────────────────


def server_run_spec(cfg: Config) -> DockerRunSpec:
    """Build the docker-run spec for the long-running server container.

    The container runs in the foreground (``detach=False``) with ``--rm`` and
    ``--gpus all``, publishes ``cfg.port`` onto container port 8080, mounts
    the volumes from ``_runtime_volumes`` and receives the ``DFLASH_*``
    settings as environment variables. Optional settings are only emitted
    when they differ from their "off" value (empty string, zero, ``False`` or
    ``"off"``).
    """
    knobs = cfg.dflash
    models_root = "/opt/lucebox-hub/server/models"

    # Forwarded LUCEBOX_HOST_* facts lead the env list so they appear first
    # in the rendered command.
    env: list[tuple[str, str]] = list(_host_facts_env())
    env.extend([
        ("DFLASH_BUDGET", str(knobs.budget)),
        ("DFLASH_MAX_CTX", str(knobs.max_ctx)),
        ("DFLASH_PREFIX_CACHE_SLOTS", str(knobs.prefix_cache_slots)),
        ("DFLASH_PREFILL_CACHE_SLOTS", str(knobs.prefill_cache_slots)),
        ("DFLASH_THINK_MAX", str(knobs.think_max)),
        ("DFLASH_PORT", "8080"),
    ])

    # Model paths are expressed in the container's view of the models dir.
    # A resolved draft file takes precedence over a speculator directory.
    target_file, draft_file, draft_dir = _resolve_model_files(cfg)
    if target_file:
        env.append(("DFLASH_TARGET", f"{models_root}/{target_file}"))
    draft_entry = draft_file or draft_dir
    if draft_entry:
        env.append(("DFLASH_DRAFT", f"{models_root}/draft/{draft_entry}"))

    if knobs.lazy:
        env.append(("DFLASH_LAZY", "1"))
    if knobs.cache_type_k:
        env.append(("DFLASH_CACHE_TYPE_K", knobs.cache_type_k))
    if knobs.cache_type_v:
        env.append(("DFLASH_CACHE_TYPE_V", knobs.cache_type_v))
    if knobs.prefill_mode != "off":
        env.extend([
            ("DFLASH_PREFILL_MODE", knobs.prefill_mode),
            ("DFLASH_PREFILL_KEEP", str(knobs.prefill_keep_ratio)),
            ("DFLASH_PREFILL_THRESHOLD", str(knobs.prefill_threshold)),
        ])
        # The drafter is only forwarded while prefill mode is enabled.
        if knobs.prefill_drafter:
            env.append(("DFLASH_PREFILL_DRAFTER", knobs.prefill_drafter))
    # A non-positive window is left unset so the server keeps its own default.
    if knobs.fa_window > 0:
        env.append(("DFLASH_FA_WINDOW", str(knobs.fa_window)))
    # Likewise the soft-close ratio is only sent when it is above zero.
    if knobs.think_soft_close_min_ratio > 0.0:
        env.append((
            "DFLASH_THINK_SOFT_CLOSE_MIN_RATIO",
            f"{knobs.think_soft_close_min_ratio:g}",
        ))
    if knobs.debug_thinking_logits:
        env.append(("DFLASH_DEBUG_THINKING_LOGITS", "1"))

    return DockerRunSpec(
        image=f"{cfg.image}:{cfg.variant}",
        name=cfg.container_name,
        gpus=True,
        remove=True,
        detach=False,
        port_publish=(cfg.port, 8080),
        volumes=_runtime_volumes(cfg),
        env=tuple(env),
    )
def docker_pull(image_tag: str) -> int:
    """Run ``docker pull`` for ``image_tag`` and report how it exited.

    No capture arguments are passed to ``subprocess.call``, so the child
    shares this process's stdio and docker's own progress output is shown
    as it is produced.

    Args:
        image_tag: Image reference handed to ``docker pull`` as a single
            argv element (no shell is involved).

    Returns:
        The exit status of the ``docker pull`` process.
    """
    command = ["docker", "pull", image_tag]
    exit_code = subprocess.call(command)
    return exit_code
Laguna's speculator is safetensors, not GGUF) carry +``draft_repo=None`` and run target-only. +""" + +from __future__ import annotations + +import hashlib +import os +import threading +import time +from dataclasses import dataclass +from pathlib import Path + +# hf-xet (huggingface_hub ≥ 1.16) streams the entire file in one final +# burst — the polling-based progress bar sits at 0% for ~14 minutes +# then snaps to 100% on a 17 GB GGUF. Force the chunked Python +# downloader instead so bytes grow continuously and the Rich bar tracks +# reality. Set before importing hf_hub_download so the import picks +# the env up. `setdefault` lets a user override on the command line. +os.environ.setdefault("HF_HUB_DISABLE_XET", "1") + +from huggingface_hub import HfApi, hf_hub_download # noqa: E402 +from huggingface_hub._local_folder import get_local_download_paths # noqa: E402 +from rich.console import Console +from rich.progress import ( + BarColumn, + DownloadColumn, + Progress, + TextColumn, + TimeRemainingColumn, + TransferSpeedColumn, +) + +from lucebox.types import Config, HostFacts + + +@dataclass(frozen=True, slots=True) +class ModelPreset: + """Canonical (target, draft) repo+filename pair for a supported model. + + ``draft_repo`` and ``draft_file`` may both be ``None`` for models + where no GGUF DFlash draft is published (e.g. Laguna's safetensors + speculator). In that case the entrypoint runs target-only — DFlash + speculative decoding is disabled but the server still works. + + ``speculator_dir`` names a directory under ``models/draft/`` that holds + a safetensors-format speculator (e.g. ``model.safetensors``). When + present on disk the server launch sets ``DFLASH_DRAFT`` to that + directory; absent, the server runs target-only. Unlike ``draft_file`` + (which marks the preset as incomplete when missing), ``speculator_dir`` + is optional supplementary hardware and doesn't affect installed_status. 
+ """ + + name: str + target_repo: str + target_file: str + draft_repo: str | None + draft_file: str | None + approx_total_gb: int + description: str = "" + speculator_dir: str | None = None + + @property + def has_draft(self) -> bool: + return bool(self.draft_repo and self.draft_file) + + +# Registry of supported models. Keyed by preset name; the CLI surface +# exposes these via `lucebox models download ` and the +# `lucebox models list` table. The values come straight from the model +# cards under share/model_cards/ — keep them in sync. +PRESETS: dict[str, ModelPreset] = { + "qwen3.6-27b": ModelPreset( + name="qwen3.6-27b", + target_repo="unsloth/Qwen3.6-27B-GGUF", + target_file="Qwen3.6-27B-Q4_K_M.gguf", + draft_repo="spiritbuun/Qwen3.6-27B-DFlash-GGUF", + draft_file="dflash-draft-3.6-q4_k_m.gguf", + approx_total_gb=17, + description="Qwen3.6 27B dense (Q4_K_M) + Qwen3.6 DFlash draft. Lucebox default.", + ), + "gemma-4-26b": ModelPreset( + name="gemma-4-26b", + target_repo="bartowski/google_gemma-4-26B-A4B-it-GGUF", + target_file="google_gemma-4-26B-A4B-it-Q4_K_M.gguf", + draft_repo="Lucebox/gemma-4-26B-A4B-it-DFlash-GGUF", + draft_file="gemma-4-26B-A4B-it-DFlash-q8_0.gguf", + approx_total_gb=18, + description="Gemma 4 26B-A4B IT MoE (Q4_K_M) + Lucebox DFlash q8_0 draft.", + ), + "gemma-4-31b": ModelPreset( + name="gemma-4-31b", + target_repo="bartowski/google_gemma-4-31B-it-GGUF", + target_file="google_gemma-4-31B-it-Q4_K_M.gguf", + draft_repo="Lucebox/gemma-4-31B-it-DFlash-GGUF", + draft_file="gemma-4-31B-it-DFlash-q8_0.gguf", + approx_total_gb=21, + description="Gemma 4 31B IT dense (Q4_K_M) + Lucebox DFlash q8_0 draft.", + ), + "laguna-xs.2": ModelPreset( + name="laguna-xs.2", + target_repo="Lucebox/Laguna-XS.2-GGUF", + target_file="laguna-xs2-Q4_K_M.gguf", + # Laguna's DFlash speculator is safetensors-format + # (poolside/Laguna-XS.2-speculator.dflash), downloaded manually + # into models/draft/laguna-xs2-speculator/. 
The download command + # doesn't fetch it automatically — it's opt-in. When present, + # speculator_dir wires it into DFLASH_DRAFT at server launch. + draft_repo=None, + draft_file=None, + speculator_dir="laguna-xs2-speculator", + approx_total_gb=20, + description=( + "Laguna-XS.2 MoE code model (Q4_K_M). " + "DFlash safetensors speculator in draft/laguna-xs2-speculator/ " + "is used automatically when present." + ), + ), + "qwen3.6-moe": ModelPreset( + name="qwen3.6-moe", + target_repo="unsloth/Qwen3.6-35B-A3B-GGUF", + # Unsloth's MoE repo publishes both a "UD" (dynamic) and a plain + # Q4_K_M family. Verified 2026-05-28 via HfApi.repo_info: the + # `-UD-Q4_K_M.gguf` variant (22.1 GB) is the canonical Q4_K_M + # release — there is no plain `Q4_K_M.gguf` on the MoE repo. + target_file="Qwen3.6-35B-A3B-UD-Q4_K_M.gguf", + # No DFlash draft GGUF has been published for the MoE variant + # (probed Lucebox/* and spiritbuun/* repos 2026-05-28 — none + # exist). Target-only, mirroring laguna-xs.2's wiring. The + # lucebox C++ server speaks the `qwen35moe` arch natively + # (server/src/qwen35moe/) so this runs without a draft. + draft_repo=None, + draft_file=None, + approx_total_gb=22, + description=( + "Qwen3.6 35B-A3B MoE (3B active per token), Q4_K_M unsloth " + "dynamic quant. Target-only — no DFlash MoE draft published " + "yet. Uses lucebox's qwen35moe arch backend." + ), + ), +} + +DEFAULT_PRESET = PRESETS["qwen3.6-27b"] + + +def resolve_preset(name: str | None) -> ModelPreset: + """Look up a preset by name, with a friendly error on typos. + + ``None`` (or empty string) resolves to :data:`DEFAULT_PRESET` so + callers and the CLI default both flow through one code path. + """ + if not name: + return DEFAULT_PRESET + if name in PRESETS: + return PRESETS[name] + # Build a suggestion list — show every known preset; the user's + # search space is small (4 entries today) so listing them all is + # cheaper and clearer than a fuzzy-match heuristic. 
+ known = ", ".join(sorted(PRESETS.keys())) + raise KeyError(f"unknown preset {name!r}. Known presets: {known}") + + +def _file_meta(api: HfApi, repo_id: str, filename: str) -> tuple[int, str | None]: + """Return (expected_size, lfs_sha256_or_None) for filename in repo_id.""" + info = api.model_info(repo_id, files_metadata=True) + for sib in info.siblings or []: + if sib.rfilename == filename: + sha = getattr(sib.lfs, "sha256", None) if sib.lfs else None + return int(sib.size or 0), sha + raise FileNotFoundError(f"{filename} not present in repo {repo_id}") + + +def _sha256(path: Path, chunk_mb: int = 16) -> str: + h = hashlib.sha256() + chunk = chunk_mb * 1024 * 1024 + with path.open("rb") as f: + while buf := f.read(chunk): + h.update(buf) + return h.hexdigest() + + +def _local_matches(path: Path, size: int, sha256: str | None, console: Console) -> bool: + """True iff a local file at `path` matches the expected size + sha256. + + Size mismatch shortcircuits (cheap). Sha256 is verified for LFS files + (multi-GB GGUFs always carry one) and skipped when the repo doesn't + expose a hash. Hashing 17 GB takes ~30s on a fast SSD — worth it to + avoid a multi-GB re-download on rate-limited / metered links. + """ + if not path.exists(): + return False + actual_size = path.stat().st_size + if actual_size != size: + console.print( + f" [yellow]✗[/yellow] {path.name} present but size {actual_size:,} != " + f"expected {size:,} — will re-download" + ) + return False + if sha256: + console.print(f" [dim]verifying sha256 of {path.name} ({actual_size / 1e9:.1f} GB)…[/dim]") + actual_sha = _sha256(path) + if actual_sha != sha256: + console.print( + f" [yellow]✗[/yellow] {path.name} sha256 {actual_sha[:12]}… != " + f"expected {sha256[:12]}… — will re-download" + ) + return False + return True + + +def _incomplete_path_candidates(local_dir: Path, filename: str, etag: str | None) -> list[Path]: + """Return likely paths of the partial file currently being written. 
+ + huggingface_hub 1.x (with hf-xet) stages downloads under + ``{local_dir}/.cache/huggingface/download/`` using a *hashed* name — + ``{short_hash(metadata_filename)}.{etag}.incomplete`` — so a naive + ``{filename}.incomplete`` poll never sees any growth and the + progress bar sits at 0 % for the whole multi-GB transfer. + + We get the *exact* expected staging path from + ``get_local_download_paths().incomplete_path(etag)`` when we already + know the LFS sha256 (which acts as the etag for Xet downloads), and + fall back to globbing every ``*.incomplete`` in the staging dir + otherwise. The legacy non-Xet downloader writes a ``.incomplete`` + next to the destination blob in ``~/.cache/huggingface/hub`` — but + when ``local_dir`` is set hf-hub always uses the local staging dir, + so the two candidates above cover every code path we hit. + """ + paths = get_local_download_paths(local_dir, filename) + candidates: list[Path] = [] + if etag: + candidates.append(paths.incomplete_path(etag)) + # Fallback: every .incomplete file in the staging dir. This is what + # rescues us when sha256 is unknown (non-LFS file) or when hf-hub + # changes the etag derivation again in some future release. + candidates.append(paths.metadata_path.parent) # sentinel: glob this dir + return candidates + + +def _current_bytes(target: Path, candidates: list[Path]) -> int: + """Best-effort byte count of the file currently being written.""" + if target.exists(): + try: + return target.stat().st_size + except OSError: + pass + for c in candidates: + if c.is_dir(): + # Glob every .incomplete in the staging dir; return the + # largest (there's typically only one in-flight transfer). 
+ largest = 0 + try: + for p in c.glob("*.incomplete"): + try: + largest = max(largest, p.stat().st_size) + except OSError: + continue + except OSError: + continue + if largest: + return largest + else: + try: + if c.exists(): + return c.stat().st_size + except OSError: + continue + return 0 + + +def _download_with_progress( + repo_id: str, + filename: str, + local_dir: Path, + expected_size: int, + console: Console, + etag: str | None = None, +) -> Path: + """Download a single HF file with a Rich progress bar. + + Runs hf_hub_download in a worker thread; the main thread polls the + growing file size and updates the Rich progress bar. The polled + target is computed via ``get_local_download_paths`` so we hit the + actual hf-xet staging path (a hashed filename under + ``.cache/huggingface/download/``), not a guess. + """ + local_dir.mkdir(parents=True, exist_ok=True) + target = local_dir / filename + candidates = _incomplete_path_candidates(local_dir, filename, etag) + + result: list[str | None] = [None] + error: list[BaseException | None] = [None] + + def _worker() -> None: + try: + result[0] = hf_hub_download( + repo_id=repo_id, + filename=filename, + local_dir=str(local_dir), + ) + except BaseException as exc: # propagate to main thread + error[0] = exc + + t = threading.Thread(target=_worker, daemon=True) + t.start() + + with Progress( + TextColumn("[cyan]{task.description}"), + BarColumn(bar_width=40), + DownloadColumn(), + TransferSpeedColumn(), + TimeRemainingColumn(), + console=console, + transient=False, + ) as progress: + task = progress.add_task(filename, total=expected_size or 1) + while t.is_alive(): + current = _current_bytes(target, candidates) + # Always tick the bar — even at 0 bytes — so Rich repaints + # the spinner/ETA and the user sees the UI is alive within + # the first poll tick rather than a blank "Downloading…" line. 
def _fetch(
    api: HfApi,
    repo_id: str,
    filename: str,
    local_dir: Path,
    console: Console,
) -> Path:
    """Make sure ``filename`` from ``repo_id`` is present in ``local_dir``.

    The expected size and sha256 are looked up first via ``_file_meta``.
    When ``_local_matches`` accepts the file already on disk, nothing is
    downloaded and the local path is returned. Otherwise the file is
    fetched through ``_download_with_progress``.

    Returns:
        Path of the verified or freshly downloaded file.
    """
    expected_size, expected_sha = _file_meta(api, repo_id, filename)
    destination = local_dir / filename
    if not _local_matches(destination, expected_size, expected_sha, console):
        # The sha256 is forwarded as the etag so the progress poller can
        # locate the exact staging file being written.
        return _download_with_progress(
            repo_id, filename, local_dir, expected_size, console, etag=expected_sha
        )
    console.print(f" [green]✓[/green] {filename} already present (size + sha256 match)")
    return destination
+ """ + preset = preset or DEFAULT_PRESET + console = Console() + api = HfApi() + models = cfg.models_dir + models.mkdir(parents=True, exist_ok=True) + draft = models / "draft" + draft.mkdir(exist_ok=True) + + try: + _fetch(api, preset.target_repo, preset.target_file, models, console) + if preset.has_draft: + # Narrow the optionals for the type-checker — has_draft is + # exactly the predicate that proves these aren't None. + assert preset.draft_repo is not None and preset.draft_file is not None + _fetch(api, preset.draft_repo, preset.draft_file, draft, console) + else: + console.print( + f" [dim]no DFlash draft published for {preset.name} — running target-only[/dim]" + ) + except Exception as exc: + console.print(f"[red]download failed:[/red] {exc}") + return 1 + return 0 + + +def _local_target_path(cfg: Config, preset: ModelPreset) -> Path: + return cfg.models_dir / preset.target_file + + +def _local_draft_path(cfg: Config, preset: ModelPreset) -> Path | None: + if not (preset.has_draft and preset.draft_file): + return None + return cfg.models_dir / "draft" / preset.draft_file + + +def installed_status(cfg: Config, preset: ModelPreset) -> str: + """Return ``"installed"`` / ``"partial"`` / ``"absent"`` for a preset. + + Size-only — doesn't hash. ``"installed"`` requires the target (and + draft when one is published) to exist on disk; ``"partial"`` means + at least one of the two is present but the set is incomplete. 
def installed_size_gb(cfg: Config, preset: ModelPreset) -> float:
    """Sum the on-disk sizes of the preset's files, in decimal gigabytes.

    The byte total is divided by ``1e9``, so the unit is GB (10**9 bytes),
    not GiB (2**30 bytes).

    Only the target file and the draft file (when the preset has one) are
    counted. A file that is missing or cannot be stat'ed contributes
    nothing: ``Path.stat`` raises an ``OSError`` subclass in both cases,
    so no separate existence check is needed.

    Returns:
        Total size in GB; ``0.0`` when neither file is present.
    """
    total = 0
    for path in (_local_target_path(cfg, preset), _local_draft_path(cfg, preset)):
        if path is None:
            # Preset has no draft file to account for.
            continue
        try:
            total += path.stat().st_size
        except OSError:
            continue
    return total / 1e9
def recommend_preset(host: HostFacts) -> str | None:
    """Suggest a preset name for a first-run install from the host's VRAM.

    Thresholds on ``host.vram_gb``:

    * 22 or more  -> ``"qwen3.6-27b"``
    * 16 up to 21 -> ``"laguna-xs.2"``
    * below 16    -> ``None`` (no recommendation; the caller must ask)

    Returns:
        The preset name, or ``None`` when no tier applies.
    """
    vram = host.vram_gb
    if vram < 16:
        return None
    return "qwen3.6-27b" if vram >= 22 else "laguna-xs.2"
def _check_nvidia_driver(host: HostFacts) -> CheckResult:
    """Classify the GPU vendor and NVIDIA driver state of the host.

    Outcomes, checked in this order:

    * AMD GPU                        -> ``gpu`` / fail (with a build-from-source hint)
    * any other non-NVIDIA vendor    -> ``gpu`` / fail
    * NVIDIA, empty driver version   -> ``driver`` / warn
    * NVIDIA, driver major below 525 -> ``driver`` / fail
    * otherwise                      -> ``driver`` / ok
    """
    vendor = host.gpu_vendor
    if vendor == "amd":
        return CheckResult(
            name="gpu",
            severity="fail",
            message="AMD GPU detected — prebuilt images are NVIDIA-only",
            hint="Build dflash from source with HIP; see dflash/README.md",
        )
    if vendor != "nvidia":
        return CheckResult(name="gpu", severity="fail", message="no NVIDIA GPU detected")

    major = host.driver_major
    if not host.driver_version:
        # The vendor was detected but no driver version string was reported.
        return CheckResult(
            name="driver",
            severity="warn",
            message="nvidia-smi present but NVML query failed (likely driver/library mismatch)",
            hint="reboot, or reinstall the matching NVIDIA driver",
        )
    if major < 525:
        return CheckResult(
            name="driver",
            severity="fail",
            message=f"driver r{major} too old (need r525+ for cuda12)",
            hint="upgrade the NVIDIA driver",
        )
    return CheckResult(
        name="driver",
        severity="ok",
        message=f"nvidia r{major} ({host.driver_version})",
    )
def render(console: Console, host: HostFacts, results: list[CheckResult]) -> Severity:
    """Print the readiness report and return the worst severity.

    Output, in order: a one-line host summary, one row per check (plus a
    dimmed hint row when the check carries one), the host-facts section
    from ``render_host_facts``, and a closing verdict line.

    The severity column is padded on the *visible* label, outside the
    markup tags. Padding the whole markup string (``[green]OK[/green]``
    and friends) to a fixed width gives each severity a different visible
    width once Rich removes the tags, so the name and message columns no
    longer line up between OK, WARN and FAIL rows.

    Args:
        console: Rich console to print to.
        host: Host facts used for the summary line.
        results: Check results to list.

    Returns:
        The aggregate severity computed by ``aggregate(results)``.
    """
    summary = f"[bold]Host:[/bold] {host.nproc} CPUs · {host.ram_gb} GB RAM"
    if host.gpu_vendor == "nvidia" and host.gpu_name:
        summary += f" · {host.gpu_name} · {host.vram_gb} GB VRAM" + (
            f" (sm_{host.gpu_sm})" if host.gpu_sm else ""
        )
    if host.is_wsl:
        summary += " · WSL2"
    console.print(summary)
    console.print()

    # (colour, visible label) per severity.
    sev_style = {
        "ok": ("green", "OK"),
        "warn": ("yellow", "WARN"),
        "fail": ("red", "FAIL"),
    }
    label_width = max(len(label) for _, label in sev_style.values())
    for r in results:
        colour, label = sev_style[r.severity]
        # Pad after the closing tag so the padding counts visible cells only.
        tag = f"[{colour}]{label}[/{colour}]" + " " * (label_width - len(label))
        console.print(f" {tag} {r.name:<8} {r.message}")
        if r.hint:
            # Same column widths as the row above, so the hint sits under the message.
            console.print(f" {'':<{label_width}} {'':<8} [dim]{r.hint}[/dim]")

    render_host_facts(console)

    worst = aggregate(results)
    console.print()
    if worst == "ok":
        console.print("[green]All checks passed.[/green]")
    elif worst == "warn":
        console.print("[yellow]Checks passed with warnings.[/yellow]")
    else:
        console.print(
            "[red]Critical checks failed — fix the issues above before 'lucebox start'.[/red]"
        )
    return worst
+ """ + console.print() + console.print("[bold]Host facts[/bold] (LUCEBOX_HOST_*, surfaced as /props.host)") + facts = [ + ("os", os.environ.get("LUCEBOX_HOST_OS_PRETTY", "")), + ("kernel", os.environ.get("LUCEBOX_HOST_KERNEL", "")), + ("wsl_version", os.environ.get("LUCEBOX_HOST_WSL_VERSION", "")), + ("docker", os.environ.get("LUCEBOX_HOST_DOCKER_VERSION", "")), + ("nvidia_driver", os.environ.get("LUCEBOX_HOST_DRIVER_VERSION", "")), + ("nvidia_ctk", os.environ.get("LUCEBOX_HOST_NVIDIA_CTK_VERSION", "")), + ("cpu", os.environ.get("LUCEBOX_HOST_CPU_MODEL", "")), + ("cuda_visible_devices", os.environ.get("LUCEBOX_HOST_CUDA_VISIBLE_DEVICES", "")), + ] + for key, value in facts: + display = value if value else "[dim](unset)[/dim]" + console.print(f" {key:<22} {display}") + + # Multi-GPU table — one line per device. LUCEBOX_HOST_GPU_LIST_CSV + # carries the verbatim nvidia-smi CSV the host wrapper probed. + csv = os.environ.get("LUCEBOX_HOST_GPU_LIST_CSV", "") + if csv: + console.print(" gpus:") + for line in csv.splitlines(): + line = line.strip() + if not line: + continue + parts = [c.strip() for c in line.split(",")] + if len(parts) >= 7: + idx, _uuid, _pci, name, sm, mem, plimit = parts[:7] + console.print( + f" [{idx}] {name} (sm_{sm}, {mem}, {plimit})" + ) + else: + console.print(f" {line}") + else: + console.print(" gpus [dim](none — nvidia-smi unavailable)[/dim]") diff --git a/lucebox/src/lucebox/host_facts.py b/lucebox/src/lucebox/host_facts.py new file mode 100644 index 000000000..5deb6721a --- /dev/null +++ b/lucebox/src/lucebox/host_facts.py @@ -0,0 +1,58 @@ +"""Read HostFacts from the LUCEBOX_HOST_* env vars that lucebox.sh exports. + +We deliberately don't try to detect anything ourselves on the Python side — +inside the container, /proc/meminfo reports the container's view, not the +host's, and nvidia-smi may or may not be available depending on how the +caller invoked us. 
The host wrapper is the only thing that can see the +truth, and it's already paid for the probe. +""" + +from __future__ import annotations + +import os +from typing import cast + +from lucebox.types import CtkStatus, GpuVendor, HostFacts + + +def _env_int(key: str, default: int = 0) -> int: + raw = os.environ.get(key, "").strip() + if not raw: + return default + try: + return int(raw) + except ValueError: + return default + + +def _env_bool(key: str) -> bool: + return os.environ.get(key, "").strip() in {"1", "true", "yes", "on"} + + +def from_env() -> HostFacts: + vendor: GpuVendor = "none" + raw_vendor = os.environ.get("LUCEBOX_HOST_GPU_VENDOR", "none") + if raw_vendor in {"nvidia", "amd", "none"}: + vendor = cast(GpuVendor, raw_vendor) + + ctk: CtkStatus = "none" + raw_ctk = os.environ.get("LUCEBOX_HOST_HAS_CTK", "none") + if raw_ctk in {"runtime", "cdi", "installed-unwired", "none"}: + ctk = cast(CtkStatus, raw_ctk) + + return HostFacts( + nproc=_env_int("LUCEBOX_HOST_NPROC"), + ram_gb=_env_int("LUCEBOX_HOST_RAM_GB"), + gpu_vendor=vendor, + gpu_name=os.environ.get("LUCEBOX_HOST_GPU_NAME", ""), + gpu_count=_env_int("LUCEBOX_HOST_GPU_COUNT"), + vram_gb=_env_int("LUCEBOX_HOST_VRAM_GB"), + gpu_sm=os.environ.get("LUCEBOX_HOST_GPU_SM", ""), + driver_version=os.environ.get("LUCEBOX_HOST_DRIVER_VERSION", ""), + driver_major=_env_int("LUCEBOX_HOST_DRIVER_MAJOR"), + has_systemd=_env_bool("LUCEBOX_HOST_HAS_SYSTEMD"), + is_wsl=_env_bool("LUCEBOX_HOST_IS_WSL"), + has_docker=_env_bool("LUCEBOX_HOST_HAS_DOCKER"), + docker_version=os.environ.get("LUCEBOX_HOST_DOCKER_VERSION", ""), + ctk=ctk, + ) diff --git a/lucebox/src/lucebox/py.typed b/lucebox/src/lucebox/py.typed new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/lucebox/src/lucebox/py.typed @@ -0,0 +1 @@ + diff --git a/lucebox/src/lucebox/types.py b/lucebox/src/lucebox/types.py new file mode 100644 index 000000000..e1d3620d7 --- /dev/null +++ b/lucebox/src/lucebox/types.py @@ -0,0 +1,140 @@ +"""Shared 
def default_models_dir() -> Path:
    """Resolve the default models directory following the XDG base-dir rules.

    The directory is ``<data home>/lucebox/models``, where the data home is
    ``$XDG_DATA_HOME`` when that variable holds an absolute path and
    ``~/.local/share`` otherwise.

    An unset, empty or *relative* ``XDG_DATA_HOME`` falls back to the
    default: the XDG Base Directory specification requires relative values
    to be treated as invalid and ignored. Honouring one would make the
    result depend on the current working directory.

    Returns:
        Path of the models directory. Nothing is created on disk.
    """
    raw = os.environ.get("XDG_DATA_HOME", "")
    if raw and Path(raw).is_absolute():
        base = Path(raw)
    else:
        base = Path.home() / ".local" / "share"
    return base / "lucebox" / "models"
+ + The 11 fields below (budget through prefill_drafter) form the strict + allowlist mirrored by lucebench's snapshot config.json — keep both + in lockstep. ``think_max`` is a separate phase-1 thinking cap that + isn't part of the runtime snapshot allowlist (it's per-request, not + per-server). + """ + + budget: int = 22 + max_ctx: int = 16384 + lazy: bool = False + prefix_cache_slots: int = 0 + prefill_cache_slots: int = 0 + cache_type_k: str = "" + cache_type_v: str = "" + prefill_mode: Literal["off", "auto", "always"] = "off" + prefill_keep_ratio: float = 0.05 + prefill_threshold: int = 32000 + prefill_drafter: str = "" + # Phase-1 (thinking) cap when a request opts into thinking. Default mirrors + # antirez/ds4 ds4_eval.c: think_max_tokens = max_tokens - hard_limit_reply + # budget = 16000 - 512 = 15488. The server's own hardcoded default is 10000. + think_max: int = 15488 + # Flash-attention sliding-window on full-attention layers. 0 = full + # attention (server default). On gemma4's hybrid iSWA the full-attn + # layers grow KV linearly with max_ctx; a sparse fa_window keeps + # decode compute bounded on long prompts without changing the KV + # footprint. Q: passed through to the server's `--fa-window ` + # flag (see server/src/server/server_main.cpp). + fa_window: int = 0 + # Soft-close thinking termination dial (PR #326 in lucebox-hub). + # Lets the AR loop force early when the close-token logit + # comes within this probability ratio of the chosen-token logit. + # Range [0.0, 1.0]; 0.0 = disabled (byte-identical to pre-change + # behaviour). 0.5 = close when close-token prob >= 0.5 * chosen-token + # prob; 0.9 = aggressive. Qwen3.5/3.6 AR path only in v1. Surfaced + # to the server via DFLASH_THINK_SOFT_CLOSE_MIN_RATIO → + # --think-soft-close-min-ratio. 
+ think_soft_close_min_ratio: float = 0.0 + # Diagnostic: when True, surface --debug-thinking-logits to the + # server CLI via DFLASH_DEBUG_THINKING_LOGITS=1, producing one + # stderr line per thinking AR step recording the close-vs-chosen + # logit gap. Used to fit a sliding-ratio curve from real trajectory + # data. Heavy stderr (one line per thinking token across all + # in-flight requests); leave off in production. + debug_thinking_logits: bool = False + + +@dataclass(frozen=True, slots=True) +class ModelMeta: + """Which preset the operator picked at configure/download time. + + Persisted under ``[model]`` in config.toml so `lucebox serve` can + pass ``DFLASH_TARGET=/opt/lucebox-hub/server/models/<target_file>`` and + ``DFLASH_DRAFT`` for the draft GGUF (when one is published for the + preset). The entrypoint's "multiple candidate GGUFs" branch never + has to guess which one to load. + + ``target_file`` and ``draft_file`` are advanced overrides — when set + they win over the preset's registry default. Empty strings mean + "fall back to the registry value for [model] preset, then to the + entrypoint's autodetect". 
+ """ + + preset: str = "" + target_file: str = "" + draft_file: str = "" + + +@dataclass(frozen=True, slots=True) +class Config: + """The whole config.toml, materialized.""" + + variant: Variant = "cuda12" + image: str = "ghcr.io/luce-org/lucebox-hub" + container_name: str = "lucebox" + port: int = 8080 + models_dir: Path = field(default_factory=default_models_dir) + dflash: DflashRuntime = field(default_factory=DflashRuntime) + host: HostFacts = field(default_factory=HostFacts) + model: ModelMeta = field(default_factory=ModelMeta) diff --git a/lucebox/tests/test_autotune.py b/lucebox/tests/test_autotune.py new file mode 100644 index 000000000..c47c17c9e --- /dev/null +++ b/lucebox/tests/test_autotune.py @@ -0,0 +1,48 @@ +from lucebox.autotune import runtime_from_host +from lucebox.types import HostFacts + + +def test_wsl_24gb_defaults_leave_cuda_headroom() -> None: + runtime = runtime_from_host(HostFacts(vram_gb=24, is_wsl=True)) + + assert runtime.budget == 16 + # Bumped 65536 → 98304 on 2026-05-30 after the gemma4-26b coding- + # agent-loop sweep proved 98K serves 90K-token agentic prompts + # with ~3 GB VRAM headroom and no CUDA VMM failures on the 3090 Ti + # WSL configuration (see + # docs/experiments/gemma4-26b-coding-agent-loop-sweep-2026-05-30.md). + assert runtime.max_ctx == 98304 + # lazy is False because the heuristic path does NOT set prefill_drafter, + # and the C++ server silently ignores --lazy-draft without it. Flipping + # to False makes the host config match runtime behaviour. See the + # `entrypoint.sh` warning emitted when the two are out-of-sync. 
+ assert runtime.lazy is False + assert runtime.prefix_cache_slots == 0 + + +def test_native_24gb_caps_context_below_vmm_failure_boundary() -> None: + runtime = runtime_from_host(HostFacts(vram_gb=24, is_wsl=False)) + + assert runtime.budget == 22 + assert runtime.max_ctx == 98304 + assert runtime.lazy is False # see WSL test above + assert runtime.prefix_cache_slots == 0 + + +def test_no_heuristic_tier_sets_lazy_without_prefill_drafter() -> None: + """Regression for the `--lazy-draft ignored` silent no-op. + + The C++ dflash_server drops `--lazy-draft` unless `--prefill-drafter` + is also passed. The heuristic doesn't set `prefill_drafter`, so any + tier that sets `lazy=True` would produce a host config that doesn't + match what actually ran — exactly the mismatch the sindri decode + sweep tripped over (every docker.stderr contained the warning). + """ + for vram in (0, 8, 16, 24, 40, 80): + for is_wsl in (False, True): + rt = runtime_from_host(HostFacts(vram_gb=vram, is_wsl=is_wsl)) + if rt.lazy: + assert rt.prefill_drafter, ( + f"vram={vram} is_wsl={is_wsl}: lazy=True without " + f"prefill_drafter → silent no-op on the C++ server" + ) diff --git a/lucebox/tests/test_check.py b/lucebox/tests/test_check.py new file mode 100644 index 000000000..3fdd469d9 --- /dev/null +++ b/lucebox/tests/test_check.py @@ -0,0 +1,118 @@ +"""Tests for ``lucebox check`` — readiness report. + +The check command has two surfaces that must stay independent: + + * pass/fail checks → drive the exit code, so the command is usable + as a CI exit-code gate; + * Host facts section → informational, prints the LUCEBOX_HOST_* + convoy that gets baked into /opt/lucebox-hub/HOST_INFO inside + the container. 
+""" + +from __future__ import annotations + +import pytest +from lucebox.cli import app +from lucebox.types import HostFacts +from rich.console import Console +from typer.testing import CliRunner + +from lucebox import host_check + + +def test_check_prints_host_facts_section(monkeypatch: pytest.MonkeyPatch) -> None: + """`lucebox check` includes a Host facts block sourced from LUCEBOX_HOST_*.""" + monkeypatch.setenv("LUCEBOX_HOST_OS_PRETTY", "Ubuntu 22.04.3 LTS") + monkeypatch.setenv("LUCEBOX_HOST_KERNEL", "6.6.87.2-microsoft-standard-WSL2") + monkeypatch.setenv("LUCEBOX_HOST_WSL_VERSION", "wsl2") + monkeypatch.setenv("LUCEBOX_HOST_DOCKER_VERSION", "29.1.3") + monkeypatch.setenv("LUCEBOX_HOST_DRIVER_VERSION", "596.36") + monkeypatch.setenv("LUCEBOX_HOST_NVIDIA_CTK_VERSION", "1.16.2") + monkeypatch.setenv("LUCEBOX_HOST_CPU_MODEL", "Intel Test CPU") + monkeypatch.setenv( + "LUCEBOX_HOST_GPU_LIST_CSV", + "0, GPU-abc, 00000000:01:00.0, NVIDIA RTX 5090, 12.0, 24576 MiB, 175.00 W", + ) + # Stub HostFacts so the pass/fail checks succeed at least minimally. + # `cli.check` imports `from_env` into its module namespace, so patch + # both names. + def stub() -> HostFacts: + return HostFacts( + nproc=24, + ram_gb=64, + gpu_vendor="nvidia", + gpu_name="NVIDIA RTX 5090", + gpu_count=1, + vram_gb=24, + gpu_sm="120", + driver_version="596.36", + driver_major=596, + has_systemd=True, + is_wsl=True, + has_docker=True, + docker_version="29.1.3", + ctk="runtime", + ) + monkeypatch.setattr("lucebox.host_facts.from_env", stub) + monkeypatch.setattr("lucebox.cli.from_env", stub) + result = CliRunner().invoke(app, ["check"]) + # The pass/fail half of `check` should still exit 0 on this stubbed host. + assert result.exit_code == 0, result.stdout + assert "Host facts" in result.stdout + assert "Ubuntu 22.04.3 LTS" in result.stdout + assert "wsl2" in result.stdout + assert "1.16.2" in result.stdout + assert "Intel Test CPU" in result.stdout + # Multi-GPU table line. 
+ assert "NVIDIA RTX 5090" in result.stdout + + +def test_render_host_facts_unset_env_shows_placeholders( + monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str] +) -> None: + """All LUCEBOX_HOST_* unset → section still renders with explicit (unset) markers.""" + for k in list(__import__("os").environ): + if k.startswith("LUCEBOX_HOST_"): + monkeypatch.delenv(k, raising=False) + console = Console(force_terminal=False, no_color=True, record=True) + host_check.render_host_facts(console) + text = console.export_text() + assert "Host facts" in text + # Multi-line section renders even when no env was passed in. + assert "(unset)" in text + assert "gpus" in text + + +def test_check_exit_code_independent_of_host_facts( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Host facts section must not change the exit-code semantics of check. + + Drives the pass/fail logic through a known-fail HostFacts (no docker) + and asserts the exit code is still 1, regardless of what the Host + facts block prints. + """ + monkeypatch.setenv("LUCEBOX_HOST_OS_PRETTY", "Bare Linux") + def stub() -> HostFacts: + return HostFacts( + nproc=8, + ram_gb=16, + gpu_vendor="nvidia", + gpu_name="X", + gpu_count=1, + vram_gb=24, + gpu_sm="86", + driver_version="555.00", + driver_major=555, + has_systemd=False, + is_wsl=False, + has_docker=False, # → fail + docker_version="", + ctk="none", # also fail + ) + monkeypatch.setattr("lucebox.host_facts.from_env", stub) + monkeypatch.setattr("lucebox.cli.from_env", stub) + result = CliRunner().invoke(app, ["check"]) + assert result.exit_code == 1 + # Host facts block still printed despite the failure. 
+ assert "Host facts" in result.stdout diff --git a/lucebox/tests/test_cli.py b/lucebox/tests/test_cli.py new file mode 100644 index 000000000..f7628e8b7 --- /dev/null +++ b/lucebox/tests/test_cli.py @@ -0,0 +1,102 @@ +"""Tests for the top-level Typer surface.""" + +from __future__ import annotations + +import os + +import pytest +from lucebox.cli import app +from typer.testing import CliRunner + + +def test_config_subcommand_is_registered() -> None: + result = CliRunner().invoke(app, ["config", "--help"]) + assert result.exit_code == 0 + assert "get" in result.output + assert "set" in result.output + assert "unset" in result.output + + +def test_models_subcommand_is_registered() -> None: + result = CliRunner().invoke(app, ["models", "--help"]) + assert result.exit_code == 0 + assert "list" in result.output + assert "download" in result.output + + +@pytest.mark.parametrize( + "verb", + [ + "autotune", + "sweep", + "profile", + "smoke", + "claude", + "codex", + "opencode", + "hermes", + "pi", + "openclaw", + ], +) +def test_deferred_verbs_are_not_registered(verb: str) -> None: + """autotune/sweep, profile/smoke and the client launchers are deferred to + follow-up PRs — this core CLI (launch / serve / install / download) must + not expose them.""" + result = CliRunner().invoke(app, [verb, "--help"]) + assert result.exit_code != 0 + + +def test_core_verbs_present_in_app() -> None: + """The core launch/serve surface stays wired into the Typer command table.""" + registered = { + c.name or (c.callback.__name__ if c.callback else "") + for c in app.registered_commands + } + for verb in ("check", "pull", "print-run", "print-serve-argv", "version"): + assert verb in registered + + +def test_legacy_subcommands_are_removed() -> None: + """`configure` and `download-models` were folded into config/models.""" + cfg = CliRunner().invoke(app, ["configure", "--help"]) + assert cfg.exit_code != 0 + dl = CliRunner().invoke(app, ["download-models", "--help"]) + assert dl.exit_code != 
0 + + +def test_server_run_spec_forwards_lucebox_host_env(monkeypatch) -> None: + """server_run_spec carries LUCEBOX_HOST_* from the orchestrator into the server. + + lucebox.sh exports the LUCEBOX_HOST_* convoy before `docker run` on the + orchestrator; the orchestrator inherits them and we forward each one + as ``-e KEY=VALUE`` to the server container so entrypoint.sh's + write_host_info() can populate /opt/lucebox-hub/HOST_INFO. + """ + import lucebox.docker_run as docker_run + from lucebox.config import live_config + + # Scrub any pre-existing LUCEBOX_HOST_* env so the test sees only what we set. + for k in list(os.environ): + if k.startswith("LUCEBOX_HOST_"): + monkeypatch.delenv(k, raising=False) + monkeypatch.setenv("LUCEBOX_HOST_OS_PRETTY", "Ubuntu 22.04.3 LTS") + monkeypatch.setenv("LUCEBOX_HOST_KERNEL", "6.6.87.2-microsoft-standard-WSL2") + monkeypatch.setenv("LUCEBOX_HOST_WSL_VERSION", "wsl2") + monkeypatch.setenv( + "LUCEBOX_HOST_GPU_LIST_CSV", + "0, GPU-x, 00000000:01:00.0, NVIDIA RTX 5090, 12.0, 24576 MiB, 175.00 W", + ) + + cfg = live_config() + spec = docker_run.server_run_spec(cfg) + env_keys = {k for k, _ in spec.env} + assert "LUCEBOX_HOST_OS_PRETTY" in env_keys + assert "LUCEBOX_HOST_KERNEL" in env_keys + assert "LUCEBOX_HOST_WSL_VERSION" in env_keys + assert "LUCEBOX_HOST_GPU_LIST_CSV" in env_keys + # DFLASH_* still present. + assert "DFLASH_BUDGET" in env_keys + # Values surface verbatim. 
+ env_map = dict(spec.env) + assert env_map["LUCEBOX_HOST_OS_PRETTY"] == "Ubuntu 22.04.3 LTS" diff --git a/lucebox/tests/test_config.py b/lucebox/tests/test_config.py new file mode 100644 index 000000000..d60f3d882 --- /dev/null +++ b/lucebox/tests/test_config.py @@ -0,0 +1,176 @@ +"""Tests for the sparse TOML config persistence layer.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from lucebox.config import config_get, config_set, config_unset + +from lucebox import config + + +def test_legacy_env_migration_skips_invalid_values(tmp_path: Path) -> None: + legacy = tmp_path / "config.env" + legacy.write_text("DFLASH_BUDGET=not-an-int\nDFLASH_MAX_CTX=65536\nDFLASH_LAZY=true\n") + + cfg, _doc = config._load_legacy_env(legacy) + + assert cfg.dflash.budget == 22 + assert cfg.dflash.max_ctx == 65536 + assert cfg.dflash.lazy is True + + +def test_image_variant_round_trips_from_toml(tmp_path: Path) -> None: + path = tmp_path / "config.toml" + path.write_text( + "[image]\n" + 'registry = "ghcr.io/luce-org/lucebox-hub"\n' + 'variant = "integration-props-uv-squared-clean-cuda12"\n' + ) + + cfg = config._load_toml(path) + + assert cfg.image == "ghcr.io/luce-org/lucebox-hub" + assert cfg.variant == "integration-props-uv-squared-clean-cuda12" + + +def test_model_preset_round_trips_through_set_and_load(tmp_path: Path) -> None: + """Setting model.preset writes a sparse TOML doc that loads back correctly.""" + path = tmp_path / "config.toml" + config_set("model.preset", "gemma-4-26b", path=path) + config_set("model.target_file", "google_gemma-4-26B-A4B-it-Q4_K_M.gguf", path=path) + + cfg = config._load_toml(path) + assert cfg.model.preset == "gemma-4-26b" + assert cfg.model.target_file == "google_gemma-4-26B-A4B-it-Q4_K_M.gguf" + + +def test_legacy_config_without_model_section_stays_unpinned(tmp_path: Path) -> None: + """Legacy configs (no [model] section) must NOT silently pin to qwen.""" + path = tmp_path / "config.toml" + 
path.write_text('[image]\nvariant = "cuda12"\n') + + cfg = config._load_toml(path) + + assert cfg.model.preset == "" + assert cfg.model.target_file == "" + assert cfg.model.draft_file == "" + + +def test_model_section_picks_target_file_from_registry(tmp_path: Path) -> None: + """A bare [model] preset="..." entry pulls target_file from the registry.""" + path = tmp_path / "config.toml" + path.write_text('[model]\npreset = "gemma-4-31b"\n') + + cfg = config._load_toml(path) + + assert cfg.model.preset == "gemma-4-31b" + assert cfg.model.target_file == "google_gemma-4-31B-it-Q4_K_M.gguf" + + +def test_model_section_picks_draft_file_from_registry(tmp_path: Path) -> None: + """When preset has a published draft GGUF, [model] preset="..." picks draft_file too.""" + path = tmp_path / "config.toml" + path.write_text('[model]\npreset = "qwen3.6-27b"\n') + + cfg = config._load_toml(path) + assert cfg.model.preset == "qwen3.6-27b" + assert cfg.model.draft_file == "dflash-draft-3.6-q4_k_m.gguf" + + +def test_config_set_writes_only_named_key(tmp_path: Path) -> None: + """Sparse persistence: setting one key does NOT serialize every default.""" + path = tmp_path / "config.toml" + config_set("dflash.budget", 16, path=path) + body = path.read_text() + # The only [dflash] field that should appear is budget — none of the others. 
+ assert "[dflash]" in body + assert "budget = 16" in body + assert "max_ctx" not in body # not user-set, must not appear + assert "lazy" not in body + assert "[host]" not in body # whole section absent + assert "[image]" not in body # not touched either + + +def test_config_set_preserves_existing_keys(tmp_path: Path) -> None: + """Setting a new key leaves previously-set keys intact.""" + path = tmp_path / "config.toml" + config_set("dflash.budget", 16, path=path) + config_set("model.preset", "qwen3.6-27b", path=path) + body = path.read_text() + assert "budget = 16" in body + assert 'preset = "qwen3.6-27b"' in body + + +def test_config_unset_removes_one_key(tmp_path: Path) -> None: + """Unset removes the named key and leaves siblings alone.""" + path = tmp_path / "config.toml" + config_set("dflash.budget", 16, path=path) + config_set("dflash.max_ctx", 65536, path=path) + changed = config_unset("dflash.budget", path=path) + assert changed is True + body = path.read_text() + assert "budget" not in body + assert "max_ctx = 65536" in body + + +def test_config_unset_drops_empty_section(tmp_path: Path) -> None: + """Unsetting the last key in a section drops the empty section.""" + path = tmp_path / "config.toml" + config_set("dflash.budget", 16, path=path) + config_unset("dflash.budget", path=path) + body = path.read_text() + # The section may still exist as an empty table but `[dflash]` shouldn't. + assert "[dflash]" not in body + + +def test_config_get_reports_origin(tmp_path: Path) -> None: + """Each key carries an origin label — `file` when overridden, `default` otherwise.""" + path = tmp_path / "config.toml" + config_set("dflash.budget", 9, path=path) + entries = config_get(path=path) + assert entries["dflash.budget"] == (9, "file") + # max_ctx wasn't set so should report the live default. 
+ value, origin = entries["dflash.max_ctx"] + assert origin == "default" + assert value == 16384 # DflashRuntime.max_ctx default + + +def test_config_get_rejects_unknown_key(tmp_path: Path) -> None: + path = tmp_path / "config.toml" + with pytest.raises(KeyError): + config_get("not.a.key", path=path) + + +def test_config_set_rejects_unknown_key(tmp_path: Path) -> None: + path = tmp_path / "config.toml" + with pytest.raises(KeyError): + config_set("not.a.key", 1, path=path) + + +def test_config_set_auto_creates_file(tmp_path: Path) -> None: + """`config set` creates a missing config.toml on first write.""" + path = tmp_path / "config.toml" + assert not path.exists() + config_set("port", 9090, path=path) + assert path.exists() + assert "port = 9090" in path.read_text() + + +def test_save_writes_sparse_doc(tmp_path: Path) -> None: + """`save` writes whatever doc is handed in — no defaults serialized.""" + path = tmp_path / "config.toml" + cfg = config._from_dict({}) + config.save(cfg, path, doc={"dflash": {"budget": 9}}) + body = path.read_text() + assert "budget = 9" in body + assert "max_ctx" not in body + + +def test_live_config_uses_recommend_preset_indirectly(tmp_path: Path) -> None: + """``live_config()`` returns a Config — no implicit preset when none given.""" + # The function probes the env-provided HostFacts; with no preset arg + # we must NOT silently pin one (that would surprise legacy installs). 
+ cfg = config.live_config() + assert cfg.model.preset == "" diff --git a/lucebox/tests/test_config_cli.py b/lucebox/tests/test_config_cli.py new file mode 100644 index 000000000..446ab41b6 --- /dev/null +++ b/lucebox/tests/test_config_cli.py @@ -0,0 +1,127 @@ +"""Tests for the ``lucebox config`` sub-app CLI.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from lucebox.cli import app +from typer.testing import CliRunner + + +def _set_config_path(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + monkeypatch.setenv("LUCEBOX_HOME", str(tmp_path)) + return tmp_path / "config.toml" + + +def test_config_set_then_get_round_trip( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + cfg_path = _set_config_path(tmp_path, monkeypatch) + set_result = CliRunner().invoke(app, ["config", "set", "dflash.budget=12"]) + assert set_result.exit_code == 0 + assert cfg_path.exists() + get_result = CliRunner().invoke(app, ["config", "get", "dflash.budget"]) + assert get_result.exit_code == 0 + assert "12" in get_result.stdout + assert "from file" in get_result.stdout + + +def test_config_get_with_no_key_lists_every_registered_key( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _set_config_path(tmp_path, monkeypatch) + result = CliRunner().invoke(app, ["config", "get"]) + assert result.exit_code == 0 + # Every registered dotted key shows up at least once. 
+ for key in ("model.preset", "dflash.budget", "port"): + assert key in result.stdout + + +def test_config_unset_drops_key( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + cfg_path = _set_config_path(tmp_path, monkeypatch) + CliRunner().invoke(app, ["config", "set", "dflash.budget=9"]) + assert "budget = 9" in cfg_path.read_text() + unset_result = CliRunner().invoke(app, ["config", "unset", "dflash.budget"]) + assert unset_result.exit_code == 0 + body = cfg_path.read_text() + assert "budget" not in body + + +def test_config_set_unknown_key_errors( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _set_config_path(tmp_path, monkeypatch) + result = CliRunner().invoke(app, ["config", "set", "totally.unknown=1"]) + assert result.exit_code == 2 + + +def test_config_set_rejects_missing_equals( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _set_config_path(tmp_path, monkeypatch) + result = CliRunner().invoke(app, ["config", "set", "dflash.budget"]) + assert result.exit_code == 2 + + +def test_config_set_creates_file_when_missing( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + cfg_path = _set_config_path(tmp_path, monkeypatch) + assert not cfg_path.exists() + CliRunner().invoke(app, ["config", "set", "port=9090"]) + assert cfg_path.exists() + assert "port = 9090" in cfg_path.read_text() + + +def test_load_or_build_env_overrides_persisted_config( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """LUCEBOX_* env vars must win over config.toml. + + Regression test for the precedence bug fixed in this commit: prior + to the fix, `_load_or_build()` returned `config_mod.load()`'s result + verbatim when config.toml existed, so the systemd unit's + `Environment=LUCEBOX_IMAGE=...` was silently ignored. Sindri's + config.toml had `[image]` without `registry`, which made the + dataclass default `ghcr.io/luce-org/lucebox-hub` win over the + intended easel image. 
+ """ + from lucebox.cli import _load_or_build + + cfg_path = _set_config_path(tmp_path, monkeypatch) + # Write a config.toml WITHOUT an image.registry line — the + # bug-trigger shape on sindri. + cfg_path.write_text( + '[image]\nvariant = "cuda12"\n[runtime]\nport = 9090\n' + '[dflash]\nbudget = 22\n' + ) + # Env should override what config.toml says (and what dataclass + # defaults fill in for missing keys). + monkeypatch.setenv("LUCEBOX_IMAGE", "ghcr.io/myfork/lucebox-hub") + monkeypatch.setenv("LUCEBOX_PORT", "7777") + monkeypatch.setenv("LUCEBOX_CONTAINER", "lucebox-test") + cfg = _load_or_build() + assert cfg.image == "ghcr.io/myfork/lucebox-hub" # env beats dataclass default + assert cfg.port == 7777 # env beats config.toml + assert cfg.container_name == "lucebox-test" # env applied + # variant is in config.toml — config.toml value (no env override). + assert cfg.variant == "cuda12" + # dflash IS persisted in config.toml — env doesn't touch it (no DFLASH_* + # env hooks at this layer). + assert cfg.dflash.budget == 22 + + +def test_load_or_build_no_toml_env_overrides_defaults( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """When config.toml is absent, env must still override defaults.""" + from lucebox.cli import _load_or_build + + _set_config_path(tmp_path, monkeypatch) + # Don't write a config.toml — exercise the live_config() fallback. + monkeypatch.setenv("LUCEBOX_IMAGE", "ghcr.io/myfork/lucebox-hub") + cfg = _load_or_build() + assert cfg.image == "ghcr.io/myfork/lucebox-hub" diff --git a/lucebox/tests/test_docker_run.py b/lucebox/tests/test_docker_run.py new file mode 100644 index 000000000..bb888514f --- /dev/null +++ b/lucebox/tests/test_docker_run.py @@ -0,0 +1,254 @@ +"""Tests for the docker-run serve-argv builder. + +This is the core's whole job: turn a Config into the exact `docker run` +command (and DFLASH_* env) that launches the server. 
The argv contract is +what `lucebox serve` / the systemd unit / `print-run` all consume, so it is +pinned field-by-field here rather than only smoke-tested. +""" + +from __future__ import annotations + +from pathlib import Path + +from lucebox.download import PRESETS +from lucebox.types import Config, DflashRuntime, ModelMeta + +from lucebox import docker_run + + +def _env(spec) -> dict[str, str]: + return dict(spec.env) + + +# ── DockerRunSpec.argv ─────────────────────────────────────────────────────── + + +def test_argv_minimal_defaults() -> None: + spec = docker_run.DockerRunSpec(image="img:tag", name="box") + argv = spec.argv() + assert argv[:2] == ["docker", "run"] + assert "--rm" in argv # remove defaults True + assert ["--name", "box"] == argv[argv.index("--name") : argv.index("--name") + 2] + assert ["--gpus", "all"] == argv[argv.index("--gpus") : argv.index("--gpus") + 2] + # image is the last positional (no entrypoint_args here) + assert argv[-1] == "img:tag" + assert "-d" not in argv # detach defaults False + + +def test_argv_flags_and_ordering() -> None: + spec = docker_run.DockerRunSpec( + image="img:tag", + name="box", + gpus=False, + detach=True, + remove=False, + port_publish=(8080, 8080), + volumes=(("/host/models", "/opt/lucebox-hub/server/models"),), + env=(("DFLASH_BUDGET", "22"),), + entrypoint_args=("serve",), + extra=("--shm-size", "1g"), + ) + argv = spec.argv() + assert "--rm" not in argv # remove=False + assert "-d" in argv # detach + assert "--gpus" not in argv # gpus=False + assert ["-p", "8080:8080"] == argv[argv.index("-p") : argv.index("-p") + 2] + assert ["-v", "/host/models:/opt/lucebox-hub/server/models"] == argv[ + argv.index("-v") : argv.index("-v") + 2 + ] + assert ["-e", "DFLASH_BUDGET=22"] == argv[argv.index("-e") : argv.index("-e") + 2] + # extra flags precede the image; entrypoint_args follow it. 
+ assert argv[-1] == "serve" + assert argv[-2] == "img:tag" + assert argv.index("--shm-size") < argv.index("img:tag") + + +def test_printable_glues_value_taking_flags() -> None: + spec = docker_run.DockerRunSpec( + image="img:tag", + name="box", + port_publish=(8080, 8080), + env=(("K", "v"),), + ) + out = spec.printable() + # one flag per line, continued with backslash-newline + assert out.startswith("docker \\\n run") + # value-taking flags keep their value on the same line + assert "--name box" in out + assert "--gpus all" in out + assert "-p 8080:8080" in out + assert "-e K=v" in out + + +# ── _runtime_volumes ───────────────────────────────────────────────────────── + + +def test_runtime_volumes_mounts_models_and_home(tmp_path: Path) -> None: + cfg = Config(models_dir=tmp_path / "models") + vols = docker_run._runtime_volumes(cfg) + assert (str(tmp_path / "models"), "/opt/lucebox-hub/server/models") in vols + # $HOME is also mounted so absolute symlink targets resolve in-container. + assert any(host == str(Path.home()) for host, _ in vols) + + +def test_runtime_volumes_dedupes_when_models_is_home(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path)) + cfg = Config(models_dir=tmp_path) + vols = docker_run._runtime_volumes(cfg) + # models_dir == home → only the models mount, no duplicate home mount. 
+ assert len(vols) == 1 + + +# ── _resolve_model_files ───────────────────────────────────────────────────── + + +def test_resolve_model_files_explicit_override_wins(tmp_path: Path) -> None: + cfg = Config( + models_dir=tmp_path, + model=ModelMeta(preset="qwen3.6-27b", target_file="custom.gguf", draft_file="d.gguf"), + ) + target, draft, draft_dir = docker_run._resolve_model_files(cfg) + assert target == "custom.gguf" + assert draft == "d.gguf" + assert draft_dir == "" + + +def test_resolve_model_files_falls_back_to_preset_registry(tmp_path: Path) -> None: + pres = PRESETS["qwen3.6-27b"] + cfg = Config(models_dir=tmp_path, model=ModelMeta(preset="qwen3.6-27b")) + target, draft, draft_dir = docker_run._resolve_model_files(cfg) + assert target == pres.target_file + assert draft == (pres.draft_file or "") + assert draft_dir == "" # no speculator dir on disk + + +def test_resolve_model_files_no_preset_no_override(tmp_path: Path) -> None: + cfg = Config(models_dir=tmp_path) # ModelMeta() defaults: all empty + assert docker_run._resolve_model_files(cfg) == ("", "", "") + + +# ── server_run_spec ────────────────────────────────────────────────────────── + + +def test_server_run_spec_top_level_shape(tmp_path: Path) -> None: + cfg = Config( + image="ghcr.io/x/lucebox-hub", + variant="cuda12", + container_name="lucebox", + port=9000, + models_dir=tmp_path, + ) + spec = docker_run.server_run_spec(cfg) + assert spec.image == "ghcr.io/x/lucebox-hub:cuda12" + assert spec.name == "lucebox" + assert spec.gpus is True + assert spec.remove is True + assert spec.detach is False + assert spec.port_publish == (9000, 8080) + assert (str(tmp_path), "/opt/lucebox-hub/server/models") in spec.volumes + + +def test_server_run_spec_always_emits_core_dflash_env(tmp_path: Path) -> None: + cfg = Config(models_dir=tmp_path, dflash=DflashRuntime(budget=22, max_ctx=32768)) + env = _env(docker_run.server_run_spec(cfg)) + assert env["DFLASH_BUDGET"] == "22" + assert env["DFLASH_MAX_CTX"] == "32768" + 
assert env["DFLASH_PREFIX_CACHE_SLOTS"] == "0" + assert env["DFLASH_PREFILL_CACHE_SLOTS"] == "0" + assert env["DFLASH_THINK_MAX"] == "15488" + assert env["DFLASH_PORT"] == "8080" + + +def test_server_run_spec_optional_env_off_by_default(tmp_path: Path) -> None: + env = _env(docker_run.server_run_spec(Config(models_dir=tmp_path))) + for absent in ( + "DFLASH_LAZY", + "DFLASH_CACHE_TYPE_K", + "DFLASH_CACHE_TYPE_V", + "DFLASH_PREFILL_MODE", + "DFLASH_FA_WINDOW", + "DFLASH_THINK_SOFT_CLOSE_MIN_RATIO", + "DFLASH_DEBUG_THINKING_LOGITS", + "DFLASH_TARGET", + "DFLASH_DRAFT", + ): + assert absent not in env + + +def test_server_run_spec_optional_env_emitted_when_set(tmp_path: Path) -> None: + cfg = Config( + models_dir=tmp_path, + dflash=DflashRuntime( + lazy=True, + cache_type_k="tq3_0", + cache_type_v="tq3_0", + prefill_mode="auto", + prefill_keep_ratio=0.1, + prefill_threshold=20000, + prefill_drafter="drafter.gguf", + fa_window=512, + think_soft_close_min_ratio=0.5, + debug_thinking_logits=True, + ), + ) + env = _env(docker_run.server_run_spec(cfg)) + assert env["DFLASH_LAZY"] == "1" + assert env["DFLASH_CACHE_TYPE_K"] == "tq3_0" + assert env["DFLASH_CACHE_TYPE_V"] == "tq3_0" + assert env["DFLASH_PREFILL_MODE"] == "auto" + assert env["DFLASH_PREFILL_KEEP"] == "0.1" + assert env["DFLASH_PREFILL_THRESHOLD"] == "20000" + assert env["DFLASH_PREFILL_DRAFTER"] == "drafter.gguf" + assert env["DFLASH_FA_WINDOW"] == "512" + assert env["DFLASH_THINK_SOFT_CLOSE_MIN_RATIO"] == "0.5" + assert env["DFLASH_DEBUG_THINKING_LOGITS"] == "1" + + +def test_server_run_spec_resolves_target_and_draft_paths(tmp_path: Path) -> None: + pres = PRESETS["qwen3.6-27b"] + cfg = Config(models_dir=tmp_path, model=ModelMeta(preset="qwen3.6-27b")) + env = _env(docker_run.server_run_spec(cfg)) + assert env["DFLASH_TARGET"] == f"/opt/lucebox-hub/server/models/{pres.target_file}" + if pres.draft_file: + assert env["DFLASH_DRAFT"] == ( + f"/opt/lucebox-hub/server/models/draft/{pres.draft_file}" + ) + + +def 
test_server_run_spec_forwards_host_env(monkeypatch, tmp_path: Path) -> None: + monkeypatch.setenv("LUCEBOX_HOST_OS_PRETTY", "Ubuntu 22.04") + monkeypatch.setenv("LUCEBOX_HOST_GPU_NAME", "RTX 5090") + env = _env(docker_run.server_run_spec(Config(models_dir=tmp_path))) + assert env["LUCEBOX_HOST_OS_PRETTY"] == "Ubuntu 22.04" + assert env["LUCEBOX_HOST_GPU_NAME"] == "RTX 5090" + + +def test_large_preset_serves_at_safe_default_ctx(tmp_path: Path) -> None: + """Regression guard for the preset-cap analysis (#5). + + Activating a preset writes only [model], never [dflash], so a loaded + Config keeps the conservative DflashRuntime() floor (max_ctx=16384). + The VRAM-tier heuristic's higher caps only apply via `autotune --apply` + (which threads cfg.model.preset and is a separate PR). This test pins + that a large preset does NOT silently serve at a high, OOM-prone ctx + through the default serve path. + """ + cfg = Config(models_dir=tmp_path, model=ModelMeta(preset="qwen3.6-27b")) + env = _env(docker_run.server_run_spec(cfg)) + assert env["DFLASH_MAX_CTX"] == "16384" + + +# ── docker_pull ────────────────────────────────────────────────────────────── + + +def test_docker_pull_shells_out_and_returns_code(monkeypatch) -> None: + seen: dict[str, list[str]] = {} + + def fake_call(argv: list[str]) -> int: + seen["argv"] = argv + return 7 + + monkeypatch.setattr(docker_run.subprocess, "call", fake_call) + rc = docker_run.docker_pull("img:tag") + assert rc == 7 + assert seen["argv"] == ["docker", "pull", "img:tag"] diff --git a/lucebox/tests/test_download.py b/lucebox/tests/test_download.py new file mode 100644 index 000000000..8b69e96b8 --- /dev/null +++ b/lucebox/tests/test_download.py @@ -0,0 +1,323 @@ +"""Tests for the model-download orchestration. + +The downloader now drives `huggingface_hub.hf_hub_download` directly +(no subprocess) and verifies size + sha256 against the repo metadata +before re-fetching. 
The tests stub out the network calls so the +behavior contract — what gets requested, when downloads are skipped — +stays pinned without actually talking to the Hub. +""" + +from pathlib import Path +from types import SimpleNamespace + +import pytest +from lucebox.download import ( + DEFAULT_PRESET, + PRESETS, + recommend_preset, + resolve_preset, + status, +) +from lucebox.types import HostFacts + +from lucebox import download + + +def test_default_preset_uses_quantized_gguf_draft(): + assert DEFAULT_PRESET.draft_repo == "spiritbuun/Qwen3.6-27B-DFlash-GGUF" + assert DEFAULT_PRESET.draft_file == "dflash-draft-3.6-q4_k_m.gguf" + + +def test_default_preset_is_registered_under_qwen_name(): + assert DEFAULT_PRESET is PRESETS["qwen3.6-27b"] + assert DEFAULT_PRESET.name == "qwen3.6-27b" + + +def test_resolve_preset_returns_default_on_none(): + assert resolve_preset(None) is DEFAULT_PRESET + assert resolve_preset("") is DEFAULT_PRESET + + +def test_resolve_preset_picks_gemma_target_and_draft(): + pres = resolve_preset("gemma-4-26b") + assert pres.name == "gemma-4-26b" + assert pres.target_repo == "bartowski/google_gemma-4-26B-A4B-it-GGUF" + assert pres.target_file == "google_gemma-4-26B-A4B-it-Q4_K_M.gguf" + assert pres.draft_repo == "Lucebox/gemma-4-26B-A4B-it-DFlash-GGUF" + assert pres.draft_file == "gemma-4-26B-A4B-it-DFlash-q8_0.gguf" + assert pres.has_draft + + +def test_resolve_preset_supports_target_only_laguna(): + pres = resolve_preset("laguna-xs.2") + assert pres.target_repo == "Lucebox/Laguna-XS.2-GGUF" + assert pres.draft_repo is None + assert not pres.has_draft + + +def test_resolve_preset_picks_qwen36_moe_target_only(): + """Qwen3.6 MoE preset routes to unsloth's UD-Q4_K_M file, no draft. + + The MoE variant has no published DFlash draft GGUF (verified against + HfApi.repo_info 2026-05-28), so it runs target-only like Laguna. 
The + file stem is `Qwen3.6-35B-A3B-UD-Q4_K_M.gguf` — the unsloth repo only + publishes the UD ("unsloth dynamic") family at Q4_K_M, not a plain + `Q4_K_M.gguf`. + """ + pres = resolve_preset("qwen3.6-moe") + assert pres.name == "qwen3.6-moe" + assert pres.target_repo == "unsloth/Qwen3.6-35B-A3B-GGUF" + assert pres.target_file == "Qwen3.6-35B-A3B-UD-Q4_K_M.gguf" + assert pres.draft_repo is None + assert pres.draft_file is None + assert not pres.has_draft + + +def test_download_preset_target_only_qwen36_moe_skips_draft(tmp_path, monkeypatch): + """qwen3.6-moe behaves identically to laguna-xs.2: target only, no draft fetch.""" + cfg = SimpleNamespace(models_dir=tmp_path) + pres = resolve_preset("qwen3.6-moe") + assert not pres.has_draft + fetches: list[tuple[str, str]] = [] + + def _meta(_api, repo_id: str, filename: str) -> tuple[int, None]: + return 10, None + + def _stub_fetch(api, repo_id, filename, local_dir, console): # noqa: ARG001 + fetches.append((repo_id, filename)) + out = local_dir / filename + out.parent.mkdir(parents=True, exist_ok=True) + with out.open("wb") as f: + f.truncate(10) + return out + + monkeypatch.setattr(download, "_file_meta", _meta) + monkeypatch.setattr(download, "_fetch", _stub_fetch) + + assert download.download_preset(cfg, pres) == 0 + # Only the target — no draft attempt at all. + assert fetches == [(pres.target_repo, pres.target_file)] + + +def test_status_qwen36_moe_reports_draft_present_when_target_only(tmp_path, monkeypatch): + """No published draft → status reports draft_present=True (nothing to fetch).""" + cfg = SimpleNamespace(models_dir=tmp_path) + pres = resolve_preset("qwen3.6-moe") + + def _meta(_api, repo_id: str, filename: str) -> tuple[int, None]: + return 22 * 10**9, None + + monkeypatch.setattr(download, "_file_meta", _meta) + # Target absent → target_present False, draft_present True (no draft). 
+ assert status(cfg, pres) == {"target_present": False, "draft_present": True} + + +def test_resolve_preset_unknown_name_lists_known_options(): + with pytest.raises(KeyError) as exc_info: + resolve_preset("qwen-99b") + msg = str(exc_info.value) + # Every registered preset must appear in the suggestion list so the + # user can copy-paste the right name. + for name in PRESETS: + assert name in msg + + +def _stub_file_meta(target_size: int, draft_size: int): + """Build a `_file_meta` replacement that returns (size, None) per repo+file. + + sha256 is left None so tests don't need to compute real hashes; the + real metadata path is exercised by the live `models download` + invocation, not the unit tests. + """ + + def _meta(_api, repo_id: str, filename: str) -> tuple[int, None]: + if repo_id == DEFAULT_PRESET.target_repo and filename == DEFAULT_PRESET.target_file: + return target_size, None + if repo_id == DEFAULT_PRESET.draft_repo and filename == DEFAULT_PRESET.draft_file: + return draft_size, None + raise FileNotFoundError(f"unexpected ({repo_id}, {filename})") + + return _meta + + +def test_status_checks_default_draft_gguf(tmp_path, monkeypatch): + cfg = SimpleNamespace(models_dir=tmp_path) + draft_dir = tmp_path / "draft" + draft_dir.mkdir() + target = tmp_path / DEFAULT_PRESET.target_file + draft = draft_dir / DEFAULT_PRESET.draft_file + + monkeypatch.setattr(download, "_file_meta", _stub_file_meta(target_size=1024, draft_size=512)) + + # Neither file exists yet. + assert status(cfg) == {"target_present": False, "draft_present": False} + + # Write files at the expected sizes. 
+ with target.open("wb") as f: + f.truncate(1024) + with draft.open("wb") as f: + f.truncate(512) + assert status(cfg) == {"target_present": True, "draft_present": True} + + +def test_status_rejects_partial_model_files(tmp_path, monkeypatch): + cfg = SimpleNamespace(models_dir=tmp_path) + draft_dir = tmp_path / "draft" + draft_dir.mkdir() + target = tmp_path / DEFAULT_PRESET.target_file + draft = draft_dir / DEFAULT_PRESET.draft_file + target.write_bytes(b"partial") + draft.write_bytes(b"partial") + + # Repo says the target is 1 GB; a 7-byte file is partial, not present. + monkeypatch.setattr( + download, "_file_meta", _stub_file_meta(target_size=10**9, draft_size=10**6) + ) + assert status(cfg) == {"target_present": False, "draft_present": False} + + +def test_current_bytes_reads_xet_staging_path(tmp_path): + """Regression: progress polling must see hf-xet's hashed staging file. + + huggingface_hub 1.x writes partial Xet downloads to + ``{local_dir}/.cache/huggingface/download/{short_hash}.{etag}.incomplete`` + — NOT to ``{local_dir}/{filename}.incomplete``. Before the fix the + polling code only checked the latter (which never appears) so the + Rich progress bar sat at 0 bytes for the entire transfer. + """ + filename = "model.gguf" + etag = "abc123" + candidates = download._incomplete_path_candidates(tmp_path, filename, etag) + # The first candidate must point at the actual hf-xet staging path. + xet_path: Path = candidates[0] + assert xet_path.parent == tmp_path / ".cache" / "huggingface" / "download" + assert xet_path.name.endswith(f".{etag}.incomplete") + + # Now: writing to that path must be observed by _current_bytes. 
+ xet_path.parent.mkdir(parents=True, exist_ok=True) + xet_path.write_bytes(b"x" * 4096) + target = tmp_path / filename + assert download._current_bytes(target, candidates) == 4096 + + +def test_current_bytes_falls_back_to_glob_without_etag(tmp_path): + """When sha256 is unknown we still find growing .incomplete files.""" + filename = "model.gguf" + candidates = download._incomplete_path_candidates(tmp_path, filename, etag=None) + target = tmp_path / filename + + staging = tmp_path / ".cache" / "huggingface" / "download" + staging.mkdir(parents=True, exist_ok=True) + (staging / "deadbeef.deadbeef.incomplete").write_bytes(b"x" * 8192) + assert download._current_bytes(target, candidates) == 8192 + + +def test_current_bytes_prefers_final_target_when_complete(tmp_path): + filename = "model.gguf" + candidates = download._incomplete_path_candidates(tmp_path, filename, etag="abc") + target = tmp_path / filename + target.write_bytes(b"x" * 1234) + assert download._current_bytes(target, candidates) == 1234 + + +def test_download_preset_fetches_exact_draft_file(tmp_path, monkeypatch): + cfg = SimpleNamespace(models_dir=tmp_path) + fetches: list[tuple[str, str, str]] = [] + + monkeypatch.setattr(download, "_file_meta", _stub_file_meta(target_size=10, draft_size=10)) + + # Stub the actual download to record what was requested + create a stub + # file of the expected size so `_local_matches` would pass on a re-run. 
+ def _stub_fetch(api, repo_id, filename, local_dir, console): # noqa: ARG001 + fetches.append((repo_id, filename, str(local_dir))) + target = local_dir / filename + target.parent.mkdir(parents=True, exist_ok=True) + with target.open("wb") as f: + f.truncate(10) + return target + + monkeypatch.setattr(download, "_fetch", _stub_fetch) + + assert download.download_preset(cfg) == 0 + assert (DEFAULT_PRESET.target_repo, DEFAULT_PRESET.target_file, str(tmp_path)) in fetches + assert ( + DEFAULT_PRESET.draft_repo, + DEFAULT_PRESET.draft_file, + str(tmp_path / "draft"), + ) in fetches + + +def test_download_preset_routes_gemma_preset_to_correct_repos(tmp_path, monkeypatch): + cfg = SimpleNamespace(models_dir=tmp_path) + pres = resolve_preset("gemma-4-26b") + fetches: list[tuple[str, str, str]] = [] + + def _meta(_api, repo_id: str, filename: str) -> tuple[int, None]: + return 10, None + + def _stub_fetch(api, repo_id, filename, local_dir, console): # noqa: ARG001 + fetches.append((repo_id, filename, str(local_dir))) + out = local_dir / filename + out.parent.mkdir(parents=True, exist_ok=True) + with out.open("wb") as f: + f.truncate(10) + return out + + monkeypatch.setattr(download, "_file_meta", _meta) + monkeypatch.setattr(download, "_fetch", _stub_fetch) + + assert download.download_preset(cfg, pres) == 0 + assert (pres.target_repo, pres.target_file, str(tmp_path)) in fetches + assert (pres.draft_repo, pres.draft_file, str(tmp_path / "draft")) in fetches + + +def test_download_preset_target_only_skips_draft_fetch(tmp_path, monkeypatch): + cfg = SimpleNamespace(models_dir=tmp_path) + pres = resolve_preset("laguna-xs.2") + assert not pres.has_draft + fetches: list[tuple[str, str]] = [] + + def _meta(_api, repo_id: str, filename: str) -> tuple[int, None]: + return 10, None + + def _stub_fetch(api, repo_id, filename, local_dir, console): # noqa: ARG001 + fetches.append((repo_id, filename)) + out = local_dir / filename + out.parent.mkdir(parents=True, exist_ok=True) + with 
out.open("wb") as f: + f.truncate(10) + return out + + monkeypatch.setattr(download, "_file_meta", _meta) + monkeypatch.setattr(download, "_fetch", _stub_fetch) + + assert download.download_preset(cfg, pres) == 0 + # Target fetched, no draft fetch attempted at all. + assert fetches == [(pres.target_repo, pres.target_file)] + + +def test_status_target_only_preset_reports_draft_as_present(tmp_path, monkeypatch): + cfg = SimpleNamespace(models_dir=tmp_path) + pres = resolve_preset("laguna-xs.2") + + def _meta(_api, repo_id: str, filename: str) -> tuple[int, None]: + return 1024, None + + monkeypatch.setattr(download, "_file_meta", _meta) + # Target absent → target_present False, draft_present True (nothing to download). + assert status(cfg, pres) == {"target_present": False, "draft_present": True} + + +def test_recommend_preset_tiers() -> None: + """First-run preset recommendation is a pure VRAM-tier function. + + 22 GB+ → the Lucebox default (qwen3.6-27b); 16-21 GB → laguna-xs.2; + below 16 GB → None (the registered presets need ≥16 GB, so we punt to + an explicit choice rather than recommend something that can't run). 
+ """ + assert recommend_preset(HostFacts(vram_gb=24)) == "qwen3.6-27b" + assert recommend_preset(HostFacts(vram_gb=22)) == "qwen3.6-27b" + assert recommend_preset(HostFacts(vram_gb=20)) == "laguna-xs.2" + assert recommend_preset(HostFacts(vram_gb=16)) == "laguna-xs.2" + assert recommend_preset(HostFacts(vram_gb=12)) is None + assert recommend_preset(HostFacts(vram_gb=0)) is None diff --git a/lucebox/tests/test_models_cli.py b/lucebox/tests/test_models_cli.py new file mode 100644 index 000000000..f44583044 --- /dev/null +++ b/lucebox/tests/test_models_cli.py @@ -0,0 +1,142 @@ +"""Tests for the ``lucebox models`` sub-app.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest +from lucebox.cli import app +from lucebox.download import PRESETS +from lucebox.types import HostFacts +from typer.testing import CliRunner + +from lucebox import config as config_mod +from lucebox import download as download_mod + + +def _set_config_path(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + monkeypatch.setenv("LUCEBOX_HOME", str(tmp_path)) + monkeypatch.setenv("LUCEBOX_MODELS", str(tmp_path / "models")) + return tmp_path / "config.toml" + + +def _stub_host(monkeypatch: pytest.MonkeyPatch, vram_gb: int) -> None: + monkeypatch.setattr("lucebox.host_facts.from_env", lambda: HostFacts(vram_gb=vram_gb)) + monkeypatch.setattr("lucebox.cli.from_env", lambda: HostFacts(vram_gb=vram_gb)) + + +def test_models_list_shows_every_registered_preset( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _set_config_path(tmp_path, monkeypatch) + _stub_host(monkeypatch, vram_gb=24) + result = CliRunner().invoke(app, ["models", "list"]) + assert result.exit_code == 0 + for name in PRESETS: + assert name in result.stdout + + +def test_models_default_view_lists_only_installed( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + _set_config_path(tmp_path, monkeypatch) + _stub_host(monkeypatch, vram_gb=24) + # No models on disk → default 
view says "no presets installed". + result = CliRunner().invoke(app, ["models"]) + assert result.exit_code == 0 + assert "No presets installed" in result.stdout or "Models dir" in result.stdout + + +def test_models_download_recommends_when_empty( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """No preset configured + nothing on argv → auto-recommend + auto-activate.""" + cfg_path = _set_config_path(tmp_path, monkeypatch) + _stub_host(monkeypatch, vram_gb=24) + + # Stub the network calls so the test doesn't try to talk to HF. + monkeypatch.setattr(download_mod, "download_preset", lambda cfg, pres: 0) + monkeypatch.setattr( + download_mod, + "status", + lambda cfg, pres: {"target_present": True, "draft_present": True}, + ) + + result = CliRunner().invoke(app, ["models", "download"]) + assert result.exit_code == 0 + assert "Recommended preset" in result.stdout + assert cfg_path.exists() + # The active preset should now be model.preset = qwen3.6-27b. + entries = config_mod.config_get(path=cfg_path) + assert entries["model.preset"] == ("qwen3.6-27b", "file") + + +def test_models_download_refuses_silent_switch( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """When a preset is already active, `download` with no arg refuses.""" + cfg_path = _set_config_path(tmp_path, monkeypatch) + _stub_host(monkeypatch, vram_gb=24) + config_mod.config_set("model.preset", "qwen3.6-27b", path=cfg_path) + + result = CliRunner().invoke(app, ["models", "download"]) + assert result.exit_code == 2 + assert "already active" in result.stdout.lower() + + +def test_models_download_explicit_preset_no_activate( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """Passing a preset without --activate downloads but doesn't flip model.preset.""" + cfg_path = _set_config_path(tmp_path, monkeypatch) + _stub_host(monkeypatch, vram_gb=24) + monkeypatch.setattr(download_mod, "download_preset", lambda cfg, pres: 0) + monkeypatch.setattr( + download_mod, + 
"status", + lambda cfg, pres: {"target_present": False, "draft_present": False}, + ) + + result = CliRunner().invoke(app, ["models", "download", "gemma-4-26b"]) + assert result.exit_code == 0 + if cfg_path.exists(): + entries = config_mod.config_get(path=cfg_path) + assert entries["model.preset"] == ("", "default") + + +def test_models_download_explicit_preset_with_activate( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + cfg_path = _set_config_path(tmp_path, monkeypatch) + _stub_host(monkeypatch, vram_gb=24) + monkeypatch.setattr(download_mod, "download_preset", lambda cfg, pres: 0) + monkeypatch.setattr( + download_mod, + "status", + lambda cfg, pres: {"target_present": False, "draft_present": False}, + ) + + result = CliRunner().invoke(app, ["models", "download", "gemma-4-26b", "--activate"]) + assert result.exit_code == 0 + entries = config_mod.config_get(path=cfg_path) + assert entries["model.preset"] == ("gemma-4-26b", "file") + + +def test_installed_helpers_track_presence( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """``installed_status`` / ``installed_size_gb`` reflect on-disk byte counts.""" + _set_config_path(tmp_path, monkeypatch) + _stub_host(monkeypatch, vram_gb=24) + from lucebox.config import live_config + + cfg = live_config() + cfg.models_dir.mkdir(parents=True, exist_ok=True) + laguna = PRESETS["laguna-xs.2"] + assert download_mod.installed_status(cfg, laguna) == "absent" + + target = cfg.models_dir / laguna.target_file + target.parent.mkdir(parents=True, exist_ok=True) + target.write_bytes(b"x" * (5 * 10**9)) + assert download_mod.installed_status(cfg, laguna) == "installed" + assert download_mod.installed_size_gb(cfg, laguna) == pytest.approx(5.0, rel=0.01) diff --git a/pyproject.toml b/pyproject.toml index 56ae2bf4f..520838041 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ license = { text = "Apache-2.0" } authors = [{ name = "Lucebox" }] dependencies = [ + "lucebox", "lucebox-dflash", 
"pflash", ] @@ -23,7 +24,7 @@ line-length = 100 # server-internal and optimization Python (server/scripts, optimizations/*) # carries pre-existing style debt and is added to `include` as it is cleaned # up. Vendored deps stay excluded permanently (extend-exclude below). -include = ["harness/**/*.py", "scripts/**/*.py"] +include = ["harness/**/*.py", "scripts/**/*.py", "lucebox/**/*.py"] extend-exclude = [ "dflash/deps", "megakernel", @@ -51,11 +52,12 @@ package = false no-build-isolation-package = ["qwen35-megakernel-bf16"] [tool.uv.workspace] -# Workspace members. Keeping the list to the packages that live in this -# repo lets `uv lock --check` / `uv sync --frozen` pass. -members = ["server", "optimizations/megakernel", "optimizations/pflash"] +# Workspace members. PR adds the lucebox/ package alongside the existing +# server / megakernel / pflash members. +members = ["lucebox", "server", "optimizations/megakernel", "optimizations/pflash"] [tool.uv.sources] +lucebox = { workspace = true } lucebox-dflash = { workspace = true } pflash = { workspace = true } qwen35-megakernel-bf16 = { workspace = true } diff --git a/scripts/check_lucebox_wrapper_sandbox.sh b/scripts/check_lucebox_wrapper_sandbox.sh new file mode 100755 index 000000000..df2b2b9bc --- /dev/null +++ b/scripts/check_lucebox_wrapper_sandbox.sh @@ -0,0 +1,242 @@ +#!/usr/bin/env bash +# Exercise the host-side lucebox.sh installer/wrapper from an isolated prefix. +# +# The script intentionally runs from a throwaway HOME, XDG_CONFIG_HOME, +# LUCEBOX_HOME, model directory, and working directory. That catches accidental +# dependencies on the checkout or the user's real ~/.lucebox while keeping the +# test reproducible enough to paste into a bug report. 
+ +set -euo pipefail + +IMAGE="${LUCEBOX_TEST_IMAGE:-ghcr.io/easel/lucebox-hub}" +VARIANT="${LUCEBOX_TEST_VARIANT:-integration-props-uv-squared-clean-cuda12}" +WRAPPER_SOURCE="${LUCEBOX_TEST_WRAPPER_SOURCE:-local}" +RUN_PULL="${LUCEBOX_TEST_RUN_PULL:-1}" +RUN_CONTAINER_CLI="${LUCEBOX_TEST_RUN_CONTAINER_CLI:-1}" +KEEP_SANDBOX="${LUCEBOX_TEST_KEEP_SANDBOX:-0}" + +ROOT="" +LOG="" + +usage() { + cat <&2; usage >&2; exit 2 ;; + esac +done + +die() { + echo "[FAIL] $*" >&2 + if [ -n "$LOG" ] && [ -f "$LOG" ]; then + echo "[FAIL] transcript: $LOG" >&2 + fi + exit 1 +} + +note() { + printf '[INFO] %s\n' "$*" +} + +pass() { + printf '[PASS] %s\n' "$*" +} + +assert_file() { + [ -f "$1" ] || die "missing file: $1" + pass "file exists: $1" +} + +assert_contains() { + local file="$1" + local pattern="$2" + if ! grep -Fq "$pattern" "$file"; then + echo "----- $file -----" >&2 + sed -n '1,220p' "$file" >&2 || true + echo "-----------------" >&2 + die "expected '$pattern' in $file" + fi + pass "$file contains: $pattern" +} + +run_logged() { + note "run: $*" + { + printf '\n===== %s =====\n' "$*" + "$@" + printf '===== exit=0 =====\n' + } 2>&1 | tee -a "$LOG" +} + +run_logged_capture() { + local out="$1" + shift + note "run: $* > $out" + { + printf '\n===== %s > %s =====\n' "$*" "$out" + "$@" + local rc=$? + printf '===== exit=%s =====\n' "$rc" + return "$rc" + } 2>&1 | tee "$out" | tee -a "$LOG" >/dev/null +} + +cleanup() { + if [ -n "$ROOT" ] && [ "$KEEP_SANDBOX" != "1" ]; then + rm -rf "$ROOT" + elif [ -n "$ROOT" ]; then + note "kept sandbox: $ROOT" + note "transcript: $LOG" + fi +} +trap cleanup EXIT + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +ROOT="$(mktemp -d "${TMPDIR:-/tmp}/lucebox-wrapper-sandbox.XXXXXX")" +LOG="$ROOT/transcript.log" + +HOME_DIR="$ROOT/home" +BIN_DIR="$ROOT/bin" +XDG_DIR="$ROOT/xdg" +MODELS_DIR="$ROOT/models" +WORK_DIR="$ROOT/work" +mkdir -p "$HOME_DIR" "$BIN_DIR" "$XDG_DIR" "$MODELS_DIR" "$WORK_DIR" + +note "sandbox: $ROOT" +note "transcript: $LOG" + +case "$WRAPPER_SOURCE" in + local) + cp "$REPO_ROOT/lucebox.sh" "$BIN_DIR/lucebox" + ;; + http://*|https://*) + curl -fsSL "$WRAPPER_SOURCE" -o "$BIN_DIR/lucebox" + ;; + *) + cp "$WRAPPER_SOURCE" "$BIN_DIR/lucebox" + ;; +esac +chmod +x "$BIN_DIR/lucebox" + +FIRST_LINE="$(head -n 1 "$BIN_DIR/lucebox")" +[ "$FIRST_LINE" = "#!/usr/bin/env bash" ] || die "unexpected shebang: $FIRST_LINE" +pass "wrapper has expected shebang" + +export HOME="$HOME_DIR" +export XDG_CONFIG_HOME="$XDG_DIR" +export LUCEBOX_HOME="$HOME_DIR/.lucebox" +export LUCEBOX_MODELS="$MODELS_DIR" +export LUCEBOX_IMAGE="$IMAGE" +export LUCEBOX_VARIANT="$VARIANT" +export LUCEBOX_CONTAINER="lucebox-sandbox" +export LUCEBOX_PORT="18080" +export PATH="$BIN_DIR:$PATH" + +cd "$WORK_DIR" +[ "$PWD" = "$WORK_DIR" ] || die "failed to enter sandbox workdir" +pass "working directory isolated: $PWD" + +run_logged_capture "$ROOT/version.out" lucebox version +assert_contains "$ROOT/version.out" "0.2.0" + +run_logged_capture "$ROOT/help.out" lucebox help +assert_contains "$ROOT/help.out" "LUCEBOX_VARIANT" +assert_contains "$ROOT/help.out" "LUCEBOX_IMAGE" + +docker manifest inspect "${IMAGE}:${VARIANT}" >/dev/null +pass "image manifest exists: ${IMAGE}:${VARIANT}" + +if [ "$RUN_PULL" = "1" ]; then + run_logged_capture "$ROOT/pull.out" lucebox pull + assert_contains "$ROOT/pull.out" "${IMAGE}:${VARIANT}" +fi + +if [ "$RUN_CONTAINER_CLI" = "1" ]; then + run_logged_capture "$ROOT/check.out" lucebox check + # Sparse persistence: `config set` creates config.toml with only the + # named key. Replaces the old `configure --overwrite` path. 
+ run_logged_capture "$ROOT/config-image.out" lucebox config set "image=$IMAGE" + run_logged_capture "$ROOT/config-variant.out" lucebox config set "variant=$VARIANT" + assert_file "$LUCEBOX_HOME/config.toml" + [ "$(stat -c '%u' "$LUCEBOX_HOME/config.toml")" = "$(id -u)" ] \ + || die "config.toml is not owned by the invoking user" + pass "config.toml ownership matches invoking user" + assert_contains "$LUCEBOX_HOME/config.toml" "registry = \"$IMAGE\"" + assert_contains "$LUCEBOX_HOME/config.toml" "variant = \"$VARIANT\"" + + run_logged_capture "$ROOT/print-run.out" lucebox print-run + assert_contains "$ROOT/print-run.out" "${IMAGE}:${VARIANT}" + assert_contains "$ROOT/print-run.out" "$MODELS_DIR:/opt/lucebox-hub/dflash/models" + if grep -Fq "$REPO_ROOT" "$ROOT/print-run.out"; then + die "print-run leaked repository path: $REPO_ROOT" + fi + pass "print-run did not reference repository checkout" +fi + +# Exercise `lucebox install` without allowing it to call real systemctl, +# loginctl, docker, or nvidia-smi. The generated user unit must land under the +# sandbox XDG_CONFIG_HOME and point ExecStart at the sandbox-installed wrapper. 
+SHIM_DIR="$ROOT/shims" +mkdir -p "$SHIM_DIR" +cat > "$SHIM_DIR/docker" <<'EOF' +#!/usr/bin/env bash +case "${1:-}" in + info) exit 0 ;; + version) echo "25.0.0"; exit 0 ;; + stop) exit 0 ;; + *) echo "docker shim: $*" >&2; exit 0 ;; +esac +EOF +cat > "$SHIM_DIR/nvidia-smi" <<'EOF' +#!/usr/bin/env bash +case "$*" in + *"--query-gpu=name,memory.total,driver_version,compute_cap"*) + echo "Fake GPU, 24576, 555.42.01, 8.6"; exit 0 ;; + *"--query-gpu=name"*) + echo "Fake GPU"; exit 0 ;; + *) echo "Fake GPU"; exit 0 ;; +esac +EOF +cat > "$SHIM_DIR/systemctl" <<'EOF' +#!/usr/bin/env bash +if [ "$1" = "--user" ] && [ "$2" = "show-environment" ]; then exit 0; fi +if [ "$1" = "--user" ] && [ "$2" = "daemon-reload" ]; then exit 0; fi +echo "systemctl shim: $*" >&2 +exit 0 +EOF +cat > "$SHIM_DIR/loginctl" <<'EOF' +#!/usr/bin/env bash +echo "Linger=no" +EOF +chmod +x "$SHIM_DIR/docker" "$SHIM_DIR/nvidia-smi" "$SHIM_DIR/systemctl" "$SHIM_DIR/loginctl" + +PATH="$SHIM_DIR:$BIN_DIR:$PATH" run_logged_capture "$ROOT/install.out" lucebox install +UNIT="$XDG_CONFIG_HOME/systemd/user/lucebox.service" +assert_file "$UNIT" +assert_contains "$UNIT" "ExecStart=$BIN_DIR/lucebox serve" +assert_contains "$UNIT" "ExecStop=$SHIM_DIR/docker stop -t 30 lucebox-sandbox" +assert_contains "$ROOT/install.out" "Installed $UNIT" + +pass "sandbox wrapper check completed" +note "summary: image=${IMAGE}:${VARIANT} wrapper_source=${WRAPPER_SOURCE}" diff --git a/scripts/test_lucebox_sh.sh b/scripts/test_lucebox_sh.sh new file mode 100755 index 000000000..464f552b2 --- /dev/null +++ b/scripts/test_lucebox_sh.sh @@ -0,0 +1,1127 @@ +#!/usr/bin/env bash +# scripts/test_lucebox_sh.sh — smoke tests for the host-side wrapper + +# every other bash script we ship. 
+# +# Catches regressions like: +# * syntax errors (bash -n) +# * shellcheck error-level findings across every shipped bash script +# * `set -u` violations in command paths that don't need docker/nvidia — +# each subcommand dispatch is exercised in isolation to verify no +# LUCEBOX_HOST_* or DFLASH_* read fires before the helper that should +# populate it has run. +# * missing dispatch handlers (help, version, check, usage) +# * stale references to subcommands removed from main's case +# +# The wrapper is shell + has zero non-coreutils deps for the host-only +# commands, so this script doesn't need docker/nvidia/systemd present — +# probe_host degrades cleanly when those aren't found, and the +# formatter must render fine for the "everything is missing" case too. +# +# Run from anywhere: scripts/test_lucebox_sh.sh + +set -euo pipefail + +# Resolve repo root + script under test. +ROOT="$(git rev-parse --show-toplevel 2>/dev/null || (cd "$(dirname "$0")/.." && pwd))" +SCRIPT="$ROOT/lucebox.sh" +ENTRYPOINT="$ROOT/server/scripts/entrypoint.sh" +INSTALLER="$ROOT/install.sh" + +if [ ! -f "$SCRIPT" ]; then + echo "FAIL: lucebox.sh not found at $SCRIPT" >&2 + exit 1 +fi + +# entrypoint.sh ships with the docker-stack PR (#334). When it's absent +# (e.g. on the lucebox-cli branch in isolation), skip the entire suite — +# every section below either references $ENTRYPOINT in shellcheck targets, +# parses it with `bash -n`, or sources/dispatches into it directly. The +# host-only lucebox.sh wrapper itself is covered by lucebox.sh's own unit +# tests; this script's value is the wrapper↔entrypoint contract. +if [ ! 
-f "$ENTRYPOINT" ]; then + echo "Skipping entrypoint tests: server/scripts/entrypoint.sh not present (provided by #334 docker-stack)" + exit 0 +fi + +fail=0 +pass=0 +report() { + if [ "$1" = "ok" ]; then + printf ' \033[1;32m✓\033[0m %s\n' "$2" + pass=$((pass + 1)) + else + printf ' \033[1;31m✗\033[0m %s\n' "$2" + if [ -n "${3:-}" ]; then + printf ' %s\n' "$3" + fi + fail=$((fail + 1)) + fi +} + +# Helper: run the wrapper with strict bash, capture stdout+stderr, check for +# (a) zero exit code, (b) substring match. NO_COLOR is set so colour codes +# don't pollute substring matches. +assert_runs() { + local label="$1" cmd="$2" expect="${3:-}" + local out rc + out=$(NO_COLOR=1 bash -c "$cmd" 2>&1) + rc=$? + if [ "$rc" -ne 0 ]; then + report fail "$label" "exit $rc; output: $(printf '%s' "$out" | head -3)" + return + fi + if [ -n "$expect" ] && ! grep -qF "$expect" <<<"$out"; then + report fail "$label" "missing expected substring '$expect'; got: $(printf '%s' "$out" | head -3)" + return + fi + report ok "$label" +} + +# Helper: run a subcommand whose successful completion would normally need +# docker / nvidia / systemd. We only care that the bash dispatch up to the +# point of the missing dependency does NOT trip `set -u`. Exit code is +# allowed to be non-zero; what we forbid is a raw "unbound variable" / +# "syntax error" / "line N:" leak in the captured output. +# +# Wrapped in `timeout` so subcommands that exec into a follow-style binary +# (logs → journalctl -f, status when systemd is healthy, etc.) don't hang +# the test runner on a dev box where the underlying tools succeed. +assert_no_set_u_leak() { + local label="$1" + shift + local out + out=$(NO_COLOR=1 timeout 5 bash "$@" 2>&1 || true) + # The "line N:" pattern is anchored to a script-path prefix to avoid + # false positives from journalctl output ("systemd[1385106]:") which + # contains a similar shape but isn't a bash error. Bash always emits + # the source filename before the line number, e.g. 
+ # /tmp/lbh-flat/lucebox.sh: line 200: VAR: unbound variable + if grep -qE 'unbound variable|syntax error|\.sh: line [0-9]+:' <<<"$out"; then + report fail "$label" "raw bash error leaked: $(head -3 <<<"$out")" + else + report ok "$label" + fi +} + +echo "[test_lucebox_sh] running against $SCRIPT" + +# ── 1. shellcheck ───────────────────────────────────────────────────────── +# Run shellcheck across every bash script we ship (the wrapper, the +# in-container entrypoint, and every helper under scripts/). Error-level +# findings fail the build; warnings are informational only — those have +# been triaged and the SC2034/SC2155/SC2164 hits in sweep_ds4_2case.sh +# aren't user-visible bugs. +SHELLCHECK_TARGETS=( + "$SCRIPT" + "$ENTRYPOINT" + "$INSTALLER" +) +# Add every scripts/*.sh except this one (don't recurse into our own tests). +while IFS= read -r -d '' f; do + [ "$f" = "${BASH_SOURCE[0]}" ] && continue + SHELLCHECK_TARGETS+=("$f") +done < <(find "$ROOT/scripts" -maxdepth 1 -name '*.sh' -type f -print0 2>/dev/null) +SHELLCHECK_TARGETS+=("${BASH_SOURCE[0]}") + +if command -v shellcheck >/dev/null 2>&1; then + sc_out=$(shellcheck --severity=error "${SHELLCHECK_TARGETS[@]}" 2>&1) || sc_rc=$? + sc_rc="${sc_rc:-0}" + if [ "$sc_rc" -eq 0 ]; then + report ok "shellcheck --severity=error (${#SHELLCHECK_TARGETS[@]} files)" + else + report fail "shellcheck --severity=error" "$(printf '%s' "$sc_out" | head -10)" + fi +else + report fail "shellcheck not installed" "install via 'apt-get install -y shellcheck' (Ubuntu) or 'brew install shellcheck'" +fi + +# ── 2. Syntax / parse ───────────────────────────────────────────────────── +if bash -n "$SCRIPT"; then report ok "bash -n lucebox.sh parses cleanly" +else report fail "bash -n lucebox.sh"; fi +if bash -n "$ENTRYPOINT"; then report ok "bash -n entrypoint.sh parses cleanly" +else report fail "bash -n entrypoint.sh"; fi + +# ── 3. 
Trivial subcommands (zero-exit expected) ─────────────────────────── +assert_runs "help" "bash '$SCRIPT' help" "host-side wrapper" +assert_runs "--help" "bash '$SCRIPT' --help" "host-side wrapper" +assert_runs "-h" "bash '$SCRIPT' -h" "host-side wrapper" +assert_runs "version" "bash '$SCRIPT' version" "" +assert_runs "--version" "bash '$SCRIPT' --version" "" + +# ── 4. check — host-only, must run to completion even without docker/nvidia. +# This is the path that broke last time (multi-byte glyph + set -u). +assert_runs "check" "bash '$SCRIPT' check" "host readiness report" + +# ── 5. systemd-surface subcommands — every one of these used to crash with +# `LUCEBOX_HOST_HAS_SYSTEMD: unbound variable` because cmd_systemctl_passthrough +# / cmd_logs / cmd_systemd_uninstall reached require_systemd without first +# calling probe_host. The fix routes through require_systemd → probe_host +# when the var is unset; these tests pin that invariant. +# +# On the bare runner there is no user systemd, no installed unit, and no +# docker — so every command is expected to exit non-zero with a CLEAN error +# message. What we forbid is a raw bash "unbound variable" leak. +for sub in start stop restart enable disable status install uninstall; do + assert_no_set_u_leak "$sub dispatch (no set -u leak)" "$SCRIPT" "$sub" +done +# `logs` is special: it execs `journalctl -f` which streams every historical +# journal record for the unit. On a dev box where the lucebox service has +# actually run, that stream contains every past error — including the very +# bugs this test exists to prevent — and we'd false-positive on them. Pass +# `-n 0 --no-pager` so we only see new entries (none, in the test window). +assert_no_set_u_leak "logs dispatch (no set -u leak)" "$SCRIPT" logs -n 0 --no-pager + +# ── 6. server-spawning subcommands — exercise the dispatch up to where +# the missing docker daemon stops them. 
# `serve` is intentionally skipped
# because on a host with a working docker + the cuda12 image already
# pulled, it would actually exec into the container — at which point
# we'd be testing the image's entrypoint, not the wrapper. `pull` just
# execs `docker pull`, so we still smoke its host-side dispatch.
assert_no_set_u_leak "pull dispatch (no set -u leak)" "$SCRIPT" pull

# ── 7. Unknown subcommand → cmd_in_container fallback path. Same rule:
# clean error, no raw bash leak.
assert_no_set_u_leak "unknown subcommand dispatch" "$SCRIPT" no-such-subcommand

# ── 8. Pre-populated LUCEBOX_HOST_* env (simulates an already-probed host
# whose vars are passed in from a parent process). Useful in CI matrices
# where we want to mock a "good host" without nvidia-smi/docker on PATH.
#
# The variables are handed to the child through `env`, so they exist only in
# the wrapper's environment and never in this harness's own shell. The
# wrapper's exit status is deliberately discarded (`|| true`): only the
# captured output is inspected.
out=$(
    env \
        NO_COLOR=1 \
        LUCEBOX_HOST_HAS_SYSTEMD=0 \
        LUCEBOX_HOST_HAS_DOCKER=0 \
        LUCEBOX_HOST_HAS_CTK=none \
        LUCEBOX_HOST_GPU_VENDOR=none \
        LUCEBOX_HOST_GPU_NAME= \
        LUCEBOX_HOST_GPU_COUNT=0 \
        LUCEBOX_HOST_VRAM_GB=0 \
        LUCEBOX_HOST_GPU_SM= \
        LUCEBOX_HOST_DRIVER_VERSION= \
        LUCEBOX_HOST_DRIVER_MAJOR=0 \
        LUCEBOX_HOST_NPROC=1 \
        LUCEBOX_HOST_RAM_GB=0 \
        LUCEBOX_HOST_IS_WSL=0 \
        LUCEBOX_HOST_DOCKER_VERSION= \
        timeout 5 bash "$SCRIPT" start 2>&1 || true
)
if ! grep -qE 'unbound variable|syntax error' <<<"$out"; then
    report ok "start with pre-populated LUCEBOX_HOST_* env"
else
    report fail "start with pre-populated LUCEBOX_HOST_* env" "leak: $(head -3 <<<"$out")"
fi

# ── 8b. PIN the top-of-script LUCEBOX_HOST_* safe-default seeds. Even with
# probe_host short-circuited to a no-op (the worst-case bug recurrence: a
# future refactor accidentally deletes the call from a dispatch path) the
# wrapper must not leak `unbound variable` on `start`. We achieve "probe_host
# is a no-op" by exporting `_LUCEBOX_HOST_PROBED=1` so ensure_probed skips
# the real probe — equivalent to a future refactor that calls ensure_probed
# but mis-implements the gate.
out=$(
    NO_COLOR=1 _LUCEBOX_HOST_PROBED=1 timeout 5 bash "$SCRIPT" start 2>&1 || true
)
if ! grep -qE 'unbound variable|syntax error' <<<"$out"; then
    report ok "start with probe_host bypassed (seed defaults intact)"
else
    report fail "start with probe_host bypassed (seed defaults must catch this)" "leak: $(head -3 <<<"$out")"
fi

# Same for every other systemd-surface subcommand, since the seed defaults
# are the only thing keeping these safe under `set -u` if probe_host is ever
# bypassed. Note that `-n 0 --no-pager` is appended for every subcommand in
# the list, not only for `logs`.
for sub in stop restart enable disable status install uninstall logs; do
    out=$(
        NO_COLOR=1 _LUCEBOX_HOST_PROBED=1 \
            timeout 5 bash "$SCRIPT" "$sub" -n 0 --no-pager 2>&1 || true
    )
    if ! grep -qE 'unbound variable|syntax error' <<<"$out"; then
        report ok "$sub with probe_host bypassed"
    else
        report fail "$sub with probe_host bypassed" "leak: $(head -3 <<<"$out")"
    fi
done

# ── 8c. Install path writes a robust unit file. Use a sandbox HOME so we
# don't clobber the developer's real ~/.config/systemd/user/lucebox.service,
# and verify the generated unit contains the Environment= / ExecStartPre=
# hardening that Bug 2 ("systemctl start succeeds but no container") added.
# The install runs in a host with no real systemd (the sandbox doesn't have
# `systemctl --user`), so we pre-seed LUCEBOX_HOST_HAS_SYSTEMD=1 to slip past
# the require_systemd gate, then stub out the `systemctl` binary itself so
# daemon-reload is a no-op.
#
# Takes no arguments. Reports exactly one result via `report`, and removes
# the sandbox directory on every path.
test_install_writes_robust_unit() {
    local label="install writes hardened unit file"
    local sandbox shim_dir unit_path out rc
    local tool directive missing=""

    sandbox=$(mktemp -d)
    shim_dir="$sandbox/bin"
    unit_path="$sandbox/.config/systemd/user/lucebox.service"
    mkdir -p "$shim_dir"

    # One identical stub is written under four names (systemctl, docker,
    # nvidia-smi, loginctl) so the install's require_host_prereqs and
    # daemon-reload calls all succeed. The stub dispatches on its first
    # argument only and always exits 0.
    for tool in systemctl docker nvidia-smi loginctl; do
        cat > "$shim_dir/$tool" <<'STUB'
#!/usr/bin/env bash
case "$1" in
    ps|version) exit 0 ;;
    show-user) echo "Linger=no" ;;
    --query-gpu=*) echo "Fake, 24576, 550.00, 8.9" ;;
esac
exit 0
STUB
        chmod +x "$shim_dir/$tool"
    done

    # Run the installer with errexit off inside the substitution so the
    # trailing "RC=<n>" marker is always appended to the captured output.
    out=$(
        set +e
        HOME="$sandbox" \
        XDG_CONFIG_HOME="$sandbox/.config" \
        XDG_DATA_HOME="$sandbox/.local/share" \
        PATH="$shim_dir:$PATH" \
        LUCEBOX_HOST_HAS_SYSTEMD=1 \
        LUCEBOX_HOST_HAS_DOCKER=1 \
        LUCEBOX_HOST_HAS_CTK=runtime \
        LUCEBOX_HOST_GPU_VENDOR=nvidia \
        _LUCEBOX_HOST_PROBED=1 \
        NO_COLOR=1 \
        timeout 10 bash "$SCRIPT" install 2>&1
        echo "RC=$?"
    )
    # Recover the exit code from the marker; 99 means "marker not found".
    rc=$(grep -oE 'RC=[0-9]+$' <<<"$out" | tail -1 | sed 's/^RC=//')
    rc="${rc:-99}"

    if [ "$rc" != "0" ]; then
        report fail "$label" "exit $rc; output: $(head -10 <<<"$out")"
    elif [ ! -f "$unit_path" ]; then
        report fail "$label" "unit file not written at $unit_path"
    else
        # Required hardening — each line is a Bug-2 root-cause defence:
        #   ExecStartPre=…docker rm -f … → clear orphaned container name
        #   Environment=PATH=…           → systemd user-session PATH is sparse
        #   Environment=LUCEBOX_IMAGE=…  → pin the image the user installed against
        local required=(
            "ExecStartPre="
            "Environment=PATH="
            "Environment=LUCEBOX_IMAGE="
            "Environment=LUCEBOX_VARIANT="
            "Environment=LUCEBOX_PORT="
            "Environment=LUCEBOX_MODELS="
        )
        for directive in "${required[@]}"; do
            if ! grep -qF "$directive" "$unit_path"; then
                missing="$missing $directive"
            fi
        done
        if [ -n "$missing" ]; then
            report fail "$label" "unit missing required directives:$missing"
        else
            report ok "$label"
        fi
    fi
    rm -rf "$sandbox"
}
test_install_writes_robust_unit

# ── 9.
# entrypoint.sh dispatch — confirm the in-container dispatch routes
# trivial subcommands (shell, an unknown passthrough) without firing
# `set -u` on DFLASH_* / DRAFT_* vars that only get assigned on the
# serve path. We can't fully exec the serve path here (it needs nvidia
# and the compiled binary) but we can confirm the early dispatch is clean.
#
# This simulates 'docker run ... lucebox-hub:cuda12 shell echo ok': the
# entrypoint is invoked with `shell -c <command>`, and a stub `uv` is placed
# first on PATH.
#
# The repo root and entrypoint path are passed to the inner shell as
# positional parameters ($1, $2) and quoted there, rather than being
# interpolated into the script text. An unquoted interpolation breaks on a
# checkout path containing whitespace — and the failure ("No such file")
# would not match the leak pattern below, so the test would pass without
# ever running the entrypoint.
out=$(NO_COLOR=1 SUBCMD=help bash -c '
    cd "$1"
    tmpdir=$(mktemp -d)
    cleanup() { rm -rf "$tmpdir"; }
    trap cleanup EXIT
    cat > "$tmpdir/uv" <<"STUB"
#!/usr/bin/env bash
echo "uv stub: $*"
exit 0
STUB
    chmod +x "$tmpdir/uv"
    PATH="$tmpdir:$PATH" bash "$2" shell -c "echo entrypoint-shell-dispatched"
' bash "$ROOT" "$ENTRYPOINT" 2>&1 || true)
if grep -qE 'unbound variable|syntax error' <<<"$out"; then
    report fail "entrypoint shell dispatch (no set -u leak)" "leak: $(head -5 <<<"$out")"
else
    report ok "entrypoint shell dispatch (no set -u leak)"
fi

# ── 10. entrypoint.sh serve-path under `set -u` — drive the REAL
# server/scripts/entrypoint.sh through its full draft-resolution block by
# sandboxing it with a synthetic DFLASH_DIR layout and a `dflash_server`
# shim that captures argv instead of execing the native binary. The
# `DRAFT_FAMILY_GLOB: unbound variable` bug fired precisely here — the
# previous version of this test inlined the block instead of sourcing
# the real file, and silently passed even when the shipped script was
# broken. So this test invokes server/scripts/entrypoint.sh directly.
# Build the shared entrypoint-serve sandbox: a synthetic DFLASH_DIR layout
# plus the `dflash_server` + `nvidia-smi` shims used by the three serve-path
# tests below. Assigns sandbox/models_dir/draft_dir/bin_dir/shim_dir into the
# CALLER'S scope (bash dynamic scoping) — the caller must `local`-declare
# them first. Same factoring idea as _make_docker_shim.
_make_entrypoint_sandbox() {
    sandbox=$(mktemp -d)
    models_dir="$sandbox/models"
    draft_dir="$models_dir/draft"
    bin_dir="$sandbox/build"
    shim_dir="$sandbox/bin"
    mkdir -p "$draft_dir" "$bin_dir" "$shim_dir"

    # `dflash_server` shim — print argv (shell-quoted via %q) and exit 0
    # instead of running anything.
    cat > "$bin_dir/dflash_server" <<'STUB'
#!/usr/bin/env bash
printf '[shim] dflash_server'
for a in "$@"; do printf ' %q' "$a"; done
printf '\n'
exit 0
STUB

    # `nvidia-smi` shim — pretend we have a 24 GB GPU so the autotune
    # block runs but doesn't pick the under-12-GB warn tier.
    cat > "$shim_dir/nvidia-smi" <<'STUB'
#!/usr/bin/env bash
case "$*" in
    *"--query-gpu=memory.total"*) echo 24576 ;;
    -L|*-L*) echo "GPU 0: Fake (UUID: 0)" ;;
    *) echo "ok" ;;
esac
exit 0
STUB

    chmod +x "$bin_dir/dflash_server" "$shim_dir/nvidia-smi"
}

# Run the real entrypoint's `serve` path with an explicit DFLASH_TARGET and a
# draft DIRECTORY, then require: no set -u / syntax leak, exit 0, and the
# dflash_server shim actually ran.
#   $1  label for the report line
#   $2  file name of the (empty) target GGUF created under models/
#   $3  file name of the (empty) draft GGUF created under models/draft/
test_entrypoint_serve_path() {
    local label="$1" target_name="$2" draft_file="$3"
    local sandbox draft_dir models_dir bin_dir shim_dir
    local out rc
    _make_entrypoint_sandbox

    # Synthetic target (must be a real file at least 5 GB to pass the
    # auto-detect block, OR we set DFLASH_TARGET explicitly to skip it).
    touch "$models_dir/$target_name" "$draft_dir/$draft_file"

    out=$(
        set +e
        PATH="$shim_dir:$PATH" \
        DFLASH_DIR="$sandbox" \
        DFLASH_SERVER_BIN="$bin_dir/dflash_server" \
        DFLASH_TARGET="$models_dir/$target_name" \
        DFLASH_DRAFT="$draft_dir" \
        timeout 10 bash "$ENTRYPOINT" serve 2>&1
        echo "RC=$?"
    )
    rc=$(grep -oE 'RC=[0-9]+$' <<<"$out" | tail -1 | sed 's/^RC=//')
    rc="${rc:-99}"
    rm -rf "$sandbox"

    # Checks are ordered most-specific first; the first one that trips is
    # the one reported.
    if grep -qE 'unbound variable|syntax error' <<<"$out"; then
        report fail "$label" "leak: $(head -5 <<<"$out")"
        return
    fi
    if [ "$rc" != "0" ]; then
        report fail "$label" "exit $rc; output: $(head -5 <<<"$out")"
        return
    fi
    if ! grep -qF "[shim] dflash_server" <<<"$out"; then
        report fail "$label" "shim never executed; output: $(head -5 <<<"$out")"
        return
    fi
    report ok "$label"
}

# Exercise three branches of the family-glob logic: qwen3.6 + gemma-4 (the
# two families with family-specific globs) and an unknown target that
# triggers the empty-FAMILY_GLOBS fallback to the generic glob list.
test_entrypoint_serve_path "entrypoint serve: qwen3.6 family match" \
    "Qwen3.6-27B-Q4_K_M.gguf" "dflash-draft-3.6-test.gguf"
test_entrypoint_serve_path "entrypoint serve: gemma-4-31b family match" \
    "gemma-4-31B-it-Q8_0.gguf" "gemma-4-31b-dflash-q8.gguf"
test_entrypoint_serve_path "entrypoint serve: generic fallback" \
    "Mystery-Model-7B.gguf" "model.gguf"

# ── 11. entrypoint.sh serve-path with MULTIPLE target-sized GGUFs in
# models/. The single-candidate fixture in test 10 doesn't exercise the
# auto-detect path that picks "first alphabetically" when more than one
# target ≥5 GB lives in the models dir — that path is what the sindri
# decode sweep tripped over after the user added the qwen3.6-moe preset
# (commit 4b6bced) alongside the existing Qwen3.6-27B target. The crash
# manifested as `DRAFT_FAMILY_GLOB: unbound variable`, and the partial
# fix in a87bb93 didn't survive a recurrence.
#
# Uses sparse files (`truncate -s 6G`) so the test stays cheap on disk —
# the 6 GB virtual size is enough to clear the find ... -size +5G filter
# without consuming actual blocks. Skip if truncate is missing (e.g.
# minimal busybox CI image).
#   $1  label for the report line
test_entrypoint_multi_target() {
    local label="$1"
    shift
    if ! command -v truncate &>/dev/null; then
        report ok "$label (skipped: truncate not available)"
        return
    fi
    local sandbox draft_dir models_dir bin_dir shim_dir
    local out rc
    _make_entrypoint_sandbox

    # Two qwen3.6-shaped targets ≥5 GB each — exactly the layout that
    # broke on sindri (Qwen3.6-27B + Qwen3.6-35B-A3B-UD-Q4_K_M).
    truncate -s 6G "$models_dir/Qwen3.6-27B-Q4_K_M.gguf"
    truncate -s 6G "$models_dir/Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"
    touch "$draft_dir/dflash-draft-3.6-test.gguf"

    out=$(
        set +e
        # NOTE: deliberately NOT setting DFLASH_TARGET — the test must
        # exercise the auto-detect block (line ~151). The explicit-config
        # workaround from the bug report would skip the bug entirely.
        PATH="$shim_dir:$PATH" \
        DFLASH_DIR="$sandbox" \
        DFLASH_SERVER_BIN="$bin_dir/dflash_server" \
        DFLASH_DRAFT="$draft_dir" \
        timeout 10 bash "$ENTRYPOINT" serve 2>&1
        echo "RC=$?"
    )
    rc=$(grep -oE 'RC=[0-9]+$' <<<"$out" | tail -1 | sed 's/^RC=//')
    rc="${rc:-99}"
    rm -rf "$sandbox"

    # The auto-detect block is entered (so any `set -u` regression on
    # DRAFT_FAMILY_GLOB will trip) and then the entrypoint refuses to
    # auto-pick — the deliberate safety added in PR #334's cubic round.
    # We require: no set-u leak, the refuse warn fired, a non-zero exit
    # (so a future regression that logs the warning but still returns 0
    # cannot slip past — the container MUST fail to start, not silently
    # auto-pick a stale GGUF), and the shim was NOT exec'd.
    if grep -qE 'unbound variable|syntax error' <<<"$out"; then
        report fail "$label" "leak: $(grep -E 'unbound variable|syntax error' <<<"$out" | head -3)"
        return
    fi
    if ! grep -qF "Refusing to auto-select" <<<"$out"; then
        report fail "$label" "refuse-to-auto-pick warn missing — did the auto-detect block fire? rc=$rc output: $(head -5 <<<"$out")"
        return
    fi
    if [ "$rc" = "0" ]; then
        report fail "$label" "refuse warn fired but rc=0 — entrypoint must exit non-zero on multi-target refuse"
        return
    fi
    if grep -qF "[shim] dflash_server" <<<"$out"; then
        report fail "$label" "shim was exec'd despite multi-target refuse"
        return
    fi
    report ok "$label"
}

# Drive the regression: the sindri layout that broke (post-moe-preset).
test_entrypoint_multi_target "entrypoint serve: multi-target auto-detect (no DRAFT_FAMILY_GLOB leak)"

# Also drive the DFLASH_DRAFT-is-a-file path. The init at entrypoint.sh:257
# sits inside `if [ -d "$DFLASH_DRAFT" ]; then` — when DRAFT is a file the
# block is skipped, and any future read of DRAFT_FAMILY_GLOB outside the
# block would trip set -u. The defensive `:-` guard at the read site is
# meant to survive that refactor; this test guarantees it.
#   $1  label for the report line
test_entrypoint_draft_is_file() {
    local label="$1"
    local sandbox draft_dir models_dir bin_dir shim_dir
    local out rc
    _make_entrypoint_sandbox

    # Empty target plus an empty draft; DFLASH_DRAFT below points at the
    # draft FILE itself (not its directory).
    touch "$models_dir/Qwen3.6-27B-Q4_K_M.gguf" "$draft_dir/dflash-draft-3.6-test.gguf"

    out=$(
        set +e
        PATH="$shim_dir:$PATH" \
        DFLASH_DIR="$sandbox" \
        DFLASH_SERVER_BIN="$bin_dir/dflash_server" \
        DFLASH_TARGET="$models_dir/Qwen3.6-27B-Q4_K_M.gguf" \
        DFLASH_DRAFT="$draft_dir/dflash-draft-3.6-test.gguf" \
        timeout 10 bash "$ENTRYPOINT" serve 2>&1
        echo "RC=$?"
    )
    rc=$(grep -oE 'RC=[0-9]+$' <<<"$out" | tail -1 | sed 's/^RC=//')
    rc="${rc:-99}"
    rm -rf "$sandbox"

    if grep -qE 'unbound variable|syntax error' <<<"$out"; then
        report fail "$label" "leak: $(grep -E 'unbound variable|syntax error' <<<"$out" | head -3)"
        return
    fi
    if [ "$rc" != "0" ]; then
        report fail "$label" "exit $rc; output: $(head -5 <<<"$out")"
        return
    fi
    report ok "$label"
}
test_entrypoint_draft_is_file "entrypoint serve: DFLASH_DRAFT is a file (no DRAFT_FAMILY_GLOB leak)"

# ── 12.
# entrypoint.sh writes HOST_INFO atomically on the serve path. The
# C++ server reads /opt/lucebox-hub/HOST_INFO into ServerConfig.host_info
# and surfaces it under /props.host. We can't write to /opt/lucebox-hub
# from the test runner, so override the path by sourcing the helpers and
# calling _build_host_info_json directly. The full entrypoint runs in
# test 10/11 already; this test pins the JSON shape independently.
#   $1  label for the report line
test_entrypoint_host_info_json() {
    local label="$1"
    # Single source of truth for the helpers lifted out of entrypoint.sh.
    # The same list feeds both the `source` loop and the `declare -f`
    # hand-off to the clean-environment shell below, so the two paths cannot
    # drift apart (previously `_trim` was sourced here but never forwarded).
    local helpers=(
        _json_escape
        _json_str_or_null
        _json_int_or_null
        _trim
        _emit_gpu_array
        _build_host_info_json
    )
    local helper out
    for helper in "${helpers[@]}"; do
        # Extract from the line that starts with "<name>() {" through the
        # first line that starts with "}". index()==1 is a literal
        # prefix match, so no regex escaping of the parentheses is needed.
        # shellcheck disable=SC1090
        source <(awk -v head="${helper}() {" 'index($0, head) == 1, /^\}/' "$ENTRYPOINT")
    done

    # Populated path. The fixture variables are assigned INSIDE the command
    # substitution's subshell so they are visible to the helper but do not
    # leak into the harness's own global scope.
    out=$(
        LUCEBOX_HOST_OS_PRETTY="Ubuntu 22.04.3 LTS"
        LUCEBOX_HOST_KERNEL="6.6.87.2-microsoft-standard-WSL2"
        LUCEBOX_HOST_WSL_VERSION="wsl2"
        LUCEBOX_HOST_DOCKER_VERSION="29.1.3"
        LUCEBOX_HOST_DRIVER_VERSION="596.36"
        LUCEBOX_HOST_NVIDIA_CTK_VERSION="1.16.2"
        LUCEBOX_HOST_CPU_MODEL='Intel(R) Core(TM) Ultra 9 275HX'
        LUCEBOX_HOST_NPROC=24
        LUCEBOX_HOST_RAM_GB=64
        LUCEBOX_HOST_GPU_LIST_CSV="0, GPU-abc, 00000000:01:00.0, NVIDIA RTX 5090, 12.0, 24576 MiB, 175.00 W"
        LUCEBOX_HOST_CUDA_VISIBLE_DEVICES="0"
        _build_host_info_json "lucebox.sh" "lucebox.sh" "2026-05-28T20:31:42Z"
    )
    if ! python3 -c "import json,sys; d=json.loads(sys.argv[1]); assert d['os_pretty']=='Ubuntu 22.04.3 LTS'; assert d['wsl_version']=='wsl2'; assert d['nvidia_ctk_version']=='1.16.2'; assert d['source']=='lucebox.sh'; assert d['gpus'][0]['vram_gb']==24; assert d['gpus'][0]['name']=='NVIDIA RTX 5090'" "$out" >/dev/null 2>&1; then
        report fail "$label (populated)" "JSON shape mismatch: $out"
        return
    fi

    # Now drive the unknown path: every LUCEBOX_HOST_* unset → nulls and
    # source=unknown. `env -i` starts the child with an empty environment,
    # so the helper definitions are serialised into the script text.
    out=$(env -i bash -c "
        set -u
        $(declare -f "${helpers[@]}")
        _build_host_info_json 'unknown' 'entrypoint.sh' '2026-05-28T20:31:42Z'
    ")
    if ! python3 -c "import json,sys; d=json.loads(sys.argv[1]); assert d['source']=='unknown'; assert d['gpus']==[]; assert d['os_pretty'] is None" "$out" >/dev/null 2>&1; then
        report fail "$label (unknown)" "JSON shape mismatch: $out"
        return
    fi
    report ok "$label"
}
test_entrypoint_host_info_json "entrypoint HOST_INFO JSON shape (populated + unknown)"

# ── install.sh end-to-end ─────────────────────────────────────────────────
# Drive install.sh against a file:// URL pointing at a fixture lucebox.sh,
# verify the installed copy has LUCEBOX_INSTALLED_FROM rewritten to the
# fetched URL — that's the contract that `lucebox update` depends on to
# preserve the user's channel across upgrades.
#   $1  label for the report line
test_install_sh_bakes_source_url() {
    local label="$1"
    local tmp dest_dir dest_path src_url out rc
    tmp=$(mktemp -d -t lucebox-install.XXXXXX)
    # Use the real lucebox.sh as the "remote" file — `file://` works with
    # curl out of the box and exercises the same install.sh code path as
    # an https fetch would.
    src_url="file://$SCRIPT"
    dest_dir="$tmp/bin"
    dest_path="$dest_dir/lucebox"
    out=$(LUCEBOX_INSTALL_URL="$src_url" LUCEBOX_INSTALL_DEST="$dest_path" \
        NO_COLOR=1 bash "$INSTALLER" 2>&1) || rc=$?
    rc="${rc:-0}"
    if [ "$rc" -ne 0 ]; then
        rm -rf "$tmp"
        report fail "$label" "installer exited $rc; output: $(printf '%s' "$out" | head -3)"
        return
    fi
    if [ ! -x "$dest_path" ]; then
        rm -rf "$tmp"
        report fail "$label" "installed file missing or not executable at $dest_path"
        return
    fi
    # Whole-line (-x) fixed-string (-F) match: $src_url embeds a filesystem
    # path, whose characters (., +, [, * …) must not be read as regex syntax.
    if ! grep -qxF -- "LUCEBOX_INSTALLED_FROM=\"$src_url\"" "$dest_path"; then
        rm -rf "$tmp"
        report fail "$label" "LUCEBOX_INSTALLED_FROM not rewritten in installed copy"
        return
    fi
    rm -rf "$tmp"
    report ok "$label"
}
test_install_sh_bakes_source_url "install.sh bakes LUCEBOX_INSTALLED_FROM into installed copy"

# ── update dispatch ───────────────────────────────────────────────────────
# `lucebox update` must dispatch to cmd_update — verify it's wired in the
# main case statement and appears in --help. We can't actually run the
# update (it'd curl + replace this very script) so the test is parse-level.
#   $1  label for the report line
test_update_subcommand_wired() {
    local label="$1"
    local out
    # Invoke through `bash` like every other test here, so the check does not
    # depend on the wrapper's executable bit. A non-zero exit from --help is
    # tolerated at this point and surfaces through the grep below instead.
    out=$(NO_COLOR=1 LUCEBOX_HOST_HAS_SYSTEMD=0 bash "$SCRIPT" --help 2>&1 || true)
    # Indentation-tolerant: an indented line whose first word is `update`.
    if ! grep -qE '^[[:space:]]+update[[:space:]]' <<<"$out"; then
        report fail "$label" "update command missing from --help output"
        return
    fi
    if ! grep -q '^[[:space:]]*update)[[:space:]]*cmd_update' "$SCRIPT"; then
        report fail "$label" "update) → cmd_update dispatch not wired"
        return
    fi
    report ok "$label"
}
test_update_subcommand_wired "lucebox update subcommand is wired"

# ── IMAGE_BASE derived from install source ────────────────────────────────
# Source lucebox.sh in a subshell with LUCEBOX_INSTALLED_FROM pointing at
# various URLs, then check that IMAGE_BASE comes out right. Uses
# `set -e; return` early so we don't actually run the wrapper's main().
+test_image_base_derives_from_install_url() { + local label="$1" url expected got + for case in \ + "https://raw.githubusercontent.com/easel/lucebox-hub/feat/lucebox-docker/lucebox.sh|ghcr.io/easel/lucebox-hub" \ + "https://raw.githubusercontent.com/Luce-Org/lucebox-hub/main/lucebox.sh|ghcr.io/luce-org/lucebox-hub" \ + "https://raw.githubusercontent.com/easel/lucebox-hub/601ab52/lucebox.sh|ghcr.io/easel/lucebox-hub" \ + "https://example.com/bogus|ghcr.io/luce-org/lucebox-hub" + do + url="${case%%|*}" + expected="${case##*|}" + # Extract the derivation function from the script and run it in + # isolation — sourcing the whole script triggers main() and side + # effects we don't want under a test harness. + got=$(bash -c ' + '"$(sed -n "/^_lucebox_derive_image()/,/^}/p" "$SCRIPT")"' + _lucebox_derive_image "$1" + ' bash "$url") + if [ "$got" != "$expected" ]; then + report fail "$label" "url=$url expected=$expected got=$got" + return + fi + done + report ok "$label" +} +test_image_base_derives_from_install_url "IMAGE_BASE derived from LUCEBOX_INSTALLED_FROM (4 URL shapes)" + +# ── config.toml reader + resolver ───────────────────────────────────────── +# Drive _lucebox_config_get + _lucebox_resolve against a fixture +# config.toml in a tmp $LUCEBOX_HOME. Verifies the wrapper agrees with +# the Python CLI on every scalar that lives in [image]/[runtime]/[paths]. +test_config_toml_reader_and_resolve() { + local label="$1" tmp got + tmp=$(mktemp -d -t lucebox-cfg.XXXXXX) + cat > "$tmp/config.toml" <<'TOML' +[image] +variant = "cuda13" +registry = "ghcr.io/myorg/forkedhub" + +[runtime] +port = 9090 +container_name = "luce-test" + +[paths] +models = "/srv/models" + +[dflash] +budget = 22 +lazy = false +TOML + + # Exercise both helpers + the resolver via a subshell that sources + # the relevant snippets out of lucebox.sh. 
Each case is a triple: + # env_value | toml_key | default | expected + local cases=( + "|image.registry|ghcr.io/luce-org/lucebox-hub|ghcr.io/myorg/forkedhub" + "|image.variant|cuda12|cuda13" + "|runtime.port|8080|9090" + "|runtime.container_name|lucebox|luce-test" + "|paths.models|/var/lib/lucebox|/srv/models" + "OVERRIDE|image.registry|ghcr.io/luce-org/lucebox-hub|OVERRIDE" + "|missing.key|fallback-default|fallback-default" + ) + local case env_value toml_key default expected + for case in "${cases[@]}"; do + IFS='|' read -r env_value toml_key default expected <<<"$case" + got=$(LUCEBOX_HOME="$tmp" bash -c ' + '"$(sed -n "/^_lucebox_config_path()/,/^}/p" "$SCRIPT")"' + '"$(sed -n "/^_lucebox_config_get()/,/^}/p" "$SCRIPT")"' + '"$(sed -n "/^_lucebox_resolve()/,/^}/p" "$SCRIPT")"' + _lucebox_resolve "$1" "$2" "$3" + ' bash "$env_value" "$toml_key" "$default") + if [ "$got" != "$expected" ]; then + rm -rf "$tmp" + report fail "$label" "env=$env_value key=$toml_key default=$default expected=$expected got=$got" + return + fi + done + rm -rf "$tmp" + report ok "$label" +} +test_config_toml_reader_and_resolve "config.toml reader + env > toml > default resolution (7 cases)" + +# ── cmd_serve under systemd: INVOCATION_ID short-circuits is-active ────── +# When systemd invokes the wrapper as a unit's ExecStart, it sets +# $INVOCATION_ID. The wrapper must NOT then refuse "already running under +# systemd" — that's a self-defeating check that turns into a restart loop. +# Verify the guard is present in the source (the actual behavior requires +# a running systemd unit to test end-to-end, which the harness can't do). +test_cmd_serve_invocation_id_guard() { + local label="$1" + if ! grep -q 'INVOCATION_ID' "$SCRIPT"; then + report fail "$label" "INVOCATION_ID guard missing from cmd_serve preflight" + return + fi + # The guard must be the AND-condition gating the is-active check. 
+ # If grep finds the is-active line WITHOUT INVOCATION_ID nearby, + # the guard isn't wired correctly. + if ! awk ' + /INVOCATION_ID/ { saw_guard = NR } + /is-active --quiet "\$UNIT_NAME"/ { + if (saw_guard && NR - saw_guard <= 3) found = 1 + } + END { exit (found ? 0 : 1) } + ' "$SCRIPT"; then + report fail "$label" "INVOCATION_ID not adjacent to is-active check (guard not wired)" + return + fi + report ok "$label" +} +test_cmd_serve_invocation_id_guard "cmd_serve has INVOCATION_ID guard on systemd is-active check" + +# ── cmd_systemctl_passthrough: smart start ─────────────────────────────── +# Verify the source has the "already active" + "restart loop" short +# circuits for the start action. Behavior-level testing requires a real +# unit; this is a source-level guarantee that the branches exist. +test_cmd_start_already_active_shortcircuit() { + local label="$1" + if ! grep -q 'is already active' "$SCRIPT"; then + report fail "$label" "already-active short-circuit missing" + return + fi + if ! grep -q 'is in restart-loop' "$SCRIPT"; then + report fail "$label" "restart-loop short-circuit missing" + return + fi + report ok "$label" +} +test_cmd_start_already_active_shortcircuit "lucebox start has already-active + restart-loop short-circuits" + +# ── install.sh SHA-pin refusal + CHANNEL override ──────────────────────── +# A SHA-pinned LUCEBOX_INSTALL_URL with no LUCEBOX_INSTALL_CHANNEL must +# refuse — otherwise `lucebox update` would re-fetch that frozen SHA +# forever. With CHANNEL set, the bake-in uses the channel URL, not the +# fetch URL. +test_install_sha_pin_refusal_and_channel_override() { + local label="$1" tmp got rc + tmp=$(mktemp -d -t lucebox-sha.XXXXXX) + + # Case 1: SHA-pinned URL without CHANNEL → must refuse + LUCEBOX_INSTALL_URL="https://raw.githubusercontent.com/easel/lucebox-hub/abc1234567/lucebox.sh" \ + LUCEBOX_INSTALL_DEST="$tmp/lucebox1" \ + NO_COLOR=1 \ + bash "$INSTALLER" >/dev/null 2>&1 && rc=0 || rc=$? 
+ if [ "$rc" -eq 0 ]; then + rm -rf "$tmp" + report fail "$label" "SHA-pinned URL without CHANNEL should have refused (rc=$rc, got success)" + return + fi + if [ -f "$tmp/lucebox1" ]; then + rm -rf "$tmp" + report fail "$label" "SHA-pinned URL refusal still wrote $tmp/lucebox1" + return + fi + + # Case 2: SHA-pinned URL WITH CHANNEL → installs, bakes CHANNEL + LUCEBOX_INSTALL_URL="file://$SCRIPT" \ + LUCEBOX_INSTALL_CHANNEL="https://raw.githubusercontent.com/easel/lucebox-hub/feat/lucebox-docker/lucebox.sh" \ + LUCEBOX_INSTALL_DEST="$tmp/lucebox2" \ + NO_COLOR=1 \ + bash "$INSTALLER" >/dev/null 2>&1 || rc=$? + got=$(grep '^LUCEBOX_INSTALLED_FROM=' "$tmp/lucebox2" 2>/dev/null || echo missing) + if [ "$got" != 'LUCEBOX_INSTALLED_FROM="https://raw.githubusercontent.com/easel/lucebox-hub/feat/lucebox-docker/lucebox.sh"' ]; then + rm -rf "$tmp" + report fail "$label" "CHANNEL not baked; got: $got" + return + fi + + rm -rf "$tmp" + report ok "$label" +} +test_install_sha_pin_refusal_and_channel_override "install.sh refuses SHA-pin without CHANNEL + honors CHANNEL override" + +# ── lucebox completion ─────────────────────────────────────────────────── +# The completion script must source cleanly and complete a known prefix. +test_completion_bash() { + local label="$1" out + out=$(LUCEBOX_HOST_HAS_SYSTEMD=0 bash -c ' + source <("$1" completion bash 2>/dev/null) + COMP_WORDS=(lucebox conf) + COMP_CWORD=1 + _lucebox_complete + printf "%s\n" "${COMPREPLY[@]}" + ' bash "$SCRIPT") + if ! 
grep -qx 'config' <<<"$out"; then + report fail "$label" "completion didn't suggest 'config' for prefix 'conf'; got: $(printf '%s' "$out" | tr '\n' ' ')" + return + fi + report ok "$label" +} +test_completion_bash "lucebox completion bash completes a known prefix" + +# ── docker exec routing ─────────────────────────────────────────────────── +# When the lucebox container is running, steady-state subcommands must +# `docker exec` into it (cheap + shares the live server's net namespace) and +# service-restarting subcommands (serve, pull, ...) must stay on +# `docker run`. We mock docker via a PATH shim that: +# - on `docker ps -q -f name=^lucebox$` prints a fake container id +# (signals "container is running") iff DOCKER_FAKE_RUNNING=1. +# - on any other call (run, exec, pull, ...) echoes its argv on stdout and +# exits 0. The test then asserts on the captured first-token (run vs exec) +# and trailing argv. +# +# nvidia-smi is stubbed too so probe_host doesn't barf, but the captured argv +# we care about is the docker invocation downstream of dispatch. +_make_docker_shim() { + local sandbox="$1" running="$2" + local shim_dir="$sandbox/bin" + mkdir -p "$shim_dir" + # docker shim: dispatch on first arg. Important: ps -q -f name=^lucebox$ + # must print a fake id when DOCKER_FAKE_RUNNING=1 and nothing otherwise. + # All other invocations (run, exec, pull) print "DOCKER_INVOKED " + # on stdout so the caller can grep it. + cat > "$shim_dir/docker" < "$shim_dir/nvidia-smi" <<'STUB' +#!/usr/bin/env bash +case "$*" in + *"--query-gpu="*) echo "Fake GPU, 24576, 550.00, 8.9" ;; + *) echo "ok" ;; +esac +exit 0 +STUB + chmod +x "$shim_dir/nvidia-smi" +} + +# Drive the wrapper through the dispatch case under test and capture the +# docker invocation it would have exec'd. 
Because `cmd_in_container` /
+# `cmd_exec_in_container` call `exec docker ...` we replace `exec` semantics
+# by running the wrapper in a subshell — the docker shim prints what it was
+# called with and the captured stdout is the proof.
+#
+#   $1    sandbox directory; used as HOME / XDG_* / LUCEBOX_HOME, and its
+#         bin/ subdirectory is put first on PATH.
+#   $2..  arguments passed verbatim to the wrapper ("$SCRIPT").
+#
+# Prints the wrapper's combined stdout+stderr. Always returns 0: errexit is
+# switched off around the call and the function's last command is `set -e`,
+# so the wrapper's own exit status is deliberately discarded.
+_run_wrapper_capture_docker() {
+  local sandbox="$1"; shift
+  local shim_dir="$sandbox/bin"
+  set +e
+  # The LUCEBOX_HOST_* values describe a fake single-GPU NVIDIA host.
+  # NOTE(review): presumably _LUCEBOX_HOST_PROBED=1 makes the wrapper trust
+  # them instead of probing the real machine — confirm against lucebox.sh.
+  # `timeout 10` bounds a wrapper that hangs.
+  HOME="$sandbox" \
+    XDG_CONFIG_HOME="$sandbox/.config" \
+    XDG_DATA_HOME="$sandbox/.local/share" \
+    LUCEBOX_HOME="$sandbox/.lucebox" \
+    PATH="$shim_dir:$PATH" \
+    LUCEBOX_HOST_HAS_DOCKER=1 \
+    LUCEBOX_HOST_HAS_CTK=runtime \
+    LUCEBOX_HOST_GPU_VENDOR=nvidia \
+    LUCEBOX_HOST_DRIVER_MAJOR=550 \
+    LUCEBOX_HOST_DRIVER_VERSION="550.00" \
+    LUCEBOX_HOST_GPU_NAME="Fake GPU" \
+    LUCEBOX_HOST_GPU_COUNT=1 \
+    LUCEBOX_HOST_VRAM_GB=24 \
+    LUCEBOX_HOST_GPU_SM="89" \
+    LUCEBOX_HOST_NPROC=8 \
+    LUCEBOX_HOST_RAM_GB=64 \
+    LUCEBOX_HOST_HAS_SYSTEMD=0 \
+    LUCEBOX_HOST_IS_WSL=0 \
+    LUCEBOX_HOST_DOCKER_VERSION="29.1.3" \
+    _LUCEBOX_HOST_PROBED=1 \
+    NO_COLOR=1 \
+    timeout 10 bash "$SCRIPT" "$@" 2>&1
+  set -e
+}
+
+# Shim created with arg 1 (container reported as running): `config get` must
+# go through `docker exec`, never `docker run`. $1 = label passed to `report`.
+test_routes_to_exec_when_running() {
+  local label="$1" sandbox out
+  sandbox=$(mktemp -d -t lucebox-route.XXXXXX)
+  _make_docker_shim "$sandbox" 1
+  out=$(_run_wrapper_capture_docker "$sandbox" config get model.preset || true)
+  rm -rf "$sandbox"
+  if ! grep -q '^DOCKER_INVOKED exec' <<<"$out"; then
+    report fail "$label" "expected 'docker exec' invocation; got: $(head -3 <<<"$out")"
+    return
+  fi
+  if grep -q '^DOCKER_INVOKED run' <<<"$out"; then
+    report fail "$label" "got 'docker run' when container is up — should have exec'd"
+    return
+  fi
+  # Sanity: the exec line ends with `lucebox config get model.preset`.
+  # Fixed-string match (-F) so the `.` in `model.preset` is literal.
+  if ! grep -qF 'lucebox config get model.preset' <<<"$out"; then
+    report fail "$label" "exec argv missing tail 'lucebox config get model.preset'; got: $(head -3 <<<"$out")"
+    return
+  fi
+  # The exec path must forward the LUCEBOX_* scalar env subset (shared
+  # with the docker-run path via _append_scalar_env). Pin LUCEBOX_IMAGE=
+  # so a regression in that helper is caught here.
+  if ! grep -q 'LUCEBOX_IMAGE=' <<<"$out"; then
+    report fail "$label" "exec argv missing 'LUCEBOX_IMAGE=' scalar env; got: $(head -3 <<<"$out")"
+    return
+  fi
+  report ok "$label"
+}
+test_routes_to_exec_when_running "config get routes to docker exec when container running"
+
+# Shim created with arg 0 (container reported as not running): `config get`
+# must fall back to `docker run`. $1 = label passed to `report`.
+test_routes_to_run_when_not_running() {
+  local label="$1" sandbox out
+  sandbox=$(mktemp -d -t lucebox-route.XXXXXX)
+  _make_docker_shim "$sandbox" 0
+  out=$(_run_wrapper_capture_docker "$sandbox" config get model.preset || true)
+  rm -rf "$sandbox"
+  if ! grep -q '^DOCKER_INVOKED run' <<<"$out"; then
+    report fail "$label" "expected 'docker run' invocation (container not running); got: $(head -3 <<<"$out")"
+    return
+  fi
+  if grep -q '^DOCKER_INVOKED exec' <<<"$out"; then
+    report fail "$label" "got 'docker exec' but container is not running — should fall back to run"
+    return
+  fi
+  report ok "$label"
+}
+test_routes_to_run_when_not_running "config get falls back to docker run when container not running"
+
+# The --no-exec command-line flag must select `docker run` even though the
+# shim reports a running container. $1 = label passed to `report`.
+test_no_exec_flag_forces_run() {
+  local label="$1" sandbox out
+  sandbox=$(mktemp -d -t lucebox-route.XXXXXX)
+  _make_docker_shim "$sandbox" 1
+  # --no-exec must override the prefer-exec path even when container is up.
+  out=$(_run_wrapper_capture_docker "$sandbox" --no-exec config get model.preset || true)
+  rm -rf "$sandbox"
+  if grep -q '^DOCKER_INVOKED exec' <<<"$out"; then
+    report fail "$label" "--no-exec failed to force run path; got exec"
+    return
+  fi
+  if ! grep -q '^DOCKER_INVOKED run' <<<"$out"; then
+    report fail "$label" "expected 'docker run' under --no-exec; got: $(head -3 <<<"$out")"
+    return
+  fi
+  report ok "$label"
+}
+test_no_exec_flag_forces_run "--no-exec flag forces docker run even when container is up"
+
+# Same expectation as the --no-exec flag test, but driven through the
+# LUCEBOX_NO_EXEC=1 environment variable. $1 = label passed to `report`.
+test_no_exec_env_forces_run() {
+  local label="$1" sandbox out
+  sandbox=$(mktemp -d -t lucebox-route.XXXXXX)
+  _make_docker_shim "$sandbox" 1
+  out=$(
+    LUCEBOX_NO_EXEC=1 _run_wrapper_capture_docker "$sandbox" config get model.preset || true
+  )
+  rm -rf "$sandbox"
+  if grep -q '^DOCKER_INVOKED exec' <<<"$out"; then
+    report fail "$label" "LUCEBOX_NO_EXEC=1 failed to force run path; got exec"
+    return
+  fi
+  if ! grep -q '^DOCKER_INVOKED run' <<<"$out"; then
+    report fail "$label" "expected 'docker run' under LUCEBOX_NO_EXEC=1; got: $(head -3 <<<"$out")"
+    return
+  fi
+  report ok "$label"
+}
+test_no_exec_env_forces_run "LUCEBOX_NO_EXEC=1 env override forces docker run"
+
+# `models list` must take the `docker exec` route when the shim reports a
+# running container. $1 = label passed to `report`.
+test_models_routes_to_exec() {
+  local label="$1" sandbox out
+  sandbox=$(mktemp -d -t lucebox-route.XXXXXX)
+  _make_docker_shim "$sandbox" 1
+  out=$(_run_wrapper_capture_docker "$sandbox" models list || true)
+  rm -rf "$sandbox"
+  if ! grep -q '^DOCKER_INVOKED exec' <<<"$out"; then
+    report fail "$label" "expected 'docker exec' for models when running; got: $(head -3 <<<"$out")"
+    return
+  fi
+  # Confirm the exec'd command tail is `lucebox models list` — the
+  # in-container CLI's argv must NOT be polluted with dispatcher bookkeeping.
+  if ! grep -qF 'lucebox models list' <<<"$out"; then
+    report fail "$label" "exec'd argv missing 'lucebox models list' tail"
+    return
+  fi
+  report ok "$label"
+}
+test_models_routes_to_exec "models list routes to docker exec when container running"
+
+# ── usage mentions exec-when-running ──────────────────────────────────────
+# Passes when the --help output mentions either `docker exec` or `--no-exec`
+# (case-insensitive). $1 = label passed to `report`.
+test_usage_mentions_exec_routing() {
+  local label="$1" out
+  # `|| true`: a non-zero exit from --help must surface as a reported
+  # failure below, not abort the whole suite under errexit.
+  out=$(NO_COLOR=1 bash "$SCRIPT" --help 2>&1 || true)
+  # -E with `|`: alternation via `\|` in a basic regex is a GNU extension.
+  if ! grep -qiE 'docker exec|--no-exec' <<<"$out"; then
+    report fail "$label" "usage doesn't mention the exec routing / --no-exec flag"
+    return
+  fi
+  report ok "$label"
+}
+test_usage_mentions_exec_routing "usage documents docker exec routing + --no-exec flag"
+
+# ── TTY flag selection. Regression guard for the process-substitution bug:
+# _set_tty_flags must run in the CALLER's scope so `[ -t 1 ]` inspects the
+# real terminal. If it is ever moved back behind `< <(...)` or `$(...)`,
+# fd 1 becomes a pipe and it emits -i even on a real tty, silently dropping
+# docker's -t and breaking the interactive client TUIs (lucebox claude …).
+# The rest of this suite runs non-tty, so only this test exercises the -it
+# branch — via a real PTY allocated by python's pty.fork.
+#
+# The function text is cut out of "$SCRIPT" and evaluated on its own, so the
+# wrapper itself is never started. $1 = label passed to `report`.
+test_tty_flags_selection() {
+  local label="$1" fn out
+  fn=$(awk '/^_set_tty_flags\(\) \{/,/^\}/' "$SCRIPT")
+
+  # (a) non-tty (stdin /dev/null, stdout a pipe) → -i
+  # `</dev/null` has to be a redirect: as a bare word it would only become
+  # $0 of the `bash -c` script and stdin would stay whatever the runner
+  # inherited. `|| true` turns a failed extraction into a reported failure
+  # (empty $out) instead of an errexit abort.
+  out=$(bash -c "$fn"$'\n''f=(); _set_tty_flags f; printf "%s" "${f[*]}"' </dev/null || true)
+  if [ "$out" != "-i" ]; then
+    report fail "$label" "non-tty expected -i, got '$out'"
+    return
+  fi
+
+  # (b) real tty on fd0+fd1 (python pty.fork) → -it
+  # stderr is discarded, so without `|| true` a python error (missing
+  # interpreter, function not found by the regex) would end the suite with
+  # no message at all; with it, $out is empty and the check below reports.
+  out=$(python3 - "$SCRIPT" <<'PY' 2>/dev/null || true
+import os, pty, re, sys
+src = open(sys.argv[1]).read()
+fn = re.search(r'^_set_tty_flags\(\) \{.*?^\}', src, re.S | re.M).group(0)
+script = fn + '\nf=(); _set_tty_flags f; printf "TTYFLAG=%s\\n" "${f[*]}"\n'
+pid, fd = pty.fork()
+if pid == 0:
+    os.execvp("bash", ["bash", "-c", script])
+buf = b""
+try:
+    while True:
+        chunk = os.read(fd, 1024)
+        if not chunk:
+            break
+        buf += chunk
+except OSError:
+    pass
+os.waitpid(pid, 0)
+m = re.search(rb"TTYFLAG=(\S+)", buf)
+sys.stdout.write(m.group(1).decode() if m else "NONE")
+PY
+)
+  if [ "$out" != "-it" ]; then
+    report fail "$label" "real tty expected -it, got '$out'"
+    return
+  fi
+  report ok "$label"
+}
+test_tty_flags_selection "_set_tty_flags: -it on a real tty, -i otherwise"
+
+echo
+if [ "$fail" -eq 0 ]; then
+  echo "[test_lucebox_sh] $pass passed, 0 failed"
+  exit 0
+else
+  echo "[test_lucebox_sh] $pass passed, $fail failed" >&2
+  exit 1
+fi
diff --git a/server/scripts/entrypoint.sh b/server/scripts/entrypoint.sh
index f35e295c4..fe37b3cbe 100755
--- a/server/scripts/entrypoint.sh
+++ b/server/scripts/entrypoint.sh
@@ -73,6 +73,15 @@ esac
 # write-failure (read-only FS, etc.) gets a warning and we continue.
 write_host_info() {
     local target="/opt/lucebox-hub/HOST_INFO"
+    # If the target dir doesn't exist (e.g. running the entrypoint outside
+    # the canonical container layout: unit tests, plain `docker run` without
+    # a bind mount), don't try to write — bash's own "No such file or
+    # directory" complaint on the `> "$tmp"` redirect below would leak to
+    # stderr regardless of `2>/dev/null` (that suppresses the command's
+    # stderr, not the redirect itself). HOST_INFO is informational.
+    if [ ! -d "$(dirname "$target")" ]; then
+        return 0
+    fi
     local tmp="${target}.tmp.$$"
     local collected_at
     collected_at=$(date -u +%FT%TZ 2>/dev/null || echo "")
@@ -158,6 +167,15 @@ _json_int_or_null() {
 # `nvidia-smi --query-gpu=index,uuid,pci.bus_id,name,compute_cap,memory.total,power.limit
 #  --format=csv,noheader` produced on the host) into a JSON
 # array. Empty CSV → "[]". Each row becomes one object.
+# Strip leading/trailing whitespace from a string. Pure bash (no sed fork)
+# via prefix/suffix removal of the longest run of [[:space:]] characters.
+_trim() {
+    local s="$1"
+    s="${s#"${s%%[![:space:]]*}"}" # leading
+    s="${s%"${s##*[![:space:]]}"}" # trailing
+    printf '%s' "$s"
+}
+
 _emit_gpu_array() {
     local csv="${LUCEBOX_HOST_GPU_LIST_CSV:-}"
     if [ -z "$csv" ]; then
@@ -173,13 +191,13 @@ _emit_gpu_array() {
         # split on `,` alone and trim whitespace per field so both forms parse.
local idx uuid pci name cc mem plimit IFS=',' read -r idx uuid pci name cc mem plimit <<<"$line" - idx=$(printf '%s' "$idx" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//') - uuid=$(printf '%s' "$uuid" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//') - pci=$(printf '%s' "$pci" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//') - name=$(printf '%s' "$name" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//') - cc=$(printf '%s' "$cc" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//') - mem=$(printf '%s' "$mem" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//') - plimit=$(printf '%s' "$plimit" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//') + idx=$(_trim "$idx") + uuid=$(_trim "$uuid") + pci=$(_trim "$pci") + name=$(_trim "$name") + cc=$(_trim "$cc") + mem=$(_trim "$mem") + plimit=$(_trim "$plimit") # Strip units. "24576 MiB" → 24576; "175.00 W" → 175 (truncate). local mem_mib vram_gb power_w mem_mib=$(printf '%s' "$mem" | awk '{print $1+0}') diff --git a/uv.lock b/uv.lock index fee8de0df..ba16922d8 100644 --- a/uv.lock +++ b/uv.lock @@ -9,6 +9,7 @@ resolution-markers = [ [manifest] members = [ + "lucebox", "lucebox-dflash", "lucebox-hub", "pflash", @@ -429,6 +430,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/59/67/a6739ac96e28b7855808bdb0370e250606104a859750d209e5a0716fe7ab/librt-0.11.0-cp312-cp312-win_arm64.whl", hash = "sha256:2f10cf143e4a9bb0f4f5af568a00df94a2d69ef41c2579584454bb0fe5cc642c", size = 103470, upload-time = "2026-05-10T18:16:10.369Z" }, ] +[[package]] +name = "lucebox" +source = { editable = "lucebox" } +dependencies = [ + { name = "httpx" }, + { name = "huggingface-hub" }, + { name = "rich" }, + { name = "tomli-w" }, + { name = "typer" }, +] + +[package.metadata] +requires-dist = [ + { name = "httpx", specifier = ">=0.27" }, + { name = "huggingface-hub", specifier = ">=0.27" }, + { name = "rich", specifier = ">=13" }, + { name = "tomli-w", specifier = ">=1.0" }, + { name = "typer", specifier = ">=0.12" }, +] + [[package]] name = "lucebox-dflash" version = 
"0.1.0" @@ -466,6 +487,7 @@ name = "lucebox-hub" version = "0.0.0" source = { virtual = "." } dependencies = [ + { name = "lucebox" }, { name = "lucebox-dflash" }, { name = "pflash" }, ] @@ -482,6 +504,7 @@ megakernel = [ [package.metadata] requires-dist = [ + { name = "lucebox", editable = "lucebox" }, { name = "lucebox-dflash", virtual = "server" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10,<2" }, { name = "pflash", editable = "optimizations/pflash" }, @@ -1124,6 +1147,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/f4/0de46cfa12cdcbcd464cc59fde36912af405696f687e53a091fb432f694c/tokenizers-0.22.2-cp39-abi3-win_arm64.whl", hash = "sha256:9ce725d22864a1e965217204946f830c37876eee3b2ba6fc6255e8e903d5fcbc", size = 2612133, upload-time = "2026-01-05T10:45:17.232Z" }, ] +[[package]] +name = "tomli-w" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/19/75/241269d1da26b624c0d5e110e8149093c759b7a286138f4efd61a60e75fe/tomli_w-1.2.0.tar.gz", hash = "sha256:2dd14fac5a47c27be9cd4c976af5a12d87fb1f0b4512f81d69cce3b35ae25021", size = 7184, upload-time = "2025-01-15T12:07:24.262Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/18/c86eb8e0202e32dd3df50d43d7ff9854f8e0603945ff398974c1d91ac1ef/tomli_w-1.2.0-py3-none-any.whl", hash = "sha256:188306098d013b691fcadc011abd66727d3c414c571bb01b1a174ba8c983cf90", size = 6675, upload-time = "2025-01-15T12:07:22.074Z" }, +] + [[package]] name = "torch" version = "2.11.0+cu128"