Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions dream-server/.env.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,38 @@
"type": "string",
"description": "Enable image generation in Open WebUI (requires ComfyUI)",
"default": "true"
},
"GPU_ASSIGNMENT_JSON_B64": {
"type": "string",
"description": "Base64-encoded GPU assignment JSON"
},
"LLAMA_SERVER_GPU_UUIDS": {
"type": "string",
"description": "GPU UUIDs assigned to llama-server (comma-separated, used by NVIDIA_VISIBLE_DEVICES)"
},
"LLAMA_ARG_SPLIT_MODE": {
"type": "string",
"description": "llama.cpp split mode (LLAMA_ARG_SPLIT_MODE): none | layer (pipeline) | row (tensor/hybrid)"
},
"LLAMA_ARG_TENSOR_SPLIT": {
"type": "string",
"description": "llama.cpp tensor split weights (LLAMA_ARG_TENSOR_SPLIT): comma-separated proportions e.g. 3,1"
},
"EMBEDDINGS_GPU_UUID": {
"type": "string",
"description": "GPU UUID assigned to embeddings service"
},
"COMFYUI_GPU_UUID": {
"type": "string",
"description": "GPU UUID assigned to ComfyUI"
},
"WHISPER_GPU_UUID": {
"type": "string",
"description": "GPU UUID assigned to Whisper"
},
"LLM_MODEL_SIZE_MB": {
"type": "integer",
"description": "Approximate model file size in MB (used for multi-GPU memory planning)"
}
}
}
2 changes: 2 additions & 0 deletions dream-server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ Known-good version baselines: [`docs/KNOWN-GOOD-VERSIONS.md`](docs/KNOWN-GOOD-VE

## 5-Minute Quickstart (Linux)

> **Prerequisites:** `curl` and `jq` must be installed. The installer will auto-install `jq` if missing, but `curl` is required to fetch the installer itself.

```bash
# One-line install (Linux — NVIDIA or AMD)
curl -fsSL https://raw.githubusercontent.com/Light-Heart-Labs/DreamServer/v2.4.0/get-dream-server.sh | bash
Expand Down
12 changes: 12 additions & 0 deletions dream-server/docker-compose.multigpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
services:
llama-server:
environment:
NVIDIA_VISIBLE_DEVICES: "${LLAMA_SERVER_GPU_UUIDS:-all}"
LLAMA_ARG_SPLIT_MODE: "${LLAMA_ARG_SPLIT_MODE:-none}"
LLAMA_ARG_TENSOR_SPLIT: "${LLAMA_ARG_TENSOR_SPLIT:-}"
deploy:
resources:
reservations:
devices:
- driver: nvidia
capabilities: [gpu]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
services:
comfyui:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["${COMFYUI_GPU_UUID}"]
capabilities: [gpu]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
services:
embeddings:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["${EMBEDDINGS_GPU_UUID}"]
capabilities: [gpu]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
services:
whisper:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["${WHISPER_GPU_UUID}"]
capabilities: [gpu]
3 changes: 2 additions & 1 deletion dream-server/installers/lib/compose-select.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# GPU backend, and capability profile
#
# Expects: SCRIPT_DIR, TIER, GPU_BACKEND, CAP_COMPOSE_OVERLAYS, LOG_FILE,
# log(), warn()
# GPU_COUNT, log(), warn()
# Provides: resolve_compose_config() → sets COMPOSE_FILE, COMPOSE_FLAGS
#
# Modder notes:
Expand Down Expand Up @@ -91,6 +91,7 @@ resolve_compose_config() {
--tier "$TIER" \
--gpu-backend "$GPU_BACKEND" \
--profile-overlays "${CAP_COMPOSE_OVERLAYS:-}" \
--gpu-count "${GPU_COUNT:-1}" \
--env 2>>"$LOG_FILE")"
load_env_from_output <<< "$COMPOSE_ENV"
fi
Expand Down
1 change: 1 addition & 0 deletions dream-server/installers/lib/constants.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ BGRN='\033[1;32m' # Bright green — emphasis, success, headings
DGRN='\033[2;32m' # Dim green — secondary text, lore
AMB='\033[0;33m' # Amber — warnings, ETA labels
WHT='\033[1;37m' # White — key URLs
DIM='\033[2;37m' # Dim white
NC='\033[0m' # Reset
CURSOR='█' # Block cursor for typing

Expand Down
171 changes: 171 additions & 0 deletions dream-server/installers/lib/nvidia-topo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#!/usr/bin/env bash
# ============================================================================
# Dream Server Installer — NVIDIA GPU Topology Detection
# ============================================================================
# Part of: installers/lib/
# Purpose: Detect NVIDIA Multi-GPU topology as well as basic GPU info
# and return as JSON. Sourced by detection.sh and 03-features.sh.
#
# Expects: nvidia-smi, warn(), err(), LINK_RANK
# Provides: parse_nvidia_topo_matrix(), detect_nvidia_topo(), link_rank(),
# link_label(), get_rank()
#
# Modder notes:
# This script handles NVIDIA-specific topology detection including NVLink,
# PCIe, and NUMA relationships. It outputs structured JSON for consumption
# by the multi-GPU strategy selection logic.
# ============================================================================

# Map an `nvidia-smi topo -m` link-type cell to a numeric quality rank.
# Higher rank means a faster / topologically closer GPU interconnect;
# unrecognized link types rank 0. Used to pick the best GPU pairing.
link_rank() {
  local link_type="$1" score
  case "$link_type" in
    NV4 | NV6 | NV8 | NV12 | NV18) score=100 ;; # NVLink gen2/3
    XGMI | XGMI2)                  score=90 ;;  # AMD Infinity Fabric
    NV1 | NV2 | NV3)               score=80 ;;  # NVLink gen1
    MIG)                           score=70 ;;  # MIG instance, same die
    PIX)                           score=50 ;;  # Same PCIe switch
    PXB)                           score=40 ;;  # Multiple PCIe switches, same CPU
    PHB)                           score=30 ;;  # PCIe host bridge
    NODE)                          score=20 ;;  # Same NUMA, no direct bridge
    SYS | SOC)                     score=10 ;;  # Cross-NUMA (SOC = old name for SYS)
    *)                             score=0 ;;   # Unknown / unsupported
  esac
  printf '%s\n' "$score"
}

# Translate an `nvidia-smi topo -m` link-type cell into a human-readable
# label for logs and JSON output. Unrecognized cells become "Unknown".
link_label() {
  local label
  case "$1" in
    NV*)       label="NVLink" ;;            # any NVLink generation (NV1..NV18)
    XGMI*)     label="InfinityFabric" ;;    # AMD Infinity Fabric
    MIG)       label="MIG-SameDie" ;;
    PIX)       label="PCIe-SameSwitch" ;;
    PXB)       label="PCIe-CrossSwitch" ;;
    PHB)       label="PCIe-HostBridge" ;;
    NODE)      label="SameNUMA-NoBridge" ;;
    SYS | SOC) label="CrossNUMA" ;;
    X)         label="Self" ;;              # diagonal of the topology matrix
    *)         label="Unknown" ;;
  esac
  printf '%s\n' "$label"
}
# Parse `nvidia-smi topo -m` into a JSON array of GPU-pair link records:
#   [{gpu_a, gpu_b, link_type, link_label, rank}, ...]
# Only GPU<->GPU cells are considered; each unordered pair appears exactly
# once (gpu_a < gpu_b). Emits "[]" (plus a warn) when the matrix cannot be
# read or contains no GPU pairs (single-GPU host).
parse_nvidia_topo_matrix() {
  local matrix
  matrix=$(nvidia-smi topo -m 2>/dev/null) || {
    warn "nvidia-smi topo -m failed"
    echo "[]"
    return
  }

  # Header row carries the column labels (GPU0 GPU1 ... NIC0 ...).
  local header_line headers=()
  header_line=$(echo "$matrix" | grep -E '^\s+GPU[0-9]' | head -1)
  read -ra headers <<<"$header_line"

  # Collect pairs as TSV, then convert to JSON via jq.
  # BUGFIX: the jq stage splits on "\t", so the field separator here MUST
  # be a real tab — joining with spaces made every record a single field
  # and broke the tonumber conversions.
  local pairs_tsv="" pair

  while IFS= read -r line; do
    [[ "$line" =~ ^(GPU[0-9]+|NIC[0-9]+) ]] || continue
    local row_label
    row_label=$(echo "$line" | awk '{print $1}')
    [[ "$row_label" =~ ^GPU ]] || continue # only GPU rows
    local gpu_a="${row_label#GPU}"
    local cells=()
    read -ra cells <<<"$line"
    # cells[0] = row label, cells[1..] = columns (aligned with headers[])
    for col_idx in "${!headers[@]}"; do
      local col_header="${headers[$col_idx]}"
      [[ "$col_header" =~ ^GPU ]] || continue
      local gpu_b="${col_header#GPU}"
      [[ "$gpu_a" == "$gpu_b" ]] && continue # skip self
      [[ "$gpu_a" -ge "$gpu_b" ]] && continue # dedup (only A<B pairs)
      local cell="${cells[$((col_idx + 1))]:-UNKNOWN}"
      local rank label
      rank=$(link_rank "$cell")
      label=$(link_label "$cell")
      printf -v pair '%s\t%s\t%s\t%s\t%s\n' \
        "$gpu_a" "$gpu_b" "$cell" "$label" "$rank"
      pairs_tsv+="$pair"
    done
  done <<<"$matrix"

  if [[ -z "$pairs_tsv" ]]; then
    echo "[]"
    return
  fi

  printf '%s' "$pairs_tsv" | jq -Rn '[inputs | split("\t") | {
    gpu_a: (.[0] | tonumber),
    gpu_b: (.[1] | tonumber),
    link_type: .[2],
    link_label: .[3],
    rank: (.[4] | tonumber)
  }]'
}

# Detect the full NVIDIA GPU topology of this host and print it as one JSON
# document on stdout: {vendor, gpu_count, driver_version, mig_enabled, numa,
# gpus, links}. Returns 1 (after err()) if nvidia-smi cannot be queried.
# Requires: nvidia-smi, jq; numactl is optional (NUMA info degrades to {}).
detect_nvidia_topo() {
  # Basic GPU list — one CSV row per GPU: index,name,mem(MiB),pcie gen,width,uuid
  local gpu_list
  gpu_list=$(nvidia-smi --query-gpu=index,name,memory.total,pcie.link.gen.current,pcie.link.width.current,uuid \
    --format=csv,noheader,nounits 2>/dev/null) || {
    err "nvidia-smi query failed"
    return 1
  }

  # Parse CSV into JSON array via jq: trim each field, convert memory from
  # MiB to GB rounded to one decimal place (x/1024, *10, round, /10).
  # NOTE(review): split(",") assumes GPU names never contain commas — holds
  # for NVIDIA product names seen so far, but worth confirming.
  local gpus_json
  gpus_json=$(echo "$gpu_list" | jq -Rn '[inputs | split(",") | map(gsub("^\\s+|\\s+$"; "")) | {
    index: (.[0] | tonumber),
    name: .[1],
    memory_gb: ((.[2] | tonumber) / 1024 * 10 | round / 10),
    pcie_gen: .[3],
    pcie_width: .[4],
    uuid: .[5]
  }]')

  local gpu_count
  gpu_count=$(echo "$gpus_json" | jq 'length')

  # MIG detection — true if ANY GPU reports "MIG Mode ... Enabled".
  local mig_mode="false"
  if nvidia-smi -q 2>/dev/null | grep -q "MIG Mode.*Enabled"; then
    mig_mode="true"
  fi

  # Driver version — same for all GPUs, so take the first row; xargs trims.
  local driver_ver
  driver_ver=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -1 | xargs)

  # Topology matrix → JSON array of pairwise link records (see
  # parse_nvidia_topo_matrix); "[]" on failure or single-GPU hosts.
  local topo_pairs
  topo_pairs=$(parse_nvidia_topo_matrix)

  # NUMA info — node count only, and only when numactl is installed.
  local numa_json="{}"
  if command -v numactl &>/dev/null; then
    local numa_nodes
    numa_nodes=$(numactl --hardware 2>/dev/null | grep "^node [0-9]* cpus" | wc -l)
    numa_json=$(jq -n --argjson n "$numa_nodes" '{nodes: $n}')
  fi

  # Compose final JSON — all sub-documents injected as typed jq args so the
  # output is always well-formed JSON regardless of shell quoting.
  jq -n \
    --arg vendor "nvidia" \
    --argjson gpu_count "$gpu_count" \
    --arg driver "$driver_ver" \
    --argjson mig "$mig_mode" \
    --argjson numa "$numa_json" \
    --argjson gpus "$gpus_json" \
    --argjson links "$topo_pairs" \
    '{
      vendor: $vendor,
      gpu_count: $gpu_count,
      driver_version: $driver,
      mig_enabled: $mig,
      numa: $numa,
      gpus: $gpus,
      links: $links
    }'
}

# ============================================================================
# Topology lookup helpers (used by 03-features.sh custom assignment path)
# ============================================================================

get_rank() { echo "${LINK_RANK["$1,$2"]:-0}"; }
11 changes: 11 additions & 0 deletions dream-server/installers/lib/tier-map.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ resolve_tier_config() {
GGUF_URL=""
GGUF_SHA256=""
MAX_CONTEXT=200000
LLM_MODEL_SIZE_MB=0
;;
ARC)
# Intel Arc A770 (16 GB) and future Arc B-series (≥12 GB VRAM)
Expand All @@ -33,6 +34,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf"
GGUF_SHA256="03b74727a860a56338e042c4420bb3f04b2fec5734175f4cb9fa853daf52b7e8"
MAX_CONTEXT=32768
LLM_MODEL_SIZE_MB=5760 # Qwen3.5-9B-Q4_K_M (5.68 GB)
GPU_BACKEND="sycl"
N_GPU_LAYERS=99
;;
Expand All @@ -45,6 +47,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/Qwen3.5-4B-Q4_K_M.gguf"
GGUF_SHA256="00fe7986ff5f6b463e62455821146049db6f9313603938a70800d1fb69ef11a4"
MAX_CONTEXT=16384
LLM_MODEL_SIZE_MB=2870 # Qwen3.5-4B-Q4_K_M (2.74 GB)
GPU_BACKEND="sycl"
N_GPU_LAYERS=99
;;
Expand All @@ -55,6 +58,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF/resolve/main/Qwen3-Coder-Next-Q4_K_M.gguf"
GGUF_SHA256="9e6032d2f3b50a60f17ce8bf5a1d85c71af9b53b89c7978020ae7c660f29b090"
MAX_CONTEXT=131072
LLM_MODEL_SIZE_MB=48500 # 48.5 GB per HF file listing
;;
SH_LARGE)
TIER_NAME="Strix Halo 90+"
Expand All @@ -63,6 +67,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF/resolve/main/Qwen3-Coder-Next-Q4_K_M.gguf"
GGUF_SHA256="9e6032d2f3b50a60f17ce8bf5a1d85c71af9b53b89c7978020ae7c660f29b090"
MAX_CONTEXT=131072
LLM_MODEL_SIZE_MB=48500 # 48.5 GB per HF file listing
;;
SH_COMPACT)
TIER_NAME="Strix Halo Compact"
Expand All @@ -71,6 +76,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3-30B-A3B-GGUF/resolve/main/Qwen3-30B-A3B-Q4_K_M.gguf"
GGUF_SHA256="9f1a24700a339b09c06009b729b5c809e0b64c213b8af5b711b3dbdfd0c5ba48"
MAX_CONTEXT=131072
LLM_MODEL_SIZE_MB=18600 # 18.6 GB per HF file listing
;;
0)
TIER_NAME="Lightweight"
Expand All @@ -79,6 +85,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-2B-GGUF/resolve/main/Qwen3.5-2B-Q4_K_M.gguf"
GGUF_SHA256=""
MAX_CONTEXT=8192
LLM_MODEL_SIZE_MB=1500 # Qwen3.5-2B-Q4_K_M (1.28 GB)
;;
1)
TIER_NAME="Entry Level"
Expand All @@ -87,6 +94,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf"
GGUF_SHA256="03b74727a860a56338e042c4420bb3f04b2fec5734175f4cb9fa853daf52b7e8"
MAX_CONTEXT=16384
LLM_MODEL_SIZE_MB=5760 # Qwen3.5-9B-Q4_K_M (5.68 GB)
;;
2)
TIER_NAME="Prosumer"
Expand All @@ -95,6 +103,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf"
GGUF_SHA256="03b74727a860a56338e042c4420bb3f04b2fec5734175f4cb9fa853daf52b7e8"
MAX_CONTEXT=32768
LLM_MODEL_SIZE_MB=5760 # Qwen3.5-9B-Q4_K_M (5.68 GB)
;;
3)
TIER_NAME="Pro"
Expand All @@ -103,6 +112,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-27B-GGUF/resolve/main/Qwen3.5-27B-Q4_K_M.gguf"
GGUF_SHA256="84b5f7f112156d63836a01a69dc3f11a6ba63b10a23b8ca7a7efaf52d5a2d806"
MAX_CONTEXT=32768
LLM_MODEL_SIZE_MB=16400 # Qwen3.5-27B-Q4_K_M (16.7 GB)
;;
4)
TIER_NAME="Enterprise"
Expand All @@ -111,6 +121,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3-30B-A3B-GGUF/resolve/main/Qwen3-30B-A3B-Q4_K_M.gguf"
GGUF_SHA256="9f1a24700a339b09c06009b729b5c809e0b64c213b8af5b711b3dbdfd0c5ba48"
MAX_CONTEXT=131072
LLM_MODEL_SIZE_MB=18600 # 18.6 GB per HF file listing
;;
*)
error "Invalid tier: $TIER. Valid tiers: 0, 1, 2, 3, 4, CLOUD, NV_ULTRA, SH_LARGE, SH_COMPACT, ARC, ARC_LITE"
Expand Down
Loading
Loading