Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions dream-server/.env.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,38 @@
"type": "string",
"description": "Enable image generation in Open WebUI (requires ComfyUI)",
"default": "true"
},
"GPU_ASSIGNMENT_JSON_B64": {
"type": "string",
"description": "Base64-encoded GPU assignment JSON"
},
"LLAMA_SERVER_GPU_UUIDS": {
"type": "string",
"description": "GPU UUIDs assigned to llama-server (comma-separated, used by NVIDIA_VISIBLE_DEVICES)"
},
"LLAMA_ARG_SPLIT_MODE": {
"type": "string",
"description": "llama.cpp split mode (LLAMA_ARG_SPLIT_MODE): none | layer (pipeline) | row (tensor/hybrid)"
},
"LLAMA_ARG_TENSOR_SPLIT": {
"type": "string",
"description": "llama.cpp tensor split weights (LLAMA_ARG_TENSOR_SPLIT): comma-separated proportions e.g. 3,1"
},
"EMBEDDINGS_GPU_UUID": {
"type": "string",
"description": "GPU UUID assigned to embeddings service"
},
"COMFYUI_GPU_UUID": {
"type": "string",
"description": "GPU UUID assigned to ComfyUI"
},
"WHISPER_GPU_UUID": {
"type": "string",
"description": "GPU UUID assigned to Whisper"
},
"LLM_MODEL_SIZE_MB": {
"type": "integer",
"description": "Approximate model file size in MB (used for multi-GPU memory planning)"
}
}
}
2 changes: 2 additions & 0 deletions dream-server/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ Known-good version baselines: [`docs/KNOWN-GOOD-VERSIONS.md`](docs/KNOWN-GOOD-VE

## 5-Minute Quickstart (Linux)

> **Prerequisites:** `curl` and `jq` must be installed. The installer will auto-install `jq` if missing, but `curl` is required to fetch the installer itself.

```bash
# One-line install (Linux — NVIDIA or AMD)
curl -fsSL https://raw.githubusercontent.com/Light-Heart-Labs/DreamServer/v2.4.0/get-dream-server.sh | bash
Expand Down
12 changes: 12 additions & 0 deletions dream-server/docker-compose.multigpu.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
services:
llama-server:
environment:
NVIDIA_VISIBLE_DEVICES: "${LLAMA_SERVER_GPU_UUIDS:-all}"
LLAMA_ARG_SPLIT_MODE: "${LLAMA_ARG_SPLIT_MODE:-none}"
LLAMA_ARG_TENSOR_SPLIT: "${LLAMA_ARG_TENSOR_SPLIT:-}"
deploy:
resources:
reservations:
devices:
- driver: nvidia
capabilities: [gpu]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
services:
comfyui:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["${COMFYUI_GPU_UUID}"]
capabilities: [gpu]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
services:
embeddings:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["${EMBEDDINGS_GPU_UUID}"]
capabilities: [gpu]
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
services:
whisper:
deploy:
resources:
reservations:
devices:
- driver: nvidia
device_ids: ["${WHISPER_GPU_UUID}"]
capabilities: [gpu]
3 changes: 2 additions & 1 deletion dream-server/installers/lib/compose-select.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# GPU backend, and capability profile
#
# Expects: SCRIPT_DIR, TIER, GPU_BACKEND, CAP_COMPOSE_OVERLAYS, LOG_FILE,
# log(), warn()
# GPU_COUNT, log(), warn()
# Provides: resolve_compose_config() → sets COMPOSE_FILE, COMPOSE_FLAGS
#
# Modder notes:
Expand Down Expand Up @@ -91,6 +91,7 @@ resolve_compose_config() {
--tier "$TIER" \
--gpu-backend "$GPU_BACKEND" \
--profile-overlays "${CAP_COMPOSE_OVERLAYS:-}" \
--gpu-count "${GPU_COUNT:-1}" \
--env 2>>"$LOG_FILE")"
load_env_from_output <<< "$COMPOSE_ENV"
fi
Expand Down
1 change: 1 addition & 0 deletions dream-server/installers/lib/constants.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ BGRN='\033[1;32m' # Bright green — emphasis, success, headings
DGRN='\033[2;32m' # Dim green — secondary text, lore
AMB='\033[0;33m' # Amber — warnings, ETA labels
WHT='\033[1;37m' # White — key URLs
DIM='\033[2;37m' # Dim white
NC='\033[0m' # Reset
CURSOR='█' # Block cursor for typing

Expand Down
171 changes: 171 additions & 0 deletions dream-server/installers/lib/nvidia-topo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#!/usr/bin/env bash
# ============================================================================
# Dream Server Installer — NVIDIA GPU Topology Detection
# ============================================================================
# Part of: installers/lib/
# Purpose: Detect NVIDIA Multi-GPU topology as well as basic GPU info
# and return as JSON. Sourced by detection.sh and 03-features.sh.
#
# Expects: nvidia-smi, warn(), err(), LINK_RANK
# Provides: parse_nvidia_topo_matrix(), detect_nvidia_topo(), link_rank(),
# link_label(), get_rank()
#
# Modder notes:
# This script handles NVIDIA-specific topology detection including NVLink,
# PCIe, and NUMA relationships. It outputs structured JSON for consumption
# by the multi-GPU strategy selection logic.
# ============================================================================

# Map an `nvidia-smi topo -m` link-type cell to a numeric quality rank.
# Higher rank means a faster / topologically closer GPU interconnect;
# unrecognized link types rank 0. Used to pick the best GPU pairing.
link_rank() {
  local link_type="$1" score
  case "$link_type" in
    NV4 | NV6 | NV8 | NV12 | NV18) score=100 ;; # NVLink gen2/3
    XGMI | XGMI2)                  score=90 ;;  # AMD Infinity Fabric
    NV1 | NV2 | NV3)               score=80 ;;  # NVLink gen1
    MIG)                           score=70 ;;  # MIG instance, same die
    PIX)                           score=50 ;;  # Same PCIe switch
    PXB)                           score=40 ;;  # Multiple PCIe switches, same CPU
    PHB)                           score=30 ;;  # PCIe host bridge
    NODE)                          score=20 ;;  # Same NUMA, no direct bridge
    SYS | SOC)                     score=10 ;;  # Cross-NUMA (SOC = old name for SYS)
    *)                             score=0 ;;   # Unknown / unsupported
  esac
  printf '%s\n' "$score"
}

# Translate an `nvidia-smi topo -m` link-type cell into a human-readable
# label for logs and JSON output. Unrecognized cells become "Unknown".
link_label() {
  local label
  case "$1" in
    NV*)       label="NVLink" ;;            # any NVLink generation (NV1..NV18)
    XGMI*)     label="InfinityFabric" ;;    # AMD Infinity Fabric
    MIG)       label="MIG-SameDie" ;;
    PIX)       label="PCIe-SameSwitch" ;;
    PXB)       label="PCIe-CrossSwitch" ;;
    PHB)       label="PCIe-HostBridge" ;;
    NODE)      label="SameNUMA-NoBridge" ;;
    SYS | SOC) label="CrossNUMA" ;;
    X)         label="Self" ;;              # diagonal of the topology matrix
    *)         label="Unknown" ;;
  esac
  printf '%s\n' "$label"
}
# Parse `nvidia-smi topo -m` into a JSON array of GPU-pair link records:
#   [{gpu_a, gpu_b, link_type, link_label, rank}, ...]
# Only GPU<->GPU cells are considered; each unordered pair appears exactly
# once (gpu_a < gpu_b). Emits "[]" (plus a warn) when the matrix cannot be
# read or contains no GPU pairs (single-GPU host).
parse_nvidia_topo_matrix() {
  local matrix
  matrix=$(nvidia-smi topo -m 2>/dev/null) || {
    warn "nvidia-smi topo -m failed"
    echo "[]"
    return
  }

  # Header row carries the column labels (GPU0 GPU1 ... NIC0 ...).
  local header_line headers=()
  header_line=$(echo "$matrix" | grep -E '^\s+GPU[0-9]' | head -1)
  read -ra headers <<<"$header_line"

  # Collect pairs as TSV, then convert to JSON via jq.
  # BUGFIX: the jq stage splits on "\t", so the field separator here MUST
  # be a real tab — joining with spaces made every record a single field
  # and broke the tonumber conversions.
  local pairs_tsv="" pair

  while IFS= read -r line; do
    [[ "$line" =~ ^(GPU[0-9]+|NIC[0-9]+) ]] || continue
    local row_label
    row_label=$(echo "$line" | awk '{print $1}')
    [[ "$row_label" =~ ^GPU ]] || continue # only GPU rows
    local gpu_a="${row_label#GPU}"
    local cells=()
    read -ra cells <<<"$line"
    # cells[0] = row label, cells[1..] = columns (aligned with headers[])
    for col_idx in "${!headers[@]}"; do
      local col_header="${headers[$col_idx]}"
      [[ "$col_header" =~ ^GPU ]] || continue
      local gpu_b="${col_header#GPU}"
      [[ "$gpu_a" == "$gpu_b" ]] && continue # skip self
      [[ "$gpu_a" -ge "$gpu_b" ]] && continue # dedup (only A<B pairs)
      local cell="${cells[$((col_idx + 1))]:-UNKNOWN}"
      local rank label
      rank=$(link_rank "$cell")
      label=$(link_label "$cell")
      printf -v pair '%s\t%s\t%s\t%s\t%s\n' \
        "$gpu_a" "$gpu_b" "$cell" "$label" "$rank"
      pairs_tsv+="$pair"
    done
  done <<<"$matrix"

  if [[ -z "$pairs_tsv" ]]; then
    echo "[]"
    return
  fi

  printf '%s' "$pairs_tsv" | jq -Rn '[inputs | split("\t") | {
    gpu_a: (.[0] | tonumber),
    gpu_b: (.[1] | tonumber),
    link_type: .[2],
    link_label: .[3],
    rank: (.[4] | tonumber)
  }]'
}

# Detect the full NVIDIA GPU topology of this host and print it as one JSON
# document on stdout: {vendor, gpu_count, driver_version, mig_enabled, numa,
# gpus, links}. Returns 1 (after err()) if nvidia-smi cannot be queried.
# Requires: nvidia-smi, jq; numactl is optional (NUMA info degrades to {}).
detect_nvidia_topo() {
  # Basic GPU list — one CSV row per GPU: index,name,mem(MiB),pcie gen,width,uuid
  local gpu_list
  gpu_list=$(nvidia-smi --query-gpu=index,name,memory.total,pcie.link.gen.current,pcie.link.width.current,uuid \
    --format=csv,noheader,nounits 2>/dev/null) || {
    err "nvidia-smi query failed"
    return 1
  }

  # Parse CSV into JSON array via jq: trim each field, convert memory from
  # MiB to GB rounded to one decimal place (x/1024, *10, round, /10).
  # NOTE(review): split(",") assumes GPU names never contain commas — holds
  # for NVIDIA product names seen so far, but worth confirming.
  local gpus_json
  gpus_json=$(echo "$gpu_list" | jq -Rn '[inputs | split(",") | map(gsub("^\\s+|\\s+$"; "")) | {
    index: (.[0] | tonumber),
    name: .[1],
    memory_gb: ((.[2] | tonumber) / 1024 * 10 | round / 10),
    pcie_gen: .[3],
    pcie_width: .[4],
    uuid: .[5]
  }]')

  local gpu_count
  gpu_count=$(echo "$gpus_json" | jq 'length')

  # MIG detection — true if ANY GPU reports "MIG Mode ... Enabled".
  local mig_mode="false"
  if nvidia-smi -q 2>/dev/null | grep -q "MIG Mode.*Enabled"; then
    mig_mode="true"
  fi

  # Driver version — same for all GPUs, so take the first row; xargs trims.
  local driver_ver
  driver_ver=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -1 | xargs)

  # Topology matrix → JSON array of pairwise link records (see
  # parse_nvidia_topo_matrix); "[]" on failure or single-GPU hosts.
  local topo_pairs
  topo_pairs=$(parse_nvidia_topo_matrix)

  # NUMA info — node count only, and only when numactl is installed.
  local numa_json="{}"
  if command -v numactl &>/dev/null; then
    local numa_nodes
    numa_nodes=$(numactl --hardware 2>/dev/null | grep "^node [0-9]* cpus" | wc -l)
    numa_json=$(jq -n --argjson n "$numa_nodes" '{nodes: $n}')
  fi

  # Compose final JSON — all sub-documents injected as typed jq args so the
  # output is always well-formed JSON regardless of shell quoting.
  jq -n \
    --arg vendor "nvidia" \
    --argjson gpu_count "$gpu_count" \
    --arg driver "$driver_ver" \
    --argjson mig "$mig_mode" \
    --argjson numa "$numa_json" \
    --argjson gpus "$gpus_json" \
    --argjson links "$topo_pairs" \
    '{
      vendor: $vendor,
      gpu_count: $gpu_count,
      driver_version: $driver,
      mig_enabled: $mig,
      numa: $numa,
      gpus: $gpus,
      links: $links
    }'
}

# ============================================================================
# Topology lookup helpers (used by 03-features.sh custom assignment path)
# ============================================================================

get_rank() { echo "${LINK_RANK["$1,$2"]:-0}"; }
11 changes: 11 additions & 0 deletions dream-server/installers/lib/tier-map.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ resolve_tier_config() {
GGUF_URL=""
GGUF_SHA256=""
MAX_CONTEXT=200000
LLM_MODEL_SIZE_MB=0
;;
ARC)
# Intel Arc A770 (16 GB) and future Arc B-series (≥12 GB VRAM)
Expand All @@ -33,6 +34,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf"
GGUF_SHA256="03b74727a860a56338e042c4420bb3f04b2fec5734175f4cb9fa853daf52b7e8"
MAX_CONTEXT=32768
LLM_MODEL_SIZE_MB=5760 # Qwen3.5-9B-Q4_K_M (5.68 GB)
GPU_BACKEND="sycl"
N_GPU_LAYERS=99
;;
Expand All @@ -45,6 +47,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/Qwen3.5-4B-Q4_K_M.gguf"
GGUF_SHA256="00fe7986ff5f6b463e62455821146049db6f9313603938a70800d1fb69ef11a4"
MAX_CONTEXT=16384
LLM_MODEL_SIZE_MB=2870 # Qwen3.5-4B-Q4_K_M (2.74 GB)
GPU_BACKEND="sycl"
N_GPU_LAYERS=99
;;
Expand All @@ -55,6 +58,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF/resolve/main/Qwen3-Coder-Next-Q4_K_M.gguf"
GGUF_SHA256="9e6032d2f3b50a60f17ce8bf5a1d85c71af9b53b89c7978020ae7c660f29b090"
MAX_CONTEXT=131072
LLM_MODEL_SIZE_MB=48500 # 48.5 GB per HF file listing
;;
SH_LARGE)
TIER_NAME="Strix Halo 90+"
Expand All @@ -63,6 +67,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF/resolve/main/Qwen3-Coder-Next-Q4_K_M.gguf"
GGUF_SHA256="9e6032d2f3b50a60f17ce8bf5a1d85c71af9b53b89c7978020ae7c660f29b090"
MAX_CONTEXT=131072
LLM_MODEL_SIZE_MB=48500 # 48.5 GB per HF file listing
;;
SH_COMPACT)
TIER_NAME="Strix Halo Compact"
Expand All @@ -71,6 +76,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3-30B-A3B-GGUF/resolve/main/Qwen3-30B-A3B-Q4_K_M.gguf"
GGUF_SHA256="9f1a24700a339b09c06009b729b5c809e0b64c213b8af5b711b3dbdfd0c5ba48"
MAX_CONTEXT=131072
LLM_MODEL_SIZE_MB=18600 # 18.6 GB per HF file listing
;;
0)
TIER_NAME="Lightweight"
Expand All @@ -79,6 +85,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-2B-GGUF/resolve/main/Qwen3.5-2B-Q4_K_M.gguf"
GGUF_SHA256=""
MAX_CONTEXT=8192
LLM_MODEL_SIZE_MB=1500 # Qwen3.5-2B-Q4_K_M (1.28 GB)
;;
1)
TIER_NAME="Entry Level"
Expand All @@ -87,6 +94,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf"
GGUF_SHA256="03b74727a860a56338e042c4420bb3f04b2fec5734175f4cb9fa853daf52b7e8"
MAX_CONTEXT=16384
LLM_MODEL_SIZE_MB=5760 # Qwen3.5-9B-Q4_K_M (5.68 GB)
;;
2)
TIER_NAME="Prosumer"
Expand All @@ -95,6 +103,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf"
GGUF_SHA256="03b74727a860a56338e042c4420bb3f04b2fec5734175f4cb9fa853daf52b7e8"
MAX_CONTEXT=32768
LLM_MODEL_SIZE_MB=5760 # Qwen3.5-9B-Q4_K_M (5.68 GB)
;;
3)
TIER_NAME="Pro"
Expand All @@ -103,6 +112,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-27B-GGUF/resolve/main/Qwen3.5-27B-Q4_K_M.gguf"
GGUF_SHA256="84b5f7f112156d63836a01a69dc3f11a6ba63b10a23b8ca7a7efaf52d5a2d806"
MAX_CONTEXT=32768
LLM_MODEL_SIZE_MB=16400 # Qwen3.5-27B-Q4_K_M (16.7 GB)
;;
4)
TIER_NAME="Enterprise"
Expand All @@ -111,6 +121,7 @@ resolve_tier_config() {
GGUF_URL="https://huggingface.co/unsloth/Qwen3-30B-A3B-GGUF/resolve/main/Qwen3-30B-A3B-Q4_K_M.gguf"
GGUF_SHA256="9f1a24700a339b09c06009b729b5c809e0b64c213b8af5b711b3dbdfd0c5ba48"
MAX_CONTEXT=131072
LLM_MODEL_SIZE_MB=18600 # 18.6 GB per HF file listing
;;
*)
error "Invalid tier: $TIER. Valid tiers: 0, 1, 2, 3, 4, CLOUD, NV_ULTRA, SH_LARGE, SH_COMPACT, ARC, ARC_LITE"
Expand Down
Loading
Loading