diff --git a/dream-server/.env.schema.json b/dream-server/.env.schema.json index d1af609b..94c4e5f0 100644 --- a/dream-server/.env.schema.json +++ b/dream-server/.env.schema.json @@ -434,6 +434,38 @@ "type": "string", "description": "Enable image generation in Open WebUI (requires ComfyUI)", "default": "true" + }, + "GPU_ASSIGNMENT_JSON_B64": { + "type": "string", + "description": "Base64-encoded GPU assignment JSON" + }, + "LLAMA_SERVER_GPU_UUIDS": { + "type": "string", + "description": "GPU UUIDs assigned to llama-server (comma-separated, used by NVIDIA_VISIBLE_DEVICES)" + }, + "LLAMA_ARG_SPLIT_MODE": { + "type": "string", + "description": "llama.cpp split mode (LLAMA_ARG_SPLIT_MODE): none | layer (pipeline) | row (tensor/hybrid)" + }, + "LLAMA_ARG_TENSOR_SPLIT": { + "type": "string", + "description": "llama.cpp tensor split weights (LLAMA_ARG_TENSOR_SPLIT): comma-separated proportions e.g. 3,1" + }, + "EMBEDDINGS_GPU_UUID": { + "type": "string", + "description": "GPU UUID assigned to embeddings service" + }, + "COMFYUI_GPU_UUID": { + "type": "string", + "description": "GPU UUID assigned to ComfyUI" + }, + "WHISPER_GPU_UUID": { + "type": "string", + "description": "GPU UUID assigned to Whisper" + }, + "LLM_MODEL_SIZE_MB": { + "type": "integer", + "description": "Approximate model file size in MB (used for multi-GPU memory planning)" } } } diff --git a/dream-server/README.md b/dream-server/README.md index dfd10c1c..03158529 100644 --- a/dream-server/README.md +++ b/dream-server/README.md @@ -37,6 +37,8 @@ Known-good version baselines: [`docs/KNOWN-GOOD-VERSIONS.md`](docs/KNOWN-GOOD-VE ## 5-Minute Quickstart (Linux) +> **Prerequisites:** `curl` and `jq` must be installed. The installer will auto-install `jq` if missing, but `curl` is required to fetch the installer itself. 
+ ```bash # One-line install (Linux — NVIDIA or AMD) curl -fsSL https://raw.githubusercontent.com/Light-Heart-Labs/DreamServer/v2.4.0/get-dream-server.sh | bash diff --git a/dream-server/docker-compose.multigpu.yml b/dream-server/docker-compose.multigpu.yml new file mode 100644 index 00000000..750920f8 --- /dev/null +++ b/dream-server/docker-compose.multigpu.yml @@ -0,0 +1,12 @@ +services: + llama-server: + environment: + NVIDIA_VISIBLE_DEVICES: "${LLAMA_SERVER_GPU_UUIDS:-all}" + LLAMA_ARG_SPLIT_MODE: "${LLAMA_ARG_SPLIT_MODE:-none}" + LLAMA_ARG_TENSOR_SPLIT: "${LLAMA_ARG_TENSOR_SPLIT:-}" + deploy: + resources: + reservations: + devices: + - driver: nvidia + capabilities: [gpu] diff --git a/dream-server/extensions/services/comfyui/compose.multigpu.yaml b/dream-server/extensions/services/comfyui/compose.multigpu.yaml new file mode 100644 index 00000000..47c9f51b --- /dev/null +++ b/dream-server/extensions/services/comfyui/compose.multigpu.yaml @@ -0,0 +1,9 @@ +services: + comfyui: + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["${COMFYUI_GPU_UUID}"] + capabilities: [gpu] diff --git a/dream-server/extensions/services/embeddings/compose.multigpu.yaml b/dream-server/extensions/services/embeddings/compose.multigpu.yaml new file mode 100644 index 00000000..e749bdfd --- /dev/null +++ b/dream-server/extensions/services/embeddings/compose.multigpu.yaml @@ -0,0 +1,9 @@ +services: + embeddings: + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["${EMBEDDINGS_GPU_UUID}"] + capabilities: [gpu] diff --git a/dream-server/extensions/services/whisper/compose.multigpu.yaml b/dream-server/extensions/services/whisper/compose.multigpu.yaml new file mode 100644 index 00000000..51939a30 --- /dev/null +++ b/dream-server/extensions/services/whisper/compose.multigpu.yaml @@ -0,0 +1,9 @@ +services: + whisper: + deploy: + resources: + reservations: + devices: + - driver: nvidia + device_ids: ["${WHISPER_GPU_UUID}"] + 
capabilities: [gpu] diff --git a/dream-server/installers/lib/compose-select.sh b/dream-server/installers/lib/compose-select.sh index cd317d0d..be23e51b 100755 --- a/dream-server/installers/lib/compose-select.sh +++ b/dream-server/installers/lib/compose-select.sh @@ -7,7 +7,7 @@ # GPU backend, and capability profile # # Expects: SCRIPT_DIR, TIER, GPU_BACKEND, CAP_COMPOSE_OVERLAYS, LOG_FILE, -# log(), warn() +# GPU_COUNT, log(), warn() # Provides: resolve_compose_config() → sets COMPOSE_FILE, COMPOSE_FLAGS # # Modder notes: @@ -91,6 +91,7 @@ resolve_compose_config() { --tier "$TIER" \ --gpu-backend "$GPU_BACKEND" \ --profile-overlays "${CAP_COMPOSE_OVERLAYS:-}" \ + --gpu-count "${GPU_COUNT:-1}" \ --env 2>>"$LOG_FILE")" load_env_from_output <<< "$COMPOSE_ENV" fi diff --git a/dream-server/installers/lib/constants.sh b/dream-server/installers/lib/constants.sh index ff2177ab..5617e7b4 100755 --- a/dream-server/installers/lib/constants.sh +++ b/dream-server/installers/lib/constants.sh @@ -49,6 +49,7 @@ BGRN='\033[1;32m' # Bright green — emphasis, success, headings DGRN='\033[2;32m' # Dim green — secondary text, lore AMB='\033[0;33m' # Amber — warnings, ETA labels WHT='\033[1;37m' # White — key URLs +DIM='\033[2;37m' # Dim white NC='\033[0m' # Reset CURSOR='█' # Block cursor for typing diff --git a/dream-server/installers/lib/nvidia-topo.sh b/dream-server/installers/lib/nvidia-topo.sh new file mode 100644 index 00000000..7b3b5037 --- /dev/null +++ b/dream-server/installers/lib/nvidia-topo.sh @@ -0,0 +1,171 @@ +#!/usr/bin/env bash +# ============================================================================ +# Dream Server Installer — NVIDIA GPU Topology Detection +# ============================================================================ +# Part of: installers/lib/ +# Purpose: Detect NVIDIA Multi-GPU topology as well as basic GPU info +# and return as JSON. Sourced by detection.sh and 03-features.sh. 
+# +# Expects: nvidia-smi, warn(), err(), LINK_RANK +# Provides: parse_nvidia_topo_matrix(), detect_nvidia_topo(), link_rank(), +# link_label(), get_rank() +# +# Modder notes: +# This script handles NVIDIA-specific topology detection including NVLink, +# PCIe, and NUMA relationships. It outputs structured JSON for consumption +# by the multi-GPU strategy selection logic. +# ============================================================================ + +link_rank() { + case "$1" in + NV4 | NV6 | NV8 | NV12 | NV18) echo 100 ;; # NVLink gen2/3 + XGMI | XGMI2) echo 90 ;; # AMD Infinity Fabric + NV1 | NV2 | NV3) echo 80 ;; # NVLink gen1 + MIG) echo 70 ;; # MIG instance, same die + PIX) echo 50 ;; # Same PCIe switch + PXB) echo 40 ;; # Multiple PCIe switches, same CPU + PHB) echo 30 ;; # PCIe host bridge + NODE) echo 20 ;; # Same NUMA, no direct bridge + SYS | SOC) echo 10 ;; # Cross-NUMA (SOC = old name for SYS) + *) echo 0 ;; + esac +} + +link_label() { + case "$1" in + NV*) echo "NVLink" ;; + XGMI*) echo "InfinityFabric" ;; + MIG) echo "MIG-SameDie" ;; + PIX) echo "PCIe-SameSwitch" ;; + PXB) echo "PCIe-CrossSwitch" ;; + PHB) echo "PCIe-HostBridge" ;; + NODE) echo "SameNUMA-NoBridge" ;; + SYS | SOC) echo "CrossNUMA" ;; + X) echo "Self" ;; + *) echo "Unknown" ;; + esac +} +parse_nvidia_topo_matrix() { + # Returns JSON array of {gpu_a, gpu_b, link_type, link_label, rank} + local matrix + matrix=$(nvidia-smi topo -m 2>/dev/null) || { + warn "nvidia-smi topo -m failed" + echo "[]" + return + } + + local header_line headers=() + header_line=$(echo "$matrix" | grep -E '^\s+GPU[0-9]' | head -1) + read -ra headers <<<"$header_line" + + # Collect pairs as TSV, then convert to JSON via jq + local pairs_tsv="" + + while IFS= read -r line; do + [[ "$line" =~ ^(GPU[0-9]+|NIC[0-9]+) ]] || continue + local row_label + row_label=$(echo "$line" | awk '{print $1}') + [[ "$row_label" =~ ^GPU ]] || continue # only GPU rows + local gpu_a="${row_label#GPU}" + local cells=() + read -ra cells 
<<<"$line" + # cells[0] = row label, cells[1..] = columns + for col_idx in "${!headers[@]}"; do + local col_header="${headers[$col_idx]}" + [[ "$col_header" =~ ^GPU ]] || continue + local gpu_b="${col_header#GPU}" + [[ "$gpu_a" == "$gpu_b" ]] && continue # skip self + [[ "$gpu_a" -ge "$gpu_b" ]] && continue # dedup (only A/dev/null) || { + err "nvidia-smi query failed" + return 1 + } + + # Parse CSV into JSON array via jq + local gpus_json + gpus_json=$(echo "$gpu_list" | jq -Rn '[inputs | split(",") | map(gsub("^\\s+|\\s+$"; "")) | { + index: (.[0] | tonumber), + name: .[1], + memory_gb: ((.[2] | tonumber) / 1024 * 10 | round / 10), + pcie_gen: .[3], + pcie_width: .[4], + uuid: .[5] + }]') + + local gpu_count + gpu_count=$(echo "$gpus_json" | jq 'length') + + # MIG detection + local mig_mode="false" + if nvidia-smi -q 2>/dev/null | grep -q "MIG Mode.*Enabled"; then + mig_mode="true" + fi + + # Driver version + local driver_ver + driver_ver=$(nvidia-smi --query-gpu=driver_version --format=csv,noheader 2>/dev/null | head -1 | xargs) + + # Topology matrix + local topo_pairs + topo_pairs=$(parse_nvidia_topo_matrix) + + # NUMA info + local numa_json="{}" + if command -v numactl &>/dev/null; then + local numa_nodes + numa_nodes=$(numactl --hardware 2>/dev/null | grep "^node [0-9]* cpus" | wc -l) + numa_json=$(jq -n --argjson n "$numa_nodes" '{nodes: $n}') + fi + + # Compose final JSON + jq -n \ + --arg vendor "nvidia" \ + --argjson gpu_count "$gpu_count" \ + --arg driver "$driver_ver" \ + --argjson mig "$mig_mode" \ + --argjson numa "$numa_json" \ + --argjson gpus "$gpus_json" \ + --argjson links "$topo_pairs" \ + '{ + vendor: $vendor, + gpu_count: $gpu_count, + driver_version: $driver, + mig_enabled: $mig, + numa: $numa, + gpus: $gpus, + links: $links + }' +} + +# ============================================================================ +# Topology lookup helpers (used by 03-features.sh custom assignment path) +# 
============================================================================ + +get_rank() { echo "${LINK_RANK["$1,$2"]:-0}"; } diff --git a/dream-server/installers/lib/tier-map.sh b/dream-server/installers/lib/tier-map.sh index 91f127a8..1d346210 100755 --- a/dream-server/installers/lib/tier-map.sh +++ b/dream-server/installers/lib/tier-map.sh @@ -23,6 +23,7 @@ resolve_tier_config() { GGUF_URL="" GGUF_SHA256="" MAX_CONTEXT=200000 + LLM_MODEL_SIZE_MB=0 ;; ARC) # Intel Arc A770 (16 GB) and future Arc B-series (≥12 GB VRAM) @@ -33,6 +34,7 @@ resolve_tier_config() { GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf" GGUF_SHA256="03b74727a860a56338e042c4420bb3f04b2fec5734175f4cb9fa853daf52b7e8" MAX_CONTEXT=32768 + LLM_MODEL_SIZE_MB=5760 # Qwen3.5-9B-Q4_K_M (5.68 GB) GPU_BACKEND="sycl" N_GPU_LAYERS=99 ;; @@ -45,6 +47,7 @@ resolve_tier_config() { GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-4B-GGUF/resolve/main/Qwen3.5-4B-Q4_K_M.gguf" GGUF_SHA256="00fe7986ff5f6b463e62455821146049db6f9313603938a70800d1fb69ef11a4" MAX_CONTEXT=16384 + LLM_MODEL_SIZE_MB=2870 # Qwen3.5-4B-Q4_K_M (2.74 GB) GPU_BACKEND="sycl" N_GPU_LAYERS=99 ;; @@ -55,6 +58,7 @@ resolve_tier_config() { GGUF_URL="https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF/resolve/main/Qwen3-Coder-Next-Q4_K_M.gguf" GGUF_SHA256="9e6032d2f3b50a60f17ce8bf5a1d85c71af9b53b89c7978020ae7c660f29b090" MAX_CONTEXT=131072 + LLM_MODEL_SIZE_MB=48500 # 48.5 GB per HF file listing ;; SH_LARGE) TIER_NAME="Strix Halo 90+" @@ -63,6 +67,7 @@ resolve_tier_config() { GGUF_URL="https://huggingface.co/unsloth/Qwen3-Coder-Next-GGUF/resolve/main/Qwen3-Coder-Next-Q4_K_M.gguf" GGUF_SHA256="9e6032d2f3b50a60f17ce8bf5a1d85c71af9b53b89c7978020ae7c660f29b090" MAX_CONTEXT=131072 + LLM_MODEL_SIZE_MB=48500 # 48.5 GB per HF file listing ;; SH_COMPACT) TIER_NAME="Strix Halo Compact" @@ -71,6 +76,7 @@ resolve_tier_config() { 
GGUF_URL="https://huggingface.co/unsloth/Qwen3-30B-A3B-GGUF/resolve/main/Qwen3-30B-A3B-Q4_K_M.gguf" GGUF_SHA256="9f1a24700a339b09c06009b729b5c809e0b64c213b8af5b711b3dbdfd0c5ba48" MAX_CONTEXT=131072 + LLM_MODEL_SIZE_MB=18600 # 18.6 GB per HF file listing ;; 0) TIER_NAME="Lightweight" @@ -79,6 +85,7 @@ resolve_tier_config() { GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-2B-GGUF/resolve/main/Qwen3.5-2B-Q4_K_M.gguf" GGUF_SHA256="" MAX_CONTEXT=8192 + LLM_MODEL_SIZE_MB=1500 # Qwen3.5-2B-Q4_K_M (1.28 GB) ;; 1) TIER_NAME="Entry Level" @@ -87,6 +94,7 @@ resolve_tier_config() { GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf" GGUF_SHA256="03b74727a860a56338e042c4420bb3f04b2fec5734175f4cb9fa853daf52b7e8" MAX_CONTEXT=16384 + LLM_MODEL_SIZE_MB=5760 # Qwen3.5-9B-Q4_K_M (5.68 GB) ;; 2) TIER_NAME="Prosumer" @@ -95,6 +103,7 @@ resolve_tier_config() { GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf" GGUF_SHA256="03b74727a860a56338e042c4420bb3f04b2fec5734175f4cb9fa853daf52b7e8" MAX_CONTEXT=32768 + LLM_MODEL_SIZE_MB=5760 # Qwen3.5-9B-Q4_K_M (5.68 GB) ;; 3) TIER_NAME="Pro" @@ -103,6 +112,7 @@ resolve_tier_config() { GGUF_URL="https://huggingface.co/unsloth/Qwen3.5-27B-GGUF/resolve/main/Qwen3.5-27B-Q4_K_M.gguf" GGUF_SHA256="84b5f7f112156d63836a01a69dc3f11a6ba63b10a23b8ca7a7efaf52d5a2d806" MAX_CONTEXT=32768 + LLM_MODEL_SIZE_MB=16400 # Qwen3.5-27B-Q4_K_M (16.7 GB) ;; 4) TIER_NAME="Enterprise" @@ -111,6 +121,7 @@ resolve_tier_config() { GGUF_URL="https://huggingface.co/unsloth/Qwen3-30B-A3B-GGUF/resolve/main/Qwen3-30B-A3B-Q4_K_M.gguf" GGUF_SHA256="9f1a24700a339b09c06009b729b5c809e0b64c213b8af5b711b3dbdfd0c5ba48" MAX_CONTEXT=131072 + LLM_MODEL_SIZE_MB=18600 # 18.6 GB per HF file listing ;; *) error "Invalid tier: $TIER. 
Valid tiers: 0, 1, 2, 3, 4, CLOUD, NV_ULTRA, SH_LARGE, SH_COMPACT, ARC, ARC_LITE" diff --git a/dream-server/installers/phases/01-preflight.sh b/dream-server/installers/phases/01-preflight.sh index 3645760c..a90ec363 100755 --- a/dream-server/installers/phases/01-preflight.sh +++ b/dream-server/installers/phases/01-preflight.sh @@ -42,11 +42,21 @@ if ! command -v curl &> /dev/null; then fi log "curl: $(curl --version 2>/dev/null | sed -n '1p')" -# Check optional tools (warn but don't fail) -OPTIONAL_TOOLS_MISSING="" if ! command -v jq &> /dev/null; then - OPTIONAL_TOOLS_MISSING="$OPTIONAL_TOOLS_MISSING jq" + log "jq not found - attempting auto-install..." + case "$PKG_MANAGER" in + dnf) sudo dnf install -y jq ;; + pacman) sudo pacman -S --noconfirm jq ;; + zypper) sudo zypper install -y jq ;; + apk) sudo apk add jq ;; + *) sudo apt-get install -y jq ;; + esac + command -v jq &> /dev/null || error "Failed to install jq automatically. Install it manually and re-run." fi +log "jq: $(jq --version 2>/dev/null)" + +# Check optional tools (warn but don't fail) +OPTIONAL_TOOLS_MISSING="" if ! command -v rsync &> /dev/null; then OPTIONAL_TOOLS_MISSING="$OPTIONAL_TOOLS_MISSING rsync" fi diff --git a/dream-server/installers/phases/02-detection.sh b/dream-server/installers/phases/02-detection.sh index 28f3cd44..ddb16133 100755 --- a/dream-server/installers/phases/02-detection.sh +++ b/dream-server/installers/phases/02-detection.sh @@ -16,7 +16,9 @@ # TIER, TIER_NAME, LLM_MODEL, GGUF_FILE, GGUF_URL, MAX_CONTEXT, # COMPOSE_FILE, COMPOSE_FLAGS, RAM_GB, DISK_AVAIL, BACKEND_ID, # LLM_HEALTHCHECK_URL, LLM_PUBLIC_API_PORT, -# OPENCLAW_PROVIDER_NAME_DEFAULT, OPENCLAW_PROVIDER_URL_DEFAULT +# OPENCLAW_PROVIDER_NAME_DEFAULT, OPENCLAW_PROVIDER_URL_DEFAULT, +# GPU_TOPOLOGY_JSON, GPU_HAS_NVLINK, GPU_TOTAL_VRAM, +# LLM_MODEL_SIZE_MB # # Modder notes: # Change tier auto-detection thresholds or add new hardware classes here. 
@@ -280,6 +282,41 @@ if [[ $GPU_COUNT -gt 0 && "$GPU_BACKEND" == "intel" ]]; then log "Intel Arc backend: GPU_BACKEND=intel, VRAM=${GPU_VRAM}MB, Level Zero=${_level_zero_ok}" fi +# ----------------------------------------------------------------------------- +# NVIDIA Multi-GPU Topology Detection +# ----------------------------------------------------------------------------- +GPU_TOPOLOGY_JSON="{}" +GPU_HAS_NVLINK="false" +GPU_TOTAL_VRAM=0 +if [[ $GPU_COUNT -gt 1 && "$GPU_BACKEND" == "nvidia" ]]; then + ai "Detecting multi-GPU topology..." + if [[ -f "$SCRIPT_DIR/installers/lib/nvidia-topo.sh" ]]; then + # Source the topology detection script + source "$SCRIPT_DIR/installers/lib/nvidia-topo.sh" + + # Run topology detection and capture JSON output + GPU_TOPOLOGY_JSON=$(detect_nvidia_topo 2>>"$LOG_FILE") || { + warn "Multi-GPU topology detection failed — multi-GPU configuration disabled" + ai_warn "Could not detect GPU topology. Multi-GPU features will be skipped." + ai_warn "Check $LOG_FILE for details. You can re-run the installer after fixing the issue." 
+ GPU_TOPOLOGY_JSON="{}" + } + + # Extract key topology information for tier assignment + if [[ -n "$GPU_TOPOLOGY_JSON" && "$GPU_TOPOLOGY_JSON" != "{}" ]]; then + GPU_HAS_NVLINK=$(echo "$GPU_TOPOLOGY_JSON" | jq -r '[.links[] | select(.link_type | startswith("NV"))] | length > 0') + GPU_TOTAL_VRAM=$(echo "$GPU_TOPOLOGY_JSON" | jq -r '[.gpus[].memory_gb] | add * 1024 | floor') + log "Multi-GPU topology: NVLink=$GPU_HAS_NVLINK, Total VRAM=${GPU_TOTAL_VRAM}MB" + else + log "topology detection returned empty, using basic GPU info" + GPU_TOTAL_VRAM=$((GPU_VRAM * GPU_COUNT)) + fi + else + log "NVIDIA topology detection script not found, skipping detailed topology analysis" + GPU_TOTAL_VRAM=$((GPU_VRAM * GPU_COUNT)) + fi +fi + # Auto-detect tier if not specified if [[ -z "$TIER" ]]; then PROFILE_TIER="$(normalize_profile_tier "${CAP_RECOMMENDED_TIER:-}")" @@ -306,7 +343,26 @@ if [[ -z "$TIER" ]]; then fi elif [[ $GPU_VRAM -ge 90000 ]]; then TIER="NV_ULTRA" - elif [[ $GPU_COUNT -ge 2 ]] || [[ $GPU_VRAM -ge 40000 ]]; then + elif [[ $GPU_COUNT -ge 2 ]]; then + # Enhanced multi-GPU tier assignment based on topology + if [[ "$GPU_HAS_NVLINK" == "true" ]]; then + # High-bandwidth interconnect (NVLink) + if [[ $GPU_COUNT -ge 4 || $GPU_TOTAL_VRAM -ge 90000 ]]; then + TIER="NV_ULTRA" + else + TIER=4 + fi + else + # PCIe or other interconnect + if [[ $GPU_COUNT -ge 4 ]]; then + TIER=4 + elif [[ $GPU_TOTAL_VRAM -ge 40000 ]]; then + TIER=4 + else + TIER=3 + fi + fi + elif [[ $GPU_VRAM -ge 40000 ]]; then TIER=4 elif [[ $GPU_VRAM -ge 20000 ]] || [[ $RAM_GB -ge 96 ]]; then TIER=3 diff --git a/dream-server/installers/phases/03-features.sh b/dream-server/installers/phases/03-features.sh index 11d6a244..fb676e7b 100755 --- a/dream-server/installers/phases/03-features.sh +++ b/dream-server/installers/phases/03-features.sh @@ -6,10 +6,15 @@ # Purpose: Interactive feature selection menu # # Expects: INTERACTIVE, DRY_RUN, TIER, ENABLE_VOICE, ENABLE_WORKFLOWS, -# ENABLE_RAG, ENABLE_OPENCLAW, 
show_phase(), show_install_menu(), -# log(), warn(), signal() +# ENABLE_RAG, ENABLE_OPENCLAW, GPU_COUNT, GPU_BACKEND, +# GPU_TOPOLOGY_JSON, LLM_MODEL_SIZE_MB, SCRIPT_DIR, VERBOSE, DEBUG, +# GPU_INDICES, GPU_UUIDS (arrays from topology), +# show_phase(), show_install_menu(), chapter(), bootline(), +# success(), log(), warn(), error(), signal() # Provides: ENABLE_VOICE, ENABLE_WORKFLOWS, ENABLE_RAG, ENABLE_OPENCLAW, -# OPENCLAW_CONFIG +# OPENCLAW_CONFIG, GPU_ASSIGNMENT_JSON, +# LLAMA_SERVER_GPU_UUIDS, WHISPER_GPU_UUID, COMFYUI_GPU_UUID, +# EMBEDDINGS_GPU_UUID, LLAMA_ARG_SPLIT_MODE, LLAMA_ARG_TENSOR_SPLIT # # Modder notes: # Add new optional features to the Custom menu here. @@ -84,3 +89,279 @@ if [[ "$ENABLE_OPENCLAW" == "true" ]]; then fi log "All services enabled (core install)" + +# Early return if single gpu +if [[ "$GPU_COUNT" -le 1 ]]; then + log "Single GPU detected — skipping multi-GPU configuration." + return +fi + +# Multi-GPU Configuration + +# write $GPU_TOPOLOGY_JSON into a tmpfile to use by the commands +TOPOLOGY_FILE=$(mktemp /tmp/ds_gpu_topology.XXXXXX.json) +trap "rm -f $TOPOLOGY_FILE" EXIT +echo "$GPU_TOPOLOGY_JSON" > "$TOPOLOGY_FILE" + +ASSIGN_GPUS_SCRIPT="$SCRIPT_DIR/scripts/assign_gpus.py" + +# Validate topology gpu_count matches installer's GPU_COUNT (don't overwrite the canonical value) +_topo_gpu_count=$(jq '.gpu_count // 0' "$TOPOLOGY_FILE") +if [[ "$_topo_gpu_count" != "$GPU_COUNT" ]]; then + warn "Topology gpu_count ($_topo_gpu_count) differs from detected GPU_COUNT ($GPU_COUNT) — using detected value" +fi +VENDOR=$(jq -r '.vendor' "$TOPOLOGY_FILE") + +# Build GPU arrays keyed by actual GPU index +# This ensures GPU_UUIDS[$idx] always maps to the correct GPU even if +# nvidia-smi returns GPUs out of index order. 
+declare -a GPU_INDICES=() +declare -A GPU_NAMES=() +declare -A GPU_VRAMS_GB=() +declare -A GPU_UUIDS=() +while IFS=$'\t' read -r _idx _name _mem _uuid; do + GPU_INDICES+=("$_idx") + GPU_NAMES["$_idx"]="$_name" + GPU_VRAMS_GB["$_idx"]="$_mem" + GPU_UUIDS["$_idx"]="$_uuid" +done < <(jq -r '.gpus[] | [.index, .name, .memory_gb, .uuid] | @tsv' "$TOPOLOGY_FILE") + +declare -A LINK_RANK +declare -A LINK_TYPE +while IFS=$'\t' read -r a b rank ltype; do + LINK_RANK["$a,$b"]=$rank + LINK_RANK["$b,$a"]=$rank + LINK_TYPE["$a,$b"]=$ltype + LINK_TYPE["$b,$a"]=$ltype +done < <(jq -r '.links[] | [.gpu_a, .gpu_b, .rank, .link_type] | @tsv' "$TOPOLOGY_FILE") + +# Automatic assignment +run_automatic() { + echo "" + chapter "AUTOMATIC GPU ASSIGNMENT" + echo -e " ${GRN}Running topology-aware assignment...${NC}" + echo "" + + local result + result=$(python3 "$ASSIGN_GPUS_SCRIPT" \ + --topology "$TOPOLOGY_FILE" --model-size "$LLM_MODEL_SIZE_MB" 2>&1) || { + echo -e " ${RED}Assignment failed:${NC}\n $result" + error "GPU assignment failed: $result" + } + + local strategy mode tp pp mem_util + strategy=$(echo "$result" | jq -r '.gpu_assignment.strategy') + mode=$(echo "$result" | jq -r '.gpu_assignment.services.llama_server.parallelism.mode') + tp=$(echo "$result" | jq -r '.gpu_assignment.services.llama_server.parallelism.tensor_parallel_size') + pp=$(echo "$result" | jq -r '.gpu_assignment.services.llama_server.parallelism.pipeline_parallel_size') + mem_util=$(echo "$result" | jq -r '.gpu_assignment.services.llama_server.parallelism.gpu_memory_utilization') + + GPU_ASSIGNMENT_JSON="$result" + success "Assignment complete" + echo "" + echo -e " ${WHT}Strategy:${NC} ${BGRN}${strategy}${NC}" + echo -e " ${WHT}Llama mode:${NC} ${BGRN}${mode}${NC}" + echo "" + echo -e " ${WHT}Service assignments:${NC}" + + for svc in llama_server whisper comfyui embeddings; do + local labels="" + while IFS= read -r uuid; do + for i in "${GPU_INDICES[@]}"; do + [[ "${GPU_UUIDS[$i]}" == "$uuid" ]] && 
labels+="GPU${i} " + done + done < <(echo "$result" | jq -r ".gpu_assignment.services.${svc}.gpus[]" 2>/dev/null) + [[ -n "$labels" ]] && printf " ${AMB}*${NC} %-16s ${BGRN}%s${NC}\n" "$svc" "$labels" + done + + _show_json "$result" +} + +# Custom assignment +run_custom() { + [[ "$INTERACTIVE" == "true" ]] || { warn "run_custom called in non-interactive mode — skipping."; return; } + echo "" + chapter "CUSTOM GPU ASSIGNMENT" + echo -e " ${GRN}Assign GPUs to each service manually.${NC}" + echo -e " ${DIM}whisper / comfyui / embeddings: 1 GPU each. llama_server: 1 or more.${NC}" + echo "" + + declare -A CUSTOM_ASSIGNMENT + for svc in whisper comfyui embeddings; do + local valid=false + while ! $valid; do + read -rp " GPU for ${WHT}${svc}${NC} (0-$((GPU_COUNT-1))): " chosen + if [[ "$chosen" =~ ^[0-9]+$ ]] && [[ $chosen -ge 0 ]] && [[ $chosen -lt $GPU_COUNT ]]; then + CUSTOM_ASSIGNMENT[$svc]=$chosen; valid=true + else + warn " Invalid -- enter a number between 0 and $((GPU_COUNT-1))." + fi + done + done + + echo "" + local used=("${CUSTOM_ASSIGNMENT[whisper]}" "${CUSTOM_ASSIGNMENT[comfyui]}" "${CUSTOM_ASSIGNMENT[embeddings]}") + local default_llama="" + for idx in "${GPU_INDICES[@]}"; do + local found=false + for u in "${used[@]}"; do [[ "$u" == "$idx" ]] && found=true; done + $found || default_llama+="${idx}," + done + default_llama="${default_llama%,}" + + read -rp " GPUs for ${WHT}llama_server${NC} [${default_llama}]: " llama_input + llama_input="${llama_input:-$default_llama}" + IFS=',' read -ra LLAMA_GPUS_CUSTOM <<< "$llama_input" + for g in "${LLAMA_GPUS_CUSTOM[@]}"; do + [[ "$g" =~ ^[0-9]+$ ]] && [[ $g -lt $GPU_COUNT ]] || error "Invalid GPU index '$g'" + done + + echo "" + echo -e " ${WHT}Assignment:${NC}" + printf " ${AMB}*${NC} %-16s ${BGRN}" "llama_server" + for g in "${LLAMA_GPUS_CUSTOM[@]}"; do printf "GPU%s " "$g"; done + printf "${NC}\n" + for svc in whisper comfyui embeddings; do + printf " ${AMB}*${NC} %-16s ${BGRN}GPU%s${NC}\n" "$svc" 
"${CUSTOM_ASSIGNMENT[$svc]}" + done + + local all_assigned=("${LLAMA_GPUS_CUSTOM[@]}" "${CUSTOM_ASSIGNMENT[whisper]}" \ + "${CUSTOM_ASSIGNMENT[comfyui]}" "${CUSTOM_ASSIGNMENT[embeddings]}") + local unique; unique=$(printf '%s\n' "${all_assigned[@]}" | sort -u | wc -l) + local strategy="dedicated" + [[ $unique -lt ${#all_assigned[@]} ]] && strategy="colocated" + [[ $GPU_COUNT -eq 1 ]] && strategy="single" + + local n=${#LLAMA_GPUS_CUSTOM[@]} + local min_rank=100 + if [[ $n -gt 1 ]]; then + for ((x=0; x 0 + then $ts | map(tostring) | join(",") + else ($svc.gpus | length) as $n | + if $n > 1 then [range($n) | 1] | map(tostring) | join(",") + else "1" + end + end') + +rm -f "$TOPOLOGY_FILE" diff --git a/dream-server/installers/phases/04-requirements.sh b/dream-server/installers/phases/04-requirements.sh index 20c2673d..17c38a01 100755 --- a/dream-server/installers/phases/04-requirements.sh +++ b/dream-server/installers/phases/04-requirements.sh @@ -20,6 +20,7 @@ dream_progress 25 "requirements" "Checking system requirements" chapter "REQUIREMENTS CHECK" [[ -f "${SCRIPT_DIR:-}/lib/safe-env.sh" ]] && . "${SCRIPT_DIR}/lib/safe-env.sh" +[[ -f "$SCRIPT_DIR/lib/service-registry.sh" ]] && . 
"$SCRIPT_DIR/lib/service-registry.sh" REQUIREMENTS_MET=true TIER_RANK="$(tier_rank "$TIER")" diff --git a/dream-server/installers/phases/06-directories.sh b/dream-server/installers/phases/06-directories.sh index 14f89a9f..e44a1f8b 100755 --- a/dream-server/installers/phases/06-directories.sh +++ b/dream-server/installers/phases/06-directories.sh @@ -11,12 +11,14 @@ # LLM_MODEL, MAX_CONTEXT, GGUF_FILE, COMPOSE_FLAGS, # ENABLE_VOICE, ENABLE_WORKFLOWS, ENABLE_RAG, ENABLE_OPENCLAW, # OPENCLAW_CONFIG, OPENCLAW_PROVIDER_NAME_DEFAULT, -# OPENCLAW_PROVIDER_URL_DEFAULT, +# OPENCLAW_PROVIDER_URL_DEFAULT, GPU_ASSIGNMENT_JSON, +# COMFYUI_GPU_UUID, WHISPER_GPU_UUID, EMBEDDINGS_GPU_UUID, +# LLAMA_SERVER_GPU_UUIDS, LLAMA_ARG_SPLIT_MODE, LLAMA_ARG_TENSOR_SPLIT, # chapter(), ai(), ai_ok(), ai_warn(), log(), warn(), error() # Provides: WEBUI_SECRET, N8N_PASS, LITELLM_KEY, LIVEKIT_SECRET, # DASHBOARD_API_KEY, OPENCODE_SERVER_PASSWORD, OPENCLAW_TOKEN, # OPENCLAW_PROVIDER_NAME, OPENCLAW_PROVIDER_URL, OPENCLAW_MODEL, -# OPENCLAW_CONTEXT +# OPENCLAW_CONTEXT, GPU_ASSIGNMENT_JSON_B64 (in .env) # # Modder notes: # This is the largest phase. Modify .env generation, add new config files, @@ -287,6 +289,12 @@ MODELS_EOF ANTHROPIC_API_KEY=$(_env_get ANTHROPIC_API_KEY "${ANTHROPIC_API_KEY:-}") OPENAI_API_KEY=$(_env_get OPENAI_API_KEY "${OPENAI_API_KEY:-}") TOGETHER_API_KEY=$(_env_get TOGETHER_API_KEY "${TOGETHER_API_KEY:-}") + # Base64-encode GPU assignment JSON for safe .env storage + if [[ -n "$GPU_ASSIGNMENT_JSON" && "$GPU_ASSIGNMENT_JSON" != "{}" ]]; then + GPU_ASSIGNMENT_JSON_B64=$(echo "$GPU_ASSIGNMENT_JSON" | jq -c '.' 
| base64 -w0) + else + GPU_ASSIGNMENT_JSON_B64="" + fi # Generate .env file cat > "$INSTALL_DIR/.env" << ENV_EOF @@ -400,6 +408,16 @@ LANGFUSE_INIT_USER_PASSWORD=${LANGFUSE_INIT_USER_PASSWORD} # ── Image Generation ── ENABLE_IMAGE_GENERATION=${ENABLE_COMFYUI:-true} + +#=== Multi-GPU Settings === +GPU_ASSIGNMENT_JSON_B64=${GPU_ASSIGNMENT_JSON_B64:-} +COMFYUI_GPU_UUID=${COMFYUI_GPU_UUID:-} +WHISPER_GPU_UUID=${WHISPER_GPU_UUID:-} +EMBEDDINGS_GPU_UUID=${EMBEDDINGS_GPU_UUID:-} +LLAMA_SERVER_GPU_UUIDS=${LLAMA_SERVER_GPU_UUIDS:-} +LLAMA_ARG_SPLIT_MODE=${LLAMA_ARG_SPLIT_MODE:-none} +LLAMA_ARG_TENSOR_SPLIT=${LLAMA_ARG_TENSOR_SPLIT:-} + ENV_EOF chmod 600 "$INSTALL_DIR/.env" # Secure secrets file diff --git a/dream-server/scripts/assign_gpus.py b/dream-server/scripts/assign_gpus.py new file mode 100644 index 00000000..bd8e40a0 --- /dev/null +++ b/dream-server/scripts/assign_gpus.py @@ -0,0 +1,480 @@ +#!/usr/bin/env python3 +""" +assign_gpus.py — GPU assignment algorithm for DreamServer + +Usage: + python3 assign_gpus.py --topology topo.json --model-size 70000 + python3 assign_gpus.py --topology topo.json --model-size 70000 --enabled-services llama_server,whisper + +Output: gpu_assignment JSON to stdout +Errors: to stderr, exit code 1 +""" + +import argparse +import json +import math +import sys +from dataclasses import dataclass, field +from itertools import combinations +from typing import Optional + + +# Constants + +HIGH_BW_THRESHOLD = 80 # min rank for NVLink / XGMI +DEFAULT_SERVICES = ["llama_server", "whisper", "comfyui", "embeddings"] +NON_LLAMA = ["whisper", "comfyui", "embeddings"] + + +# Data Models + +@dataclass +class GPU: + index: int + uuid: str + name: str + memory_mb: float + +@dataclass +class Link: + gpu_a: int + gpu_b: int + link_type: str + link_label: str + rank: int + +@dataclass +class Subset: + gpus: list + min_link_rank: int + total_vram_mb: float + all_pairs_highbw: bool + +@dataclass +class LlamaParallelism: + mode: str + tensor_parallel_size: int + 
pipeline_parallel_size: int + gpu_memory_utilization: float + tensor_split: Optional[list] = None + +@dataclass +class ServiceAssignment: + gpus: list + parallelism: Optional[LlamaParallelism] = None + +@dataclass +class AssignmentResult: + strategy: str + services: dict + + +# Phase 1: Topology Analysis + +def parse_gpus(topology: dict) -> list: + gpus = [] + for g in topology["gpus"]: + gpus.append(GPU( + index=g["index"], + uuid=g["uuid"], + name=g["name"], + memory_mb=g["memory_gb"] * 1024, + )) + return gpus + + +def parse_links(topology: dict) -> list: + links = [] + for link in topology.get("links", []): + links.append(Link( + gpu_a=link["gpu_a"], + gpu_b=link["gpu_b"], + link_type=link["link_type"], + link_label=link["link_label"], + rank=link["rank"], + )) + return links + + +def build_rank_matrix(links: list) -> dict: + """ + rank_matrix[(min_idx, max_idx)] = rank + Pairs not in links default to 0. + """ + matrix = {} + for link in links: + key = (min(link.gpu_a, link.gpu_b), max(link.gpu_a, link.gpu_b)) + matrix[key] = link.rank + return matrix + + +def get_rank(rank_matrix: dict, a: int, b: int) -> int: + return rank_matrix.get((min(a, b), max(a, b)), 0) + + +def compute_subset(gpus: list, rank_matrix: dict) -> Subset: + """ + Compute a Subset from a list of GPUs. + Single GPU: min_link_rank=0, all_pairs_highbw=True (no links needed). + """ + if len(gpus) == 1: + return Subset( + gpus=gpus, + min_link_rank=0, + total_vram_mb=gpus[0].memory_mb, + all_pairs_highbw=True, + ) + + indices = [g.index for g in gpus] + ranks = [get_rank(rank_matrix, a, b) for a, b in combinations(indices, 2)] + min_rank = min(ranks) + + return Subset( + gpus=gpus, + min_link_rank=min_rank, + total_vram_mb=sum(g.memory_mb for g in gpus), + all_pairs_highbw=(min_rank >= HIGH_BW_THRESHOLD), + ) + + +def enumerate_subsets(gpus: list, rank_matrix: dict) -> list: + """ + Generate all non-empty subsets of GPUs, ordered by: + 1. min_link_rank DESC (topology quality) + 2. 
subset size ASC (prefer fewer GPUs, leave more for services) + 3. total_vram DESC (tiebreaker) + """ + all_subsets = [] + for size in range(1, len(gpus) + 1): + for combo in combinations(gpus, size): + all_subsets.append(compute_subset(list(combo), rank_matrix)) + + return sorted( + all_subsets, + key=lambda s: (s.min_link_rank, -len(s.gpus), s.total_vram_mb), + reverse=True, + ) + + +# Phase 2: GPU Assignment + +def find_llama_subset(ordered_subsets: list, model_size_mb: float) -> Subset: + """ + Pick the best-ranked subset whose total VRAM covers model_size_mb. + Returns the first match (best topology, smallest size, most VRAM). + """ + for subset in ordered_subsets: + if subset.total_vram_mb >= model_size_mb: + return subset + return None + + +def span_subsets(all_gpus: list, rank_matrix: dict, model_size_mb: float, ordered_subsets: list) -> Subset: + """ + No single subset covers model_size_mb. + Take the best subset, then greedily add GPUs from the remaining pool + (ordered by memory_mb DESC) until VRAM is covered. + Recomputes min_link_rank on the combined set. + """ + best = ordered_subsets[0] + accumulated = list(best.gpus) + used = {g.index for g in accumulated} + + remaining = sorted( + [g for g in all_gpus if g.index not in used], + key=lambda g: g.memory_mb, + reverse=True, + ) + + for gpu in remaining: + accumulated.append(gpu) + candidate = compute_subset(accumulated, rank_matrix) + if candidate.total_vram_mb >= model_size_mb: + return candidate + + raise ValueError( + f"Model size {model_size_mb:.0f}MB exceeds total available VRAM " + f"({sum(g.memory_mb for g in all_gpus):.0f}MB across all GPUs)." + ) + + +def assign_services(all_gpus: list, llama_gpus: list, rank_matrix: dict, enabled_services: list) -> tuple: + """ + Assign remaining GPUs to non-llama services. + Returns (service_assignments dict, final_llama_gpus list, strategy str). 
+ + Rules: + remaining == 0 → all 3 services share llama's last GPU → colocated + remaining == 1 → all 3 services share remaining[0] → colocated + remaining == 2 → whisper → [0], comfyui+embeddings → [1] → colocated + remaining >= 3 → whisper → [0], comfyui → [1], emb → [2] → dedicated + remaining[3:] → back to llama + """ + llama_indices = {g.index for g in llama_gpus} + remaining = sorted( + [g for g in all_gpus if g.index not in llama_indices], + key=lambda g: g.memory_mb, + reverse=True, + ) + + active_non_llama = [s for s in NON_LLAMA if s in enabled_services] + assignments = {} + final_llama_gpus = list(llama_gpus) + + if len(remaining) == 0: + fallback = llama_gpus[-1] + for s in active_non_llama: + assignments[s] = ServiceAssignment(gpus=[fallback]) + strategy = "colocated" + + elif len(remaining) == 1: + for s in active_non_llama: + assignments[s] = ServiceAssignment(gpus=[remaining[0]]) + strategy = "colocated" + + elif len(remaining) == 2: + if "whisper" in enabled_services: assignments["whisper"] = ServiceAssignment(gpus=[remaining[0]]) + if "comfyui" in enabled_services: assignments["comfyui"] = ServiceAssignment(gpus=[remaining[1]]) + if "embeddings" in enabled_services: assignments["embeddings"] = ServiceAssignment(gpus=[remaining[1]]) + strategy = "colocated" + + else: + if "whisper" in enabled_services: assignments["whisper"] = ServiceAssignment(gpus=[remaining[0]]) + if "comfyui" in enabled_services: assignments["comfyui"] = ServiceAssignment(gpus=[remaining[1]]) + if "embeddings" in enabled_services: assignments["embeddings"] = ServiceAssignment(gpus=[remaining[2]]) + # Push extras back to llama so no GPU sits idle + if len(remaining) > 3: + final_llama_gpus = final_llama_gpus + remaining[3:] + strategy = "dedicated" + + assignments["llama_server"] = ServiceAssignment(gpus=final_llama_gpus) + return assignments, final_llama_gpus, strategy + + +# Phase 3: Llama Parallelism + +def largest_pow2_divisor(n: int) -> int: + """ + Find the largest power 
of 2 p such that: + - p divides n evenly + - p <= sqrt(n) (keeps tensor_size <= pipeline_size for balance) + Minimum return value is 2 (hybrid requires at least 2 tensor groups). + """ + p = 1 + while True: + candidate = p * 2 + if candidate > n or n % candidate != 0: + break + if candidate > math.sqrt(n): + break + p = candidate + return max(2, p) + + +def is_heterogeneous(gpus: list) -> bool: + vrams = [g.memory_mb for g in gpus] + return max(vrams) != min(vrams) + + +def compute_tensor_split(gpus: list) -> list: + """Proportional VRAM weights, rounded to 4 decimal places.""" + total = sum(g.memory_mb for g in gpus) + return [round(g.memory_mb / total, 4) for g in gpus] + + +def select_parallelism(subset: Subset) -> LlamaParallelism: + """ + Select parallelism mode based on GPU count and min_link_rank. + + Thresholds: + rank >= 80 → NVLink / XGMI → tensor or hybrid + rank 11-79 → same-NUMA PCIe → pipeline, or hybrid if rank >= 40 and >= 4 GPUs + rank <= 10 → cross-NUMA → pipeline only + """ + gpus = subset.gpus + n = len(gpus) + rank = subset.min_link_rank + split = compute_tensor_split(gpus) if is_heterogeneous(gpus) else None + + # Single GPU + if n == 1: + return LlamaParallelism( + mode="none", + tensor_parallel_size=1, + pipeline_parallel_size=1, + gpu_memory_utilization=0.95, + ) + + # High-bandwidth (NVLink / XGMI) + if rank >= HIGH_BW_THRESHOLD: + if n <= 3: + return LlamaParallelism( + mode="tensor", + tensor_parallel_size=n, + pipeline_parallel_size=1, + gpu_memory_utilization=0.92, + tensor_split=split, + ) + else: + tp = largest_pow2_divisor(n) + pp = n // tp + return LlamaParallelism( + mode="hybrid", + tensor_parallel_size=tp, + pipeline_parallel_size=pp, + gpu_memory_utilization=0.93, + tensor_split=split, + ) + + # Cross-NUMA PCIe + if rank <= 10: + return LlamaParallelism( + mode="pipeline", + tensor_parallel_size=1, + pipeline_parallel_size=n, + gpu_memory_utilization=0.95, + ) + + # Same-NUMA PCIe (rank 11-79) + if n <= 3: + return 
LlamaParallelism( + mode="pipeline", + tensor_parallel_size=1, + pipeline_parallel_size=n, + gpu_memory_utilization=0.95, + ) + else: + if rank >= 40: + tp = largest_pow2_divisor(n) + pp = n // tp + return LlamaParallelism( + mode="hybrid", + tensor_parallel_size=tp, + pipeline_parallel_size=pp, + gpu_memory_utilization=0.93, + tensor_split=split, + ) + else: + return LlamaParallelism( + mode="pipeline", + tensor_parallel_size=1, + pipeline_parallel_size=n, + gpu_memory_utilization=0.95, + ) + + +# Phase 4: Build Output JSON + +def build_output(result: AssignmentResult) -> dict: + services = {} + + for name, assignment in result.services.items(): + entry = {"gpus": [g.uuid for g in assignment.gpus]} + + if assignment.parallelism: + p = assignment.parallelism + para = { + "mode": p.mode, + "tensor_parallel_size": p.tensor_parallel_size, + "pipeline_parallel_size": p.pipeline_parallel_size, + "gpu_memory_utilization": p.gpu_memory_utilization, + } + if p.tensor_split is not None: + para["tensor_split"] = p.tensor_split + entry["parallelism"] = para + + services[name] = entry + + return { + "gpu_assignment": { + "version": "1.0", + "strategy": result.strategy, + "services": services, + } + } + + +# Entry Point + +def main(): + parser = argparse.ArgumentParser(description="GPU assignment algorithm for DreamServer") + parser.add_argument("--topology", required=True, help="Path to topology JSON file") + parser.add_argument("--model-size", required=True, type=float, help="Model size in MB") + parser.add_argument("--enabled-services", default=",".join(DEFAULT_SERVICES), + help="Comma-separated list of enabled services") + args = parser.parse_args() + + # Load topology + try: + with open(args.topology) as f: + topology = json.load(f) + except FileNotFoundError: + print(f"ERROR: topology file not found: {args.topology}", file=sys.stderr) + sys.exit(1) + except json.JSONDecodeError as e: + print(f"ERROR: invalid JSON in topology file: {e}", file=sys.stderr) + sys.exit(1) + + 
enabled_services = [s.strip() for s in args.enabled_services.split(",")] + model_size_mb = args.model_size + gpu_count = topology.get("gpu_count", 0) + + if gpu_count == 0: + print("ERROR: no GPUs found in topology", file=sys.stderr) + sys.exit(1) + + # Early exit: single GPU + if gpu_count == 1: + gpu = parse_gpus(topology)[0] + if model_size_mb > gpu.memory_mb: + print( + f"ERROR: Model size {model_size_mb:.0f}MB exceeds total available VRAM " + f"({gpu.memory_mb:.0f}MB across all GPUs).", + file=sys.stderr, + ) + sys.exit(1) + parallelism = LlamaParallelism( + mode="none", + tensor_parallel_size=1, + pipeline_parallel_size=1, + gpu_memory_utilization=0.95, + ) + services = {} + for s in enabled_services: + services[s] = ServiceAssignment(gpus=[gpu]) + services["llama_server"].parallelism = parallelism + result = AssignmentResult(strategy="single", services=services) + print(json.dumps(build_output(result), indent=2)) + return + + # Phase 1: Topology analysis + gpus = parse_gpus(topology) + links = parse_links(topology) + rank_matrix = build_rank_matrix(links) + ordered = enumerate_subsets(gpus, rank_matrix) + + # Phase 2: GPU assignment + try: + llama_subset = find_llama_subset(ordered, model_size_mb) + if llama_subset is None: + llama_subset = span_subsets(gpus, rank_matrix, model_size_mb, ordered) + except ValueError as e: + print(f"ERROR: {e}", file=sys.stderr) + sys.exit(1) + + service_assignments, final_llama_gpus, strategy = assign_services( + gpus, llama_subset.gpus, rank_matrix, enabled_services + ) + + # Phase 3: Llama parallelism + final_subset = compute_subset(final_llama_gpus, rank_matrix) + parallelism = select_parallelism(final_subset) + service_assignments["llama_server"].parallelism = parallelism + + # Phase 4: Emit JSON + result = AssignmentResult(strategy=strategy, services=service_assignments) + print(json.dumps(build_output(result), indent=2)) + + +if __name__ == "__main__": + main() diff --git 
a/dream-server/scripts/build-capability-profile.sh b/dream-server/scripts/build-capability-profile.sh index 0c9906b6..6578a4cf 100755 --- a/dream-server/scripts/build-capability-profile.sh +++ b/dream-server/scripts/build-capability-profile.sh @@ -98,7 +98,7 @@ gpu_type = (gpu.get("type") or "none").lower() gpu_name = gpu.get("name") or "None" memory_type = (gpu.get("memory_type") or "none").lower() vram_mb = int(gpu.get("vram_mb") or 0) -gpu_count = 1 if gpu_type not in {"none", ""} else 0 +gpu_count = int(gpu.get("count") or (1 if gpu_type not in {"none", ""} else 0)) llm_health_url = f"http://localhost:{llm_port}{llm_health}" llm_api_port = llm_port diff --git a/dream-server/scripts/detect-hardware.sh b/dream-server/scripts/detect-hardware.sh index 7276a9e3..dc763892 100755 --- a/dream-server/scripts/detect-hardware.sh +++ b/dream-server/scripts/detect-hardware.sh @@ -161,6 +161,15 @@ parse_nvidia_vram_mb() { as_int "$mb" } +# Count NVIDIA GPUs +count_nvidia_gpus() { + if command -v nvidia-smi &>/dev/null; then + nvidia-smi --query-gpu=name --format=csv,noheader,nounits 2>/dev/null | wc -l | tr -d ' ' + else + echo "0" + fi +} + # Detect AMD GPU via sysfs (works without ROCm installed) # Returns: gpu_name|vram_bytes|gtt_bytes|is_apu|gpu_busy|temp|power|vulkan|rocm|driver|device_id|subsystem_device|revision detect_amd_sysfs() { @@ -255,6 +264,19 @@ detect_amd_sysfs() { return 1 } +# Count AMD GPUs via sysfs +count_amd_gpus() { + local count=0 + for card_dir in /sys/class/drm/card*/device; do + [[ -d "$card_dir" ]] || continue + local vendor + vendor=$(cat "$card_dir/vendor" 2>/dev/null) || continue + # (( 0++ )) returns exit 1 in bash, so || true prevents pipefail abort + [[ "$vendor" == "0x1002" ]] && (( count++ )) || true + done + echo "$count" +} + # Detect AMD GPU (legacy ROCm-only path) detect_amd() { # Try sysfs first (works without ROCm) @@ -429,6 +451,7 @@ main() { local gpu_name="" local gpu_vram_mb=0 + local gpu_count=0 local gpu_type="none" local 
gpu_architecture="" local memory_type="discrete" @@ -452,6 +475,7 @@ main() { if [[ -n "$nvidia_out" ]]; then gpu_name=$(echo "$nvidia_out" | awk -F',' '{gsub(/^[ \t]+|[ \t]+$/,"",$1); print $1}' | xargs || true) gpu_vram_mb=$(parse_nvidia_vram_mb "$nvidia_out") + gpu_count=$(count_nvidia_gpus) gpu_type="nvidia" gpu_architecture="cuda" memory_type="discrete" @@ -470,6 +494,7 @@ main() { gtt_bytes=$(as_int "$gtt_bytes") gpu_vram_mb=$(( vram_bytes / 1048576 )) + gpu_count=$(count_amd_gpus) gpu_type="amd" gpu_temp=$(as_int "$temp") gpu_power=$(as_int "$power") @@ -503,6 +528,7 @@ main() { if [[ -n "$apple_out" ]]; then gpu_name="Apple Silicon (Unified Memory)" gpu_vram_mb=$((ram * 1024)) + gpu_count=1 gpu_type="apple" gpu_architecture="apple-unified" memory_type="unified" @@ -546,6 +572,7 @@ main() { "name": "$esc_gpu", "architecture": "$(json_escape "$gpu_architecture")", "memory_type": "$(json_escape "$memory_type")", + "count": $gpu_count, "vram_mb": $gpu_vram_mb, "vram_gb": $gpu_vram_gb, "device_id": "$(json_escape "$device_id")", diff --git a/dream-server/scripts/resolve-compose-stack.sh b/dream-server/scripts/resolve-compose-stack.sh index a4b0af99..b277a555 100755 --- a/dream-server/scripts/resolve-compose-stack.sh +++ b/dream-server/scripts/resolve-compose-stack.sh @@ -7,6 +7,7 @@ GPU_BACKEND="nvidia" PROFILE_OVERLAYS="" ENV_MODE="false" SKIP_BROKEN="false" +GPU_COUNT="1" while [[ $# -gt 0 ]]; do case "$1" in @@ -34,6 +35,10 @@ while [[ $# -gt 0 ]]; do ENV_MODE="true" shift ;; + --gpu-count) + GPU_COUNT="${2:-$GPU_COUNT}" + shift 2 + ;; *) echo "Unknown argument: $1" >&2 exit 1 @@ -50,7 +55,7 @@ elif command -v python >/dev/null 2>&1; then PYTHON_CMD="python" fi -"$PYTHON_CMD" - "$SCRIPT_DIR" "$TIER" "$GPU_BACKEND" "$PROFILE_OVERLAYS" "$ENV_MODE" "$SKIP_BROKEN" <<'PY' +"$PYTHON_CMD" - "$SCRIPT_DIR" "$TIER" "$GPU_BACKEND" "$PROFILE_OVERLAYS" "$ENV_MODE" "$SKIP_BROKEN" "$GPU_COUNT" <<'PY' import os import pathlib import sys @@ -63,6 +68,7 @@ profile_overlays = 
[x.strip() for x in (sys.argv[4] or "").split(",") if x.strip env_mode = (sys.argv[5] or "false").lower() == "true" skip_broken = (sys.argv[6] or "false").lower() == "true" dream_mode = os.environ.get("DREAM_MODE", "local").lower() +gpu_count = int(sys.argv[7] or "1") def existing(overlays): return all((script_dir / f).exists() for f in overlays) @@ -113,6 +119,10 @@ else: if not resolved: resolved = [primary] +# Multi-GPU overlay if we have more than 1 GPU. +if gpu_count > 1 and (script_dir / "docker-compose.multigpu.yml").exists(): + resolved.append("docker-compose.multigpu.yml") + # Discover enabled extension compose fragments via manifests ext_dir = script_dir / "extensions" / "services" if ext_dir.exists(): @@ -161,11 +171,19 @@ if ext_dir.exists(): gpu_overlay = service_dir / f"compose.{gpu_backend}.yaml" if gpu_overlay.exists(): resolved.append(str(gpu_overlay.relative_to(script_dir))) + # Mode-specific overlay — depends_on for local/hybrid mode only if dream_mode in ("local", "hybrid", "lemonade"): local_mode_overlay = service_dir / "compose.local.yaml" if local_mode_overlay.exists(): resolved.append(str(local_mode_overlay.relative_to(script_dir))) + + # Multi-GPU overlay if we have more than 1 GPU + if gpu_count > 1: + multi_gpu_overlay = service_dir / "compose.multigpu.yaml" + if multi_gpu_overlay.exists(): + resolved.append(str(multi_gpu_overlay.relative_to(script_dir))) + except Exception as e: # Narrow exception handling to specific parse/structure errors yaml_error = yaml_available and hasattr(yaml, 'YAMLError') and isinstance(e, yaml.YAMLError) diff --git a/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_1gpu_pcie.json b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_1gpu_pcie.json new file mode 100644 index 00000000..5c491a2f --- /dev/null +++ b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_1gpu_pcie.json @@ -0,0 +1,18 @@ +{ + "vendor": "nvidia", + "gpu_count": 1, + "driver_version": 
"535.129.03", + "mig_enabled": false, + "numa": {}, + "gpus": [ + { + "index": 0, + "name": "NVIDIA RTX 4090", + "memory_gb": 24.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-12345678-1234-1234-1234-123456789012" + } + ], + "links": [] +} diff --git a/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_2gpus_phb_coloc.json b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_2gpus_phb_coloc.json new file mode 100644 index 00000000..199cf6eb --- /dev/null +++ b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_2gpus_phb_coloc.json @@ -0,0 +1,34 @@ +{ + "vendor": "nvidia", + "gpu_count": 2, + "driver_version": "535.129.03", + "mig_enabled": false, + "numa": {}, + "gpus": [ + { + "index": 0, + "name": "NVIDIA RTX 4090", + "memory_gb": 24.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-00000000-0000-0000-0000-000000000000" + }, + { + "index": 1, + "name": "NVIDIA RTX 4090", + "memory_gb": 24.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-11111111-1111-1111-1111-111111111111" + } + ], + "links": [ + { + "gpu_a": 0, + "gpu_b": 1, + "link_type": "PHB", + "link_label": "PCIe-HostBridge", + "rank": 30 + } + ] +} diff --git a/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_4gpus_soc.json b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_4gpus_soc.json new file mode 100644 index 00000000..ab9d7c2d --- /dev/null +++ b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_4gpus_soc.json @@ -0,0 +1,85 @@ +{ + "vendor": "nvidia", + "gpu_count": 4, + "driver_version": "535.129.03", + "mig_enabled": false, + "numa": {}, + "gpus": [ + { + "index": 0, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-00000000-0000-0000-0000-000000000000" + }, + { + "index": 1, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": 
"GPU-11111111-1111-1111-1111-111111111111" + }, + { + "index": 2, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-22222222-2222-2222-2222-222222222222" + }, + { + "index": 3, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-33333333-3333-3333-3333-333333333333" + } + ], + "links": [ + { + "gpu_a": 0, + "gpu_b": 1, + "link_type": "PHB", + "link_label": "PCIe-HostBridge", + "rank": 30 + }, + { + "gpu_a": 0, + "gpu_b": 2, + "link_type": "SOC", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 0, + "gpu_b": 3, + "link_type": "SOC", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 1, + "gpu_b": 2, + "link_type": "SOC", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 1, + "gpu_b": 3, + "link_type": "SOC", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 2, + "gpu_b": 3, + "link_type": "PHB", + "link_label": "PCIe-HostBridge", + "rank": 30 + } + ] +} diff --git a/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_4gpus_sys_separated_nv_pairs.json b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_4gpus_sys_separated_nv_pairs.json new file mode 100644 index 00000000..cf29a92e --- /dev/null +++ b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_4gpus_sys_separated_nv_pairs.json @@ -0,0 +1,85 @@ +{ + "vendor": "nvidia", + "gpu_count": 4, + "driver_version": "535.129.03", + "mig_enabled": false, + "numa": {}, + "gpus": [ + { + "index": 0, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-00000000-0000-0000-0000-000000000000" + }, + { + "index": 1, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-11111111-1111-1111-1111-111111111111" + }, + { + "index": 2, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + 
"pcie_width": "16", + "uuid": "GPU-22222222-2222-2222-2222-222222222222" + }, + { + "index": 3, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-33333333-3333-3333-3333-333333333333" + } + ], + "links": [ + { + "gpu_a": 0, + "gpu_b": 1, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 2, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 0, + "gpu_b": 3, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 1, + "gpu_b": 2, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 1, + "gpu_b": 3, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 2, + "gpu_b": 3, + "link_type": "NODE", + "link_label": "SameNUMA-NoBridge", + "rank": 20 + } + ] +} diff --git a/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_5gpus_nv12_with_mlx5.json b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_5gpus_nv12_with_mlx5.json new file mode 100644 index 00000000..ed17f2a6 --- /dev/null +++ b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_5gpus_nv12_with_mlx5.json @@ -0,0 +1,58 @@ +{ + "vendor": "nvidia", + "gpu_count": 5, + "driver_version": "535.129.03", + "mig_enabled": false, + "numa": {}, + "gpus": [ + { + "index": 0, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-00000000-0000-0000-0000-000000000000" + }, + { + "index": 1, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-11111111-1111-1111-1111-111111111111" + }, + { + "index": 2, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-22222222-2222-2222-2222-222222222222" + }, + { + "index": 3, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + 
"pcie_width": "16", + "uuid": "GPU-33333333-3333-3333-3333-333333333333" + }, + { + "index": 4, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-44444444-4444-4444-4444-444444444444" + } + ], + "links": [ + { + "gpu_a": 0, + "gpu_b": 1, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + } + ] +} diff --git a/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.json b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.json new file mode 100644 index 00000000..58f45ab4 --- /dev/null +++ b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.json @@ -0,0 +1,271 @@ +{ + "vendor": "nvidia", + "gpu_count": 8, + "driver_version": "535.129.03", + "mig_enabled": false, + "numa": {}, + "gpus": [ + { + "index": 0, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-00000000-0000-0000-0000-000000000000" + }, + { + "index": 1, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-11111111-1111-1111-1111-111111111111" + }, + { + "index": 2, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-22222222-2222-2222-2222-222222222222" + }, + { + "index": 3, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-33333333-3333-3333-3333-333333333333" + }, + { + "index": 4, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-44444444-4444-4444-4444-444444444444" + }, + { + "index": 5, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-55555555-5555-5555-5555-555555555555" + }, + { + "index": 6, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": 
"4", + "pcie_width": "16", + "uuid": "GPU-66666666-6666-6666-6666-666666666666" + }, + { + "index": 7, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-77777777-7777-7777-7777-777777777777" + } + ], + "links": [ + { + "gpu_a": 0, + "gpu_b": 1, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 2, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 3, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 4, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 5, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 6, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 1, + "gpu_b": 2, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 1, + "gpu_b": 3, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 1, + "gpu_b": 4, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 1, + "gpu_b": 5, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 1, + "gpu_b": 6, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 1, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 2, + "gpu_b": 3, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 2, + "gpu_b": 4, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 2, + "gpu_b": 5, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 2, + "gpu_b": 6, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 2, + 
"gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 3, + "gpu_b": 4, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 3, + "gpu_b": 5, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 3, + "gpu_b": 6, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 3, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 4, + "gpu_b": 5, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 4, + "gpu_b": 6, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 4, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 5, + "gpu_b": 6, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 5, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 6, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + } + ] +} diff --git a/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh_with_numa_id.json b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh_with_numa_id.json new file mode 100644 index 00000000..63074173 --- /dev/null +++ b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh_with_numa_id.json @@ -0,0 +1,271 @@ +{ + "vendor": "nvidia", + "gpu_count": 8, + "driver_version": "535.129.03", + "mig_enabled": false, + "numa": { "nodes": 2 }, + "gpus": [ + { + "index": 0, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-00000000-0000-0000-0000-000000000000" + }, + { + "index": 1, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-11111111-1111-1111-1111-111111111111" + }, + { + "index": 2, 
+ "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-22222222-2222-2222-2222-222222222222" + }, + { + "index": 3, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-33333333-3333-3333-3333-333333333333" + }, + { + "index": 4, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-44444444-4444-4444-4444-444444444444" + }, + { + "index": 5, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-55555555-5555-5555-5555-555555555555" + }, + { + "index": 6, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-66666666-6666-6666-6666-666666666666" + }, + { + "index": 7, + "name": "NVIDIA A100-SXM4-80GB", + "memory_gb": 80.0, + "pcie_gen": "4", + "pcie_width": "16", + "uuid": "GPU-77777777-7777-7777-7777-777777777777" + } + ], + "links": [ + { + "gpu_a": 0, + "gpu_b": 1, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 2, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 3, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 4, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 5, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 6, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 0, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 1, + "gpu_b": 2, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 1, + "gpu_b": 3, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 1, + "gpu_b": 4, + "link_type": "NV12", + 
"link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 1, + "gpu_b": 5, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 1, + "gpu_b": 6, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 1, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 2, + "gpu_b": 3, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 2, + "gpu_b": 4, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 2, + "gpu_b": 5, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 2, + "gpu_b": 6, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 2, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 3, + "gpu_b": 4, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 3, + "gpu_b": 5, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 3, + "gpu_b": 6, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 3, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 4, + "gpu_b": 5, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 4, + "gpu_b": 6, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 4, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 5, + "gpu_b": 6, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 5, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + }, + { + "gpu_a": 6, + "gpu_b": 7, + "link_type": "NV12", + "link_label": "NVLink", + "rank": 100 + } + ] +} diff --git a/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_8gpus_nv1_nv2_partial_mesh.json 
b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_8gpus_nv1_nv2_partial_mesh.json new file mode 100644 index 00000000..949ff1a8 --- /dev/null +++ b/dream-server/tests/fixtures/topology_json/nvidia_smi_topo_matrix_8gpus_nv1_nv2_partial_mesh.json @@ -0,0 +1,271 @@ +{ + "vendor": "nvidia", + "gpu_count": 8, + "driver_version": "535.129.03", + "mig_enabled": false, + "numa": { "nodes": 2 }, + "gpus": [ + { + "index": 0, + "name": "NVIDIA V100-SXM2-32GB", + "memory_gb": 32.0, + "pcie_gen": "3", + "pcie_width": "16", + "uuid": "GPU-00000000-0000-0000-0000-000000000000" + }, + { + "index": 1, + "name": "NVIDIA V100-SXM2-32GB", + "memory_gb": 32.0, + "pcie_gen": "3", + "pcie_width": "16", + "uuid": "GPU-11111111-1111-1111-1111-111111111111" + }, + { + "index": 2, + "name": "NVIDIA V100-SXM2-32GB", + "memory_gb": 32.0, + "pcie_gen": "3", + "pcie_width": "16", + "uuid": "GPU-22222222-2222-2222-2222-222222222222" + }, + { + "index": 3, + "name": "NVIDIA V100-SXM2-32GB", + "memory_gb": 32.0, + "pcie_gen": "3", + "pcie_width": "16", + "uuid": "GPU-33333333-3333-3333-3333-333333333333" + }, + { + "index": 4, + "name": "NVIDIA V100-SXM2-32GB", + "memory_gb": 32.0, + "pcie_gen": "3", + "pcie_width": "16", + "uuid": "GPU-44444444-4444-4444-4444-444444444444" + }, + { + "index": 5, + "name": "NVIDIA V100-SXM2-32GB", + "memory_gb": 32.0, + "pcie_gen": "3", + "pcie_width": "16", + "uuid": "GPU-55555555-5555-5555-5555-555555555555" + }, + { + "index": 6, + "name": "NVIDIA V100-SXM2-32GB", + "memory_gb": 32.0, + "pcie_gen": "3", + "pcie_width": "16", + "uuid": "GPU-66666666-6666-6666-6666-666666666666" + }, + { + "index": 7, + "name": "NVIDIA V100-SXM2-32GB", + "memory_gb": 32.0, + "pcie_gen": "3", + "pcie_width": "16", + "uuid": "GPU-77777777-7777-7777-7777-777777777777" + } + ], + "links": [ + { + "gpu_a": 0, + "gpu_b": 1, + "link_type": "NV1", + "link_label": "NVLink", + "rank": 0 + }, + { + "gpu_a": 0, + "gpu_b": 2, + "link_type": "NV1", + "link_label": "NVLink", + 
"rank": 0 + }, + { + "gpu_a": 0, + "gpu_b": 3, + "link_type": "NV2", + "link_label": "NVLink", + "rank": 80 + }, + { + "gpu_a": 0, + "gpu_b": 4, + "link_type": "NV2", + "link_label": "NVLink", + "rank": 80 + }, + { + "gpu_a": 0, + "gpu_b": 5, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 0, + "gpu_b": 6, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 0, + "gpu_b": 7, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 1, + "gpu_b": 2, + "link_type": "NV2", + "link_label": "NVLink", + "rank": 80 + }, + { + "gpu_a": 1, + "gpu_b": 3, + "link_type": "NV1", + "link_label": "NVLink", + "rank": 0 + }, + { + "gpu_a": 1, + "gpu_b": 4, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 1, + "gpu_b": 5, + "link_type": "NV2", + "link_label": "NVLink", + "rank": 80 + }, + { + "gpu_a": 1, + "gpu_b": 6, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 1, + "gpu_b": 7, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 2, + "gpu_b": 3, + "link_type": "NV2", + "link_label": "NVLink", + "rank": 80 + }, + { + "gpu_a": 2, + "gpu_b": 4, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 2, + "gpu_b": 5, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 2, + "gpu_b": 6, + "link_type": "NV1", + "link_label": "NVLink", + "rank": 0 + }, + { + "gpu_a": 2, + "gpu_b": 7, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 3, + "gpu_b": 4, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 3, + "gpu_b": 5, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 3, + "gpu_b": 6, + "link_type": "SYS", + "link_label": "CrossNUMA", + "rank": 10 + }, + { + "gpu_a": 3, + "gpu_b": 7, + "link_type": "NV1", + "link_label": "NVLink", + 
"rank": 0 + }, + { + "gpu_a": 4, + "gpu_b": 5, + "link_type": "NV1", + "link_label": "NVLink", + "rank": 0 + }, + { + "gpu_a": 4, + "gpu_b": 6, + "link_type": "NV1", + "link_label": "NVLink", + "rank": 0 + }, + { + "gpu_a": 4, + "gpu_b": 7, + "link_type": "NV2", + "link_label": "NVLink", + "rank": 80 + }, + { + "gpu_a": 5, + "gpu_b": 6, + "link_type": "NV2", + "link_label": "NVLink", + "rank": 80 + }, + { + "gpu_a": 5, + "gpu_b": 7, + "link_type": "NV1", + "link_label": "NVLink", + "rank": 0 + }, + { + "gpu_a": 6, + "gpu_b": 7, + "link_type": "NV2", + "link_label": "NVLink", + "rank": 80 + } + ] +} diff --git a/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_1gpu_pcie.txt b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_1gpu_pcie.txt new file mode 100644 index 00000000..b8233177 --- /dev/null +++ b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_1gpu_pcie.txt @@ -0,0 +1,11 @@ + GPU0 CPU Affinity +GPU0 X 0-7 + +Legend: + + X = Self + SOC = Connection traversing PCIe as well as the SMP link between CPU sockets(e.g. QPI) + PHB = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU) + PXB = Connection traversing multiple PCIe switches (without traversing the PCIe Host Bridge) + PIX = Connection traversing a single PCIe switch + NV# = Connection traversing a bonded set of # NVLinks diff --git a/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_4gpus_soc.txt b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_4gpus_soc.txt new file mode 100644 index 00000000..553de1cd --- /dev/null +++ b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_4gpus_soc.txt @@ -0,0 +1,14 @@ + GPU0 GPU1 GPU2 GPU3 CPU Affinity +GPU0 X PHB SOC SOC 0-9,20-29 +GPU1 PHB X SOC SOC 0-9,20-29 +GPU2 SOC SOC X PHB 10-19,30-39 +GPU3 SOC SOC PHB X 10-19,30-39 + +Legend: + + X = Self + SOC = Connection traversing PCIe as well as the SMP link between CPU sockets(e.g. 
QPI) + PHB = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU) + PXB = Connection traversing multiple PCIe switches (without traversing the PCIe Host Bridge) + PIX = Connection traversing a single PCIe switch + NV# = Connection traversing a bonded set of # NVLinks diff --git a/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_4gpus_sys_separated_nv_pairs.txt b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_4gpus_sys_separated_nv_pairs.txt new file mode 100644 index 00000000..25e0cd9d --- /dev/null +++ b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_4gpus_sys_separated_nv_pairs.txt @@ -0,0 +1,17 @@ + GPU0 GPU1 GPU2 GPU3 mlx5_0 mlx5_1 CPU Affinity NUMA Affinity +GPU0 X NV12 SYS SYS NODE NODE 0-27,56-83 0 +GPU1 NV12 X SYS SYS NODE NODE 0-27,56-83 0 +GPU2 SYS SYS X NODE SYS SYS 28-55,84-111 1 +GPU3 SYS SYS NODE X SYS SYS 28-55,84-111 1 +mlx5_0 NODE NODE SYS SYS X PIX +mlx5_1 NODE NODE SYS SYS PIX X + +Legend: + + X = Self + SYS = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI) + NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node + PHB = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU) + PXB = Connection traversing multiple PCIe switches (without traversing the PCIe Host Bridge) + PIX = Connection traversing a single PCIe switch + NV# = Connection traversing a bonded set of # NVLinks diff --git a/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_5gpus_nv12_with_mlx5.txt b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_5gpus_nv12_with_mlx5.txt new file mode 100644 index 00000000..6e662f1c --- /dev/null +++ b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_5gpus_nv12_with_mlx5.txt @@ -0,0 +1,16 @@ + GPU0 GPU1 mlx5_0 mlx5_1 mlx5_2 CPU Affinity NUMA Affinity +GPU0 X NV12 SYS SYS NODE 64-127,192-255 1 +GPU1 
NV12 X SYS SYS NODE 64-127,192-255 1 +mlx5_0 SYS SYS X NODE SYS +mlx5_1 SYS SYS NODE X SYS +mlx5_2 NODE NODE SYS SYS X + +Legend: + + X = Self + SYS = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI) + NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node + PHB = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU) + PXB = Connection traversing multiple PCIe switches (without traversing the PCIe Host Bridge) + PIX = Connection traversing a single PCIe switch + NV# = Connection traversing a bonded set of # NVLinks diff --git a/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.txt b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.txt new file mode 100644 index 00000000..66f3fde6 --- /dev/null +++ b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.txt @@ -0,0 +1,21 @@ + GPU0 GPU1 GPU2 GPU3 GPU4 GPU5 GPU6 GPU7 NIC0 NIC1 CPU Affinity NUMA Affinity +GPU0 X NV12 NV12 NV12 NV12 NV12 NV12 NV12 PXB PXB 48-63,176-191 3 +GPU1 NV12 X NV12 NV12 NV12 NV12 NV12 NV12 PXB PXB 48-63,176-191 3 +GPU2 NV12 NV12 X NV12 NV12 NV12 NV12 NV12 SYS SYS 16-31,144-159 1 +GPU3 NV12 NV12 NV12 X NV12 NV12 NV12 NV12 SYS SYS 16-31,144-159 1 +GPU4 NV12 NV12 NV12 NV12 X NV12 NV12 NV12 SYS SYS 112-127,240-255 7 +GPU5 NV12 NV12 NV12 NV12 NV12 X NV12 NV12 SYS SYS 112-127,240-255 7 +GPU6 NV12 NV12 NV12 NV12 NV12 NV12 X NV12 SYS SYS 80-95,208-223 5 +GPU7 NV12 NV12 NV12 NV12 NV12 NV12 NV12 X SYS SYS 80-95,208-223 5 +NIC0 PXB PXB SYS SYS SYS SYS SYS SYS X PIX +NIC1 PXB PXB SYS SYS SYS SYS SYS SYS PIX X + +Legend: + + X = Self + SYS = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI) + NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node + PHB = Connection traversing PCIe as well 
as a PCIe Host Bridge (typically the CPU) + PXB = Connection traversing multiple PCIe switches (without traversing the PCIe Host Bridge) + PIX = Connection traversing a single PCIe switch + NV# = Connection traversing a bonded set of # NVLinks diff --git a/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh_with_numa_id.txt b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh_with_numa_id.txt new file mode 100644 index 00000000..aa0ba290 --- /dev/null +++ b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh_with_numa_id.txt @@ -0,0 +1,19 @@ + GPU0 GPU1 GPU2 GPU3 GPU4 GPU5 GPU6 GPU7 NIC0 NIC1 NIC2 NIC3 NIC4 NIC5 NIC6 NIC7 NIC8 NIC9 NIC10 NIC11 CPU Affinity NUMA Affinity GPU NUMA ID +GPU0 X NV12 NV12 NV12 NV12 NV12 NV12 NV12 PXB PXB SYS SYS SYS SYS SYS SYS SYS SYS SYS SYS 48-63,176-191 3 N/A +GPU1 NV12 X NV12 NV12 NV12 NV12 NV12 NV12 PXB PXB SYS SYS SYS SYS SYS SYS SYS SYS SYS SYS 48-63,176-191 3 N/A +GPU2 NV12 NV12 X NV12 NV12 NV12 NV12 NV12 SYS SYS PXB PXB SYS SYS SYS SYS SYS SYS SYS SYS 16-31,144-159 1 N/A +GPU3 NV12 NV12 NV12 X NV12 NV12 NV12 NV12 SYS SYS PXB PXB SYS SYS SYS SYS SYS SYS SYS SYS 16-31,144-159 1 N/A +GPU4 NV12 NV12 NV12 NV12 X NV12 NV12 NV12 SYS SYS SYS SYS SYS SYS PXB PXB SYS SYS SYS SYS 112-127,240-255 7 N/A +GPU5 NV12 NV12 NV12 NV12 NV12 X NV12 NV12 SYS SYS SYS SYS SYS SYS PXB PXB SYS SYS SYS SYS 112-127,240-255 7 N/A +GPU6 NV12 NV12 NV12 NV12 NV12 NV12 X NV12 SYS SYS SYS SYS SYS SYS SYS SYS PXB PXB SYS SYS 80-95,208-223 5 N/A +GPU7 NV12 NV12 NV12 NV12 NV12 NV12 NV12 X SYS SYS SYS SYS SYS SYS SYS SYS PXB PXB SYS SYS 80-95,208-223 5 N/A + +Legend: + + X = Self + SYS = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI) + NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node + PHB = Connection traversing PCIe as well as a PCIe Host Bridge 
(typically the CPU) + PXB = Connection traversing multiple PCIe switches (without traversing the PCIe Host Bridge) + PIX = Connection traversing a single PCIe switch + NV# = Connection traversing a bonded set of # NVLinks diff --git a/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_8gpus_nv1_nv2_partial_mesh.txt b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_8gpus_nv1_nv2_partial_mesh.txt new file mode 100644 index 00000000..7675dd1a --- /dev/null +++ b/dream-server/tests/fixtures/topology_matrix/nvidia_smi_topo_matrix_8gpus_nv1_nv2_partial_mesh.txt @@ -0,0 +1,23 @@ + GPU0 GPU1 GPU2 GPU3 GPU4 GPU5 GPU6 GPU7 mlx5_0 mlx5_2 mlx5_1 mlx5_3 CPU Affinity +GPU0 X NV1 NV1 NV2 NV2 SYS SYS SYS PIX SYS PHB SYS 0-19,40-59 +GPU1 NV1 X NV2 NV1 SYS NV2 SYS SYS PIX SYS PHB SYS 0-19,40-59 +GPU2 NV1 NV2 X NV2 SYS SYS NV1 SYS PHB SYS PIX SYS 0-19,40-59 +GPU3 NV2 NV1 NV2 X SYS SYS SYS NV1 PHB SYS PIX SYS 0-19,40-59 +GPU4 NV2 SYS SYS SYS X NV1 NV1 NV2 SYS PIX SYS PHB 20-39,60-79 +GPU5 SYS NV2 SYS SYS NV1 X NV2 NV1 SYS PIX SYS PHB 20-39,60-79 +GPU6 SYS SYS NV1 SYS NV1 NV2 X NV2 SYS PHB SYS PIX 20-39,60-79 +GPU7 SYS SYS SYS NV1 NV2 NV1 NV2 X SYS PHB SYS PIX 20-39,60-79 +mlx5_0 PIX PIX PHB PHB SYS SYS SYS SYS X SYS PHB SYS +mlx5_2 SYS SYS SYS SYS PIX PIX PHB PHB SYS X SYS PHB +mlx5_1 PHB PHB PIX PIX SYS SYS SYS SYS PHB SYS X SYS +mlx5_3 SYS SYS SYS SYS PHB PHB PIX PIX SYS PHB SYS X + +Legend: + + X = Self + SYS = Connection traversing PCIe as well as the SMP interconnect between NUMA nodes (e.g., QPI/UPI) + NODE = Connection traversing PCIe as well as the interconnect between PCIe Host Bridges within a NUMA node + PHB = Connection traversing PCIe as well as a PCIe Host Bridge (typically the CPU) + PXB = Connection traversing multiple PCIe switches (without traversing the PCIe Host Bridge) + PIX = Connection traversing a single PCIe switch + NV# = Connection traversing a bonded set of # NVLinks diff --git a/dream-server/tests/test-assign-gpus.py 
b/dream-server/tests/test-assign-gpus.py new file mode 100755 index 00000000..9b0f4f57 --- /dev/null +++ b/dream-server/tests/test-assign-gpus.py @@ -0,0 +1,561 @@ +import json +import os +import subprocess +import sys +import pytest + +SCRIPT = os.path.join(os.path.dirname(__file__), "../scripts/assign_gpus.py") +FIXTURES_DIR = os.path.join(os.path.dirname(__file__), "fixtures/topology_json") + +def fixture_path(name): + return os.path.join(FIXTURES_DIR, name) + +def run(topology_path, model_size_mb): + result = subprocess.run( + [sys.executable, SCRIPT, "--topology", topology_path, "--model-size", str(model_size_mb)], + capture_output=True, text=True, + ) + output = None + if result.returncode == 0: + output = json.loads(result.stdout)["gpu_assignment"] + return result.returncode, output, result.stderr + +def all_assigned_uuids(output): + uuids = set() + for svc in output["services"].values(): + uuids.update(svc["gpus"]) + return uuids + +def llama(output): + return output["services"]["llama_server"] + +def parallelism(output): + return llama(output)["parallelism"] + + +# ── 1 GPU — single ──────────────────────────────────────────────────────────── + +class TestSingleGpu: + TOPO = fixture_path("nvidia_smi_topo_matrix_1gpu_pcie.json") + UUID = "GPU-12345678-1234-1234-1234-123456789012" + + def test_strategy_is_single(self): + _, out, _ = run(self.TOPO, 20000) + assert out["strategy"] == "single" + + def test_all_services_share_only_gpu(self): + _, out, _ = run(self.TOPO, 20000) + for svc in out["services"].values(): + assert svc["gpus"] == [self.UUID] + + def test_llama_mode_none(self): + _, out, _ = run(self.TOPO, 20000) + p = parallelism(out) + assert p["mode"] == "none" + assert p["tensor_parallel_size"] == 1 + assert p["pipeline_parallel_size"] == 1 + + def test_model_too_large_errors(self): + rc, _, stderr = run(self.TOPO, 30000) + assert rc == 1 + assert "exceeds" in stderr.lower() + + def test_model_exactly_fits(self): + rc, out, _ = run(self.TOPO, 24576) 
+ assert rc == 0 + assert out["strategy"] == "single" + + def test_no_topology_analysis_needed(self): + rc, out, _ = run(self.TOPO, 10000) + assert rc == 0 + + +# ── 2 GPU — rank-first means PHB pair always wins over single GPU ───────────── + +class TestTwoGpuColoc: + TOPO = fixture_path("nvidia_smi_topo_matrix_2gpus_phb_coloc.json") + GPU0 = "GPU-00000000-0000-0000-0000-000000000000" + GPU1 = "GPU-11111111-1111-1111-1111-111111111111" + + def test_model_fits_one_gpu_rank_first_takes_pair(self): + # rank-first: PHB pair rank=30 beats single rank=0, + # so llama always gets both GPUs when there are only 2 + _, out, _ = run(self.TOPO, 20000) + assert set(llama(out)["gpus"]) == {self.GPU0, self.GPU1} + + def test_model_fits_one_gpu_strategy_colocated(self): + # remaining=0 after llama takes both → colocated + _, out, _ = run(self.TOPO, 20000) + assert out["strategy"] == "colocated" + + def test_model_fits_one_gpu_services_share_last(self): + _, out, _ = run(self.TOPO, 20000) + for name in ("whisper", "comfyui", "embeddings"): + assert out["services"][name]["gpus"] == [self.GPU1] + + def test_model_fits_one_gpu_pipeline(self): + # PHB rank=30, n=2 → pipeline + _, out, _ = run(self.TOPO, 20000) + p = parallelism(out) + assert p["mode"] == "pipeline" + assert p["tensor_parallel_size"] == 1 + assert p["pipeline_parallel_size"] == 2 + + def test_model_needs_both_gpus_strategy_colocated(self): + _, out, _ = run(self.TOPO, 30000) + assert out["strategy"] == "colocated" + + def test_model_needs_both_gpus_llama_gets_both(self): + _, out, _ = run(self.TOPO, 30000) + assert set(llama(out)["gpus"]) == {self.GPU0, self.GPU1} + + def test_model_needs_both_gpus_services_share_llamas_last(self): + _, out, _ = run(self.TOPO, 30000) + for name in ("whisper", "comfyui", "embeddings"): + assert out["services"][name]["gpus"] == [self.GPU1] + + def test_model_needs_both_gpus_llama_pipeline(self): + _, out, _ = run(self.TOPO, 30000) + p = parallelism(out) + assert p["mode"] == "pipeline" + 
assert p["tensor_parallel_size"] == 1 + assert p["pipeline_parallel_size"] == 2 + + def test_no_gpu_idle(self): + for model_size in (20000, 30000): + _, out, _ = run(self.TOPO, model_size) + assert all_assigned_uuids(out) == {self.GPU0, self.GPU1} + + +# ── 4 GPU — SOC / cross-NUMA PCIe ──────────────────────────────────────────── + +class TestFourGpuSoc: + """4x A100-80GB. GPUs 0-1 and 2-3 are PHB pairs rank=30, cross pairs SOC rank=10.""" + TOPO = fixture_path("nvidia_smi_topo_matrix_4gpus_soc.json") + UUIDS = [ + "GPU-00000000-0000-0000-0000-000000000000", + "GPU-11111111-1111-1111-1111-111111111111", + "GPU-22222222-2222-2222-2222-222222222222", + "GPU-33333333-3333-3333-3333-333333333333", + ] + + def test_model_fits_one_gpu_picks_phb_pair(self): + # rank-first: PHB pair rank=30 beats single rank=0 + _, out, _ = run(self.TOPO, 70000) + llama_uuids = set(llama(out)["gpus"]) + phb_pair_a = {self.UUIDS[0], self.UUIDS[1]} + phb_pair_b = {self.UUIDS[2], self.UUIDS[3]} + assert llama_uuids in (phb_pair_a, phb_pair_b) + + def test_model_fits_one_gpu_colocated(self): + # remaining=2 after PHB pair → colocated + _, out, _ = run(self.TOPO, 70000) + assert out["strategy"] == "colocated" + + def test_model_fits_one_gpu_pipeline(self): + # PHB rank=30, n=2 → pipeline + _, out, _ = run(self.TOPO, 70000) + p = parallelism(out) + assert p["mode"] == "pipeline" + assert p["pipeline_parallel_size"] == 2 + + def test_model_fits_one_gpu_no_gpu_idle(self): + _, out, _ = run(self.TOPO, 70000) + assert all_assigned_uuids(out) == set(self.UUIDS) + + def test_model_needs_two_gpus_colocated(self): + _, out, _ = run(self.TOPO, 100000) + assert out["strategy"] == "colocated" + + def test_model_needs_two_gpus_picks_phb_pair(self): + _, out, _ = run(self.TOPO, 100000) + llama_uuids = set(llama(out)["gpus"]) + phb_pair_a = {self.UUIDS[0], self.UUIDS[1]} + phb_pair_b = {self.UUIDS[2], self.UUIDS[3]} + assert llama_uuids in (phb_pair_a, phb_pair_b) + + def 
test_model_needs_two_gpus_pipeline(self): + _, out, _ = run(self.TOPO, 100000) + p = parallelism(out) + assert p["mode"] == "pipeline" + assert p["pipeline_parallel_size"] == 2 + + def test_model_needs_three_gpus_colocated(self): + _, out, _ = run(self.TOPO, 200000) + assert out["strategy"] == "colocated" + + def test_model_needs_three_gpus_pipeline_cross_numa(self): + _, out, _ = run(self.TOPO, 200000) + p = parallelism(out) + assert p["mode"] == "pipeline" + assert p["pipeline_parallel_size"] == 3 + + def test_model_too_large_errors(self): + rc, _, stderr = run(self.TOPO, 400000) + assert rc == 1 + assert "exceeds" in stderr.lower() + + +# ── 4 GPU — SYS-separated NVLink pairs ─────────────────────────────────────── + +class TestFourGpuSysNvPairs: + """4x A100-80GB. GPU 0-1 NVLink rank=100, GPU 2-3 NODE rank=20, cross SYS rank=10.""" + TOPO = fixture_path("nvidia_smi_topo_matrix_4gpus_sys_separated_nv_pairs.json") + UUIDS = [ + "GPU-00000000-0000-0000-0000-000000000000", + "GPU-11111111-1111-1111-1111-111111111111", + "GPU-22222222-2222-2222-2222-222222222222", + "GPU-33333333-3333-3333-3333-333333333333", + ] + + def test_model_fits_one_gpu_picks_nvlink_pair(self): + # rank-first: NVLink pair rank=100 always wins + _, out, _ = run(self.TOPO, 70000) + assert set(llama(out)["gpus"]) == {self.UUIDS[0], self.UUIDS[1]} + + def test_model_fits_one_gpu_colocated(self): + # remaining=2 → colocated + _, out, _ = run(self.TOPO, 70000) + assert out["strategy"] == "colocated" + + def test_model_fits_one_gpu_tensor(self): + # NVLink rank=100, n=2 → tensor + _, out, _ = run(self.TOPO, 70000) + p = parallelism(out) + assert p["mode"] == "tensor" + assert p["tensor_parallel_size"] == 2 + assert p["pipeline_parallel_size"] == 1 + assert p["gpu_memory_utilization"] == 0.92 + + def test_model_needs_two_gpus_picks_nvlink_pair(self): + _, out, _ = run(self.TOPO, 100000) + assert set(llama(out)["gpus"]) == {self.UUIDS[0], self.UUIDS[1]} + + def test_model_needs_two_gpus_tensor(self): 
+ _, out, _ = run(self.TOPO, 100000) + p = parallelism(out) + assert p["mode"] == "tensor" + assert p["tensor_parallel_size"] == 2 + assert p["pipeline_parallel_size"] == 1 + assert p["gpu_memory_utilization"] == 0.92 + + def test_model_needs_two_gpus_colocated(self): + _, out, _ = run(self.TOPO, 100000) + assert out["strategy"] == "colocated" + + def test_model_needs_three_gpus_cross_numa_pipeline(self): + _, out, _ = run(self.TOPO, 200000) + p = parallelism(out) + assert p["mode"] == "pipeline" + assert p["pipeline_parallel_size"] == 3 + + def test_no_gpu_idle(self): + for model_size in (70000, 100000, 200000): + _, out, _ = run(self.TOPO, model_size) + assert all_assigned_uuids(out) == set(self.UUIDS) + + +# ── 5 GPU — NV12 pair + 3 unlinked ─────────────────────────────────────────── + +class TestFiveGpuNv12WithMlx5: + """5x A100-80GB. Only GPU 0-1 NV12 rank=100. All others rank=0.""" + TOPO = fixture_path("nvidia_smi_topo_matrix_5gpus_nv12_with_mlx5.json") + UUIDS = [ + "GPU-00000000-0000-0000-0000-000000000000", + "GPU-11111111-1111-1111-1111-111111111111", + "GPU-22222222-2222-2222-2222-222222222222", + "GPU-33333333-3333-3333-3333-333333333333", + "GPU-44444444-4444-4444-4444-444444444444", + ] + + def test_model_fits_one_gpu_picks_nvlink_pair(self): + # rank-first: NVLink pair rank=100 always wins + _, out, _ = run(self.TOPO, 70000) + assert set(llama(out)["gpus"]) == {self.UUIDS[0], self.UUIDS[1]} + + def test_model_fits_one_gpu_dedicated(self): + # remaining=3 exactly → dedicated, no extras back to llama + _, out, _ = run(self.TOPO, 70000) + assert out["strategy"] == "dedicated" + + def test_model_fits_one_gpu_llama_stays_2gpus(self): + # remaining=3 → services each get 1, no extras push back + _, out, _ = run(self.TOPO, 70000) + assert len(llama(out)["gpus"]) == 2 + + def test_model_fits_one_gpu_tensor(self): + # NVLink rank=100, n=2 → tensor (no extra GPU degrading to pipeline) + _, out, _ = run(self.TOPO, 70000) + p = parallelism(out) + assert 
p["mode"] == "tensor" + assert p["tensor_parallel_size"] == 2 + assert p["pipeline_parallel_size"] == 1 + + def test_model_fits_one_gpu_services_get_dedicated_gpus(self): + _, out, _ = run(self.TOPO, 70000) + svcs = out["services"] + for name in ("whisper", "comfyui", "embeddings"): + assert len(svcs[name]["gpus"]) == 1 + service_uuids = [svcs[n]["gpus"][0] for n in ("whisper", "comfyui", "embeddings")] + assert len(set(service_uuids)) == 3 + + def test_model_needs_nvlink_pair_tensor(self): + _, out, _ = run(self.TOPO, 100000) + assert set(llama(out)["gpus"]) == {self.UUIDS[0], self.UUIDS[1]} + p = parallelism(out) + assert p["mode"] == "tensor" + assert p["tensor_parallel_size"] == 2 + assert p["pipeline_parallel_size"] == 1 + + def test_model_needs_nvlink_pair_dedicated(self): + _, out, _ = run(self.TOPO, 100000) + assert out["strategy"] == "dedicated" + + def test_model_needs_nvlink_pair_no_extras_back(self): + _, out, _ = run(self.TOPO, 100000) + assert len(llama(out)["gpus"]) == 2 + + def test_no_gpu_idle(self): + for model_size in (70000, 100000): + _, out, _ = run(self.TOPO, model_size) + assert all_assigned_uuids(out) == set(self.UUIDS) + + +# ── 8 GPU — NV1/NV2 partial mesh ───────────────────────────────────────────── + +class TestEightGpuPartialMesh: + """8x V100-32GB. 
NV1=rank 0, NV2=rank 80, SYS=rank 10.""" + TOPO = fixture_path("nvidia_smi_topo_matrix_8gpus_nv1_nv2_partial_mesh.json") + ALL_UUIDS = {f"GPU-{str(i)*8}-{str(i)*4}-{str(i)*4}-{str(i)*4}-{str(i)*12}" for i in range(8)} + NV2_PAIRS = [ + {"GPU-00000000-0000-0000-0000-000000000000", "GPU-33333333-3333-3333-3333-333333333333"}, + {"GPU-00000000-0000-0000-0000-000000000000", "GPU-44444444-4444-4444-4444-444444444444"}, + {"GPU-11111111-1111-1111-1111-111111111111", "GPU-22222222-2222-2222-2222-222222222222"}, + {"GPU-11111111-1111-1111-1111-111111111111", "GPU-55555555-5555-5555-5555-555555555555"}, + {"GPU-22222222-2222-2222-2222-222222222222", "GPU-33333333-3333-3333-3333-333333333333"}, + {"GPU-44444444-4444-4444-4444-444444444444", "GPU-77777777-7777-7777-7777-777777777777"}, + {"GPU-55555555-5555-5555-5555-555555555555", "GPU-66666666-6666-6666-6666-666666666666"}, + {"GPU-66666666-6666-6666-6666-666666666666", "GPU-77777777-7777-7777-7777-777777777777"}, + ] + + def test_model_fits_one_gpu_dedicated(self): + # remaining=6 → dedicated (3 to services, 3 extras back to llama) + _, out, _ = run(self.TOPO, 20000) + assert out["strategy"] == "dedicated" + + def test_model_fits_one_gpu_picks_nv2_pair(self): + _, out, _ = run(self.TOPO, 20000) + initial_pair = set(llama(out)["gpus"][:2]) + assert any(initial_pair == p for p in self.NV2_PAIRS) + + def test_model_fits_one_gpu_extras_back_to_llama(self): + # remaining=6: services get 3, extras 3 → llama total=5 + _, out, _ = run(self.TOPO, 20000) + assert len(llama(out)["gpus"]) == 5 + + def test_model_fits_one_gpu_pipeline(self): + # extras degrade min_rank → pipeline + _, out, _ = run(self.TOPO, 20000) + assert parallelism(out)["mode"] == "pipeline" + + def test_model_needs_nv2_pair_picks_nv2_pair(self): + _, out, _ = run(self.TOPO, 50000) + initial_pair = set(llama(out)["gpus"][:2]) + assert any(initial_pair == p for p in self.NV2_PAIRS) + + def test_model_needs_nv2_pair_extras_make_pipeline(self): + _, out, _ = 
run(self.TOPO, 50000) + assert parallelism(out)["mode"] == "pipeline" + + def test_no_gpu_idle(self): + for model_size in (20000, 50000): + _, out, _ = run(self.TOPO, model_size) + assert all_assigned_uuids(out) == self.ALL_UUIDS + + def test_model_too_large_errors(self): + rc, _, stderr = run(self.TOPO, 300000) + assert rc == 1 + assert "exceeds" in stderr.lower() + + +# ── 8 GPU — NV12 full mesh ──────────────────────────────────────────────────── + +class TestEightGpuNv12FullMesh: + """8x A100-80GB. All pairs NV12 rank=100.""" + TOPO = fixture_path("nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.json") + ALL_UUIDS = {f"GPU-{str(i)*8}-{str(i)*4}-{str(i)*4}-{str(i)*4}-{str(i)*12}" for i in range(8)} + + def test_model_fits_one_gpu_dedicated(self): + _, out, _ = run(self.TOPO, 70000) + assert out["strategy"] == "dedicated" + + def test_model_fits_one_gpu_services_get_dedicated_gpus(self): + _, out, _ = run(self.TOPO, 70000) + svcs = out["services"] + uuids = [svcs[n]["gpus"][0] for n in ("whisper", "comfyui", "embeddings")] + assert len(set(uuids)) == 3 + + def test_model_fits_one_gpu_extras_back_to_llama_nvlink(self): + # NVLink pair wins, remaining=6: 3 to services, 3 extras → llama=5 GPUs, hybrid + _, out, _ = run(self.TOPO, 70000) + p = parallelism(out) + assert p["mode"] == "hybrid" + assert p["gpu_memory_utilization"] == 0.93 + + def test_model_fits_one_gpu_llama_5gpus(self): + _, out, _ = run(self.TOPO, 70000) + assert len(llama(out)["gpus"]) == 5 + + def test_model_needs_two_gpus_extras_back_to_llama(self): + _, out, _ = run(self.TOPO, 100000) + assert len(llama(out)["gpus"]) == 5 + + def test_model_needs_two_gpus_hybrid_nvlink(self): + _, out, _ = run(self.TOPO, 100000) + p = parallelism(out) + assert p["mode"] == "hybrid" + assert p["tensor_parallel_size"] == 2 + assert p["pipeline_parallel_size"] == 2 + assert p["gpu_memory_utilization"] == 0.93 + + def test_model_needs_five_gpus_no_extras(self): + # 350GB needs 5 GPUs. 
remaining=3 exactly → no extras → llama has 5 GPUs + _, out, _ = run(self.TOPO, 350000) + assert len(llama(out)["gpus"]) == 5 + assert out["strategy"] == "dedicated" + + def test_model_needs_five_gpus_hybrid(self): + _, out, _ = run(self.TOPO, 350000) + p = parallelism(out) + assert p["mode"] == "hybrid" + assert p["tensor_parallel_size"] == 2 + assert p["pipeline_parallel_size"] == 2 + + def test_model_needs_five_gpus_services_dedicated(self): + _, out, _ = run(self.TOPO, 350000) + svcs = out["services"] + uuids = [svcs[n]["gpus"][0] for n in ("whisper", "comfyui", "embeddings")] + assert len(set(uuids)) == 3 + + def test_no_gpu_idle(self): + for model_size in (70000, 100000, 350000): + _, out, _ = run(self.TOPO, model_size) + assert all_assigned_uuids(out) == self.ALL_UUIDS + + def test_model_too_large_errors(self): + rc, _, stderr = run(self.TOPO, 700000) + assert rc == 1 + assert "exceeds" in stderr.lower() + + +# ── 8 GPU — NV12 full mesh with NUMA annotation ─────────────────────────────── + +class TestEightGpuNv12FullMeshWithNuma: + """NUMA annotation should not affect results.""" + TOPO_WITH_NUMA = fixture_path("nvidia_smi_topo_matrix_8gpus_nv12_full_mesh_with_numa_id.json") + TOPO_WITHOUT_NUMA = fixture_path("nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.json") + + def test_numa_annotation_does_not_affect_strategy(self): + _, out_numa, _ = run(self.TOPO_WITH_NUMA, 100000) + _, out_no_numa, _ = run(self.TOPO_WITHOUT_NUMA, 100000) + assert out_numa["strategy"] == out_no_numa["strategy"] + + def test_numa_annotation_does_not_affect_parallelism_mode(self): + _, out_numa, _ = run(self.TOPO_WITH_NUMA, 100000) + _, out_no_numa, _ = run(self.TOPO_WITHOUT_NUMA, 100000) + assert parallelism(out_numa)["mode"] == parallelism(out_no_numa)["mode"] + + def test_numa_annotation_does_not_affect_llama_gpu_count(self): + _, out_numa, _ = run(self.TOPO_WITH_NUMA, 100000) + _, out_no_numa, _ = run(self.TOPO_WITHOUT_NUMA, 100000) + assert len(llama(out_numa)["gpus"]) == 
len(llama(out_no_numa)["gpus"]) + + +# ── Output schema ───────────────────────────────────────────────────────────── + +class TestOutputSchema: + TOPO = fixture_path("nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.json") + + def test_version_field_present(self): + _, out, _ = run(self.TOPO, 100000) + assert out["version"] == "1.0" + + def test_strategy_field_present(self): + _, out, _ = run(self.TOPO, 100000) + assert out["strategy"] in ("single", "dedicated", "colocated", "user-defined") + + def test_all_four_services_present(self): + _, out, _ = run(self.TOPO, 100000) + for svc in ("llama_server", "whisper", "comfyui", "embeddings"): + assert svc in out["services"] + + def test_gpus_always_a_list(self): + _, out, _ = run(self.TOPO, 100000) + for svc in out["services"].values(): + assert isinstance(svc["gpus"], list) + assert len(svc["gpus"]) >= 1 + + def test_non_llama_services_have_no_parallelism_block(self): + _, out, _ = run(self.TOPO, 100000) + for name in ("whisper", "comfyui", "embeddings"): + assert "parallelism" not in out["services"][name] + + def test_llama_always_has_parallelism_block(self): + _, out, _ = run(self.TOPO, 100000) + p = parallelism(out) + assert "mode" in p + assert "tensor_parallel_size" in p + assert "pipeline_parallel_size" in p + assert "gpu_memory_utilization" in p + + def test_tensor_split_absent_when_homogeneous(self): + _, out, _ = run(self.TOPO, 100000) + assert "tensor_split" not in parallelism(out) + + def test_gpu_uuids_are_strings(self): + _, out, _ = run(self.TOPO, 100000) + for svc in out["services"].values(): + for uuid in svc["gpus"]: + assert isinstance(uuid, str) + assert uuid.startswith("GPU-") + + +# ── Parallelism mode selection ──────────────────────────────────────────────── + +class TestParallelismModeSelection: + + def test_nvlink_two_gpus_tensor(self): + _, out, _ = run(fixture_path("nvidia_smi_topo_matrix_4gpus_sys_separated_nv_pairs.json"), 100000) + assert parallelism(out)["mode"] == "tensor" + + def 
test_pcie_phb_two_gpus_pipeline(self): + # PHB rank=30 → pipeline + _, out, _ = run(fixture_path("nvidia_smi_topo_matrix_4gpus_soc.json"), 100000) + assert parallelism(out)["mode"] == "pipeline" + + def test_cross_numa_three_gpus_pipeline(self): + _, out, _ = run(fixture_path("nvidia_smi_topo_matrix_4gpus_soc.json"), 200000) + p = parallelism(out) + assert p["mode"] == "pipeline" + assert p["pipeline_parallel_size"] == 3 + + def test_nvlink_full_mesh_five_gpus_hybrid(self): + # NV12 full mesh, extras push back → 5 GPUs → hybrid + _, out, _ = run(fixture_path("nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.json"), 100000) + assert parallelism(out)["mode"] == "hybrid" + + def test_mem_util_none_is_095(self): + _, out, _ = run(fixture_path("nvidia_smi_topo_matrix_1gpu_pcie.json"), 20000) + assert parallelism(out)["gpu_memory_utilization"] == 0.95 + + def test_mem_util_tensor_is_092(self): + _, out, _ = run(fixture_path("nvidia_smi_topo_matrix_4gpus_sys_separated_nv_pairs.json"), 100000) + assert parallelism(out)["gpu_memory_utilization"] == 0.92 + + def test_mem_util_hybrid_is_093(self): + _, out, _ = run(fixture_path("nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.json"), 100000) + assert parallelism(out)["gpu_memory_utilization"] == 0.93 + + def test_mem_util_pipeline_is_095(self): + _, out, _ = run(fixture_path("nvidia_smi_topo_matrix_4gpus_soc.json"), 100000) + assert parallelism(out)["gpu_memory_utilization"] == 0.95 \ No newline at end of file diff --git a/dream-server/tests/test-nvidia-topo.sh b/dream-server/tests/test-nvidia-topo.sh new file mode 100755 index 00000000..3de6a43b --- /dev/null +++ b/dream-server/tests/test-nvidia-topo.sh @@ -0,0 +1,307 @@ +#!/usr/bin/env bash +# ============================================================================ +# Dream Server — NVIDIA Topology Detection Test +# ============================================================================ +# Part of: tests/ +# Purpose: Test NVIDIA topology detection against fixture files +# 
+# Usage: ./test-nvidia-topo.sh
+# ============================================================================
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+FIXTURES_DIR="$SCRIPT_DIR/fixtures/topology_matrix"
+TOPO_SCRIPT="$SCRIPT_DIR/../installers/lib/nvidia-topo.sh"
+
+# Provides the color constants (RED/GRN/BLU/MAG/NC) used below.
+source "$SCRIPT_DIR/../installers/lib/constants.sh"
+
+# Counters
+TESTS_RUN=0
+TESTS_PASSED=0
+TESTS_FAILED=0
+
+# Check dependencies
+if ! command -v jq &>/dev/null; then
+    echo -e "${RED}ERROR: jq is required but not installed${NC}"
+    exit 1
+fi
+
+# Each test below installs a shell-function mock named `nvidia-smi` (shell
+# functions shadow the real binary for the sourced detector), sources the
+# topology script, and asserts on the JSON emitted by detect_nvidia_topo.
+# NOTE: declaration and command substitution are kept separate (SC2155) so a
+# failing detector is reported as a test FAIL instead of being masked.
+
+# Test fixture: nvidia_smi_topo_matrix_1gpu_pcie.txt
+# Single consumer GPU on PCIe: expect 1 GPU and an empty links array.
+test_1gpu_pcie() {
+    echo -e "${BLU}Testing: nvidia_smi_topo_matrix_1gpu_pcie.txt${NC}"
+    TESTS_RUN=$((TESTS_RUN + 1))
+
+    nvidia-smi() {
+        if [[ "$1" == "topo" && "$2" == "-m" ]]; then
+            cat "$FIXTURES_DIR/nvidia_smi_topo_matrix_1gpu_pcie.txt"
+        elif [[ "$*" == "--query-gpu=index,name,memory.total,pcie.link.gen.current,pcie.link.width.current,uuid --format=csv,noheader,nounits" ]]; then
+            echo "0, NVIDIA RTX 4090, 24564, 4, 16, GPU-12345678-1234-1234-1234-123456789012"
+        elif [[ "$*" == "--query-gpu=driver_version --format=csv,noheader" ]]; then
+            echo "535.129.03"
+        elif [[ "$1" == "-q" ]]; then
+            echo "MIG Mode: Disabled"
+        fi
+    }
+
+    source "$TOPO_SCRIPT"
+    local result
+    if ! result=$(detect_nvidia_topo); then
+        echo -e "${RED}✗ FAIL: detect_nvidia_topo exited non-zero${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+        return
+    fi
+
+    # Assertions
+    local gpu_count links_count
+    gpu_count=$(echo "$result" | jq -r '.gpu_count')
+    links_count=$(echo "$result" | jq -r '.links | length')
+
+    if [[ "$gpu_count" == "1" ]] && [[ "$links_count" == "0" ]]; then
+        echo -e "${GRN}✓ PASS: 1 GPU, 0 links${NC}"
+        TESTS_PASSED=$((TESTS_PASSED + 1))
+    else
+        echo -e "${RED}✗ FAIL: Expected 1 GPU and 0 links, got $gpu_count GPUs and $links_count links${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+    fi
+}
+
+# Test fixture: nvidia_smi_topo_matrix_4gpus_soc.txt
+# Four GPUs connected through the SoC fabric: expect SOC link entries.
+test_4gpus_soc() {
+    echo -e "${BLU}Testing: nvidia_smi_topo_matrix_4gpus_soc.txt${NC}"
+    TESTS_RUN=$((TESTS_RUN + 1))
+
+    nvidia-smi() {
+        if [[ "$1" == "topo" && "$2" == "-m" ]]; then
+            cat "$FIXTURES_DIR/nvidia_smi_topo_matrix_4gpus_soc.txt"
+        elif [[ "$*" == "--query-gpu=index,name,memory.total,pcie.link.gen.current,pcie.link.width.current,uuid --format=csv,noheader,nounits" ]]; then
+            echo "0, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-00000000-0000-0000-0000-000000000000"
+            echo "1, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-11111111-1111-1111-1111-111111111111"
+            echo "2, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-22222222-2222-2222-2222-222222222222"
+            echo "3, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-33333333-3333-3333-3333-333333333333"
+        elif [[ "$*" == "--query-gpu=driver_version --format=csv,noheader" ]]; then
+            echo "535.129.03"
+        elif [[ "$1" == "-q" ]]; then
+            echo "MIG Mode: Disabled"
+        fi
+    }
+
+    source "$TOPO_SCRIPT"
+    local result
+    if ! result=$(detect_nvidia_topo); then
+        echo -e "${RED}✗ FAIL: detect_nvidia_topo exited non-zero${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+        return
+    fi
+
+    # Assertions
+    local gpu_count links_count has_soc
+    gpu_count=$(echo "$result" | jq -r '.gpu_count')
+    links_count=$(echo "$result" | jq -r '.links | length')
+    has_soc=$(echo "$result" | jq -r '.links[] | select(.link_type == "SOC") | .link_type' | head -1)
+
+    if [[ "$gpu_count" == "4" ]] && [[ "$links_count" -gt "0" ]] && [[ "$has_soc" == "SOC" ]]; then
+        echo -e "${GRN}✓ PASS: 4 GPUs, $links_count links, SOC topology detected${NC}"
+        TESTS_PASSED=$((TESTS_PASSED + 1))
+    else
+        echo -e "${RED}✗ FAIL: Expected 4 GPUs with SOC links, got $gpu_count GPUs, $links_count links${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+    fi
+}
+
+# Test fixture: nvidia_smi_topo_matrix_4gpus_sys_separated_nv_pairs.txt
+# Two NVLink pairs separated across the system bus: expect NV* link entries.
+test_4gpus_sys_separated_nv_pairs() {
+    echo -e "${BLU}Testing: nvidia_smi_topo_matrix_4gpus_sys_separated_nv_pairs.txt${NC}"
+    TESTS_RUN=$((TESTS_RUN + 1))
+
+    nvidia-smi() {
+        if [[ "$1" == "topo" && "$2" == "-m" ]]; then
+            cat "$FIXTURES_DIR/nvidia_smi_topo_matrix_4gpus_sys_separated_nv_pairs.txt"
+        elif [[ "$*" == "--query-gpu=index,name,memory.total,pcie.link.gen.current,pcie.link.width.current,uuid --format=csv,noheader,nounits" ]]; then
+            echo "0, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-00000000-0000-0000-0000-000000000000"
+            echo "1, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-11111111-1111-1111-1111-111111111111"
+            echo "2, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-22222222-2222-2222-2222-222222222222"
+            echo "3, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-33333333-3333-3333-3333-333333333333"
+        elif [[ "$*" == "--query-gpu=driver_version --format=csv,noheader" ]]; then
+            echo "535.129.03"
+        elif [[ "$1" == "-q" ]]; then
+            echo "MIG Mode: Disabled"
+        fi
+    }
+
+    source "$TOPO_SCRIPT"
+    local result
+    if ! result=$(detect_nvidia_topo); then
+        echo -e "${RED}✗ FAIL: detect_nvidia_topo exited non-zero${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+        return
+    fi
+
+    # Assertions
+    local gpu_count nvlink_count
+    gpu_count=$(echo "$result" | jq -r '.gpu_count')
+    nvlink_count=$(echo "$result" | jq -r '[.links[] | select(.link_type | startswith("NV"))] | length')
+
+    if [[ "$gpu_count" == "4" ]] && [[ "$nvlink_count" -gt "0" ]]; then
+        echo -e "${GRN}✓ PASS: 4 GPUs, $nvlink_count NVLink connections${NC}"
+        TESTS_PASSED=$((TESTS_PASSED + 1))
+    else
+        echo -e "${RED}✗ FAIL: Expected 4 GPUs with NVLink, got $gpu_count GPUs, $nvlink_count NVLinks${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+    fi
+}
+
+# Test fixture: nvidia_smi_topo_matrix_5gpus_nv12_with_mlx5.txt
+# Five GPUs with NV12 links plus mlx5 NICs in the matrix: NICs must be ignored.
+test_5gpus_nv12_with_mlx5() {
+    echo -e "${BLU}Testing: nvidia_smi_topo_matrix_5gpus_nv12_with_mlx5.txt${NC}"
+    TESTS_RUN=$((TESTS_RUN + 1))
+
+    nvidia-smi() {
+        if [[ "$1" == "topo" && "$2" == "-m" ]]; then
+            cat "$FIXTURES_DIR/nvidia_smi_topo_matrix_5gpus_nv12_with_mlx5.txt"
+        elif [[ "$*" == "--query-gpu=index,name,memory.total,pcie.link.gen.current,pcie.link.width.current,uuid --format=csv,noheader,nounits" ]]; then
+            echo "0, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-00000000-0000-0000-0000-000000000000"
+            echo "1, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-11111111-1111-1111-1111-111111111111"
+            echo "2, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-22222222-2222-2222-2222-222222222222"
+            echo "3, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-33333333-3333-3333-3333-333333333333"
+            echo "4, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-44444444-4444-4444-4444-444444444444"
+        elif [[ "$*" == "--query-gpu=driver_version --format=csv,noheader" ]]; then
+            echo "535.129.03"
+        elif [[ "$1" == "-q" ]]; then
+            echo "MIG Mode: Disabled"
+        fi
+    }
+
+    source "$TOPO_SCRIPT"
+    local result
+    if ! result=$(detect_nvidia_topo); then
+        echo -e "${RED}✗ FAIL: detect_nvidia_topo exited non-zero${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+        return
+    fi
+
+    # Assertions
+    local gpu_count nv12_count
+    gpu_count=$(echo "$result" | jq -r '.gpu_count')
+    nv12_count=$(echo "$result" | jq -r '[.links[] | select(.link_type == "NV12")] | length')
+
+    if [[ "$gpu_count" == "5" ]] && [[ "$nv12_count" -gt "0" ]]; then
+        echo -e "${GRN}✓ PASS: 5 GPUs, $nv12_count NV12 connections${NC}"
+        TESTS_PASSED=$((TESTS_PASSED + 1))
+    else
+        echo -e "${RED}✗ FAIL: Expected 5 GPUs with NV12, got $gpu_count GPUs, $nv12_count NV12 links${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+    fi
+}
+
+# Test fixture: nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.txt
+# Eight GPUs in a full NV12 mesh: 8*7/2 = 28 pairwise links expected.
+test_8gpus_nv12_full_mesh() {
+    echo -e "${BLU}Testing: nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.txt${NC}"
+    TESTS_RUN=$((TESTS_RUN + 1))
+
+    nvidia-smi() {
+        if [[ "$1" == "topo" && "$2" == "-m" ]]; then
+            cat "$FIXTURES_DIR/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh.txt"
+        elif [[ "$*" == "--query-gpu=index,name,memory.total,pcie.link.gen.current,pcie.link.width.current,uuid --format=csv,noheader,nounits" ]]; then
+            # Synthesize one distinct, well-formed UUID per GPU index.
+            for i in {0..7}; do
+                echo "$i, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-${i}${i}${i}${i}${i}${i}${i}${i}-${i}${i}${i}${i}-${i}${i}${i}${i}-${i}${i}${i}${i}-${i}${i}${i}${i}${i}${i}${i}${i}${i}${i}${i}${i}"
+            done
+        elif [[ "$*" == "--query-gpu=driver_version --format=csv,noheader" ]]; then
+            echo "535.129.03"
+        elif [[ "$1" == "-q" ]]; then
+            echo "MIG Mode: Disabled"
+        fi
+    }
+
+    source "$TOPO_SCRIPT"
+    local result
+    if ! result=$(detect_nvidia_topo); then
+        echo -e "${RED}✗ FAIL: detect_nvidia_topo exited non-zero${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+        return
+    fi
+
+    # Assertions
+    local gpu_count nv12_count
+    gpu_count=$(echo "$result" | jq -r '.gpu_count')
+    nv12_count=$(echo "$result" | jq -r '[.links[] | select(.link_type == "NV12")] | length')
+
+    # Full mesh of 8 GPUs should have 28 links (8*7/2)
+    if [[ "$gpu_count" == "8" ]] && [[ "$nv12_count" -gt "20" ]]; then
+        echo -e "${GRN}✓ PASS: 8 GPUs, $nv12_count NV12 connections (full mesh)${NC}"
+        TESTS_PASSED=$((TESTS_PASSED + 1))
+    else
+        echo -e "${RED}✗ FAIL: Expected 8 GPUs with full mesh NV12, got $gpu_count GPUs, $nv12_count NV12 links${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+    fi
+}
+
+# Test fixture: nvidia_smi_topo_matrix_8gpus_nv12_full_mesh_with_numa_id.txt
+# Same full mesh, but the matrix carries NUMA IDs and numactl reports 2 nodes.
+test_8gpus_nv12_full_mesh_with_numa() {
+    echo -e "${BLU}Testing: nvidia_smi_topo_matrix_8gpus_nv12_full_mesh_with_numa_id.txt${NC}"
+    TESTS_RUN=$((TESTS_RUN + 1))
+
+    nvidia-smi() {
+        if [[ "$1" == "topo" && "$2" == "-m" ]]; then
+            cat "$FIXTURES_DIR/nvidia_smi_topo_matrix_8gpus_nv12_full_mesh_with_numa_id.txt"
+        elif [[ "$*" == "--query-gpu=index,name,memory.total,pcie.link.gen.current,pcie.link.width.current,uuid --format=csv,noheader,nounits" ]]; then
+            for i in {0..7}; do
+                echo "$i, NVIDIA A100-SXM4-80GB, 81920, 4, 16, GPU-${i}${i}${i}${i}${i}${i}${i}${i}-${i}${i}${i}${i}-${i}${i}${i}${i}-${i}${i}${i}${i}-${i}${i}${i}${i}${i}${i}${i}${i}${i}${i}${i}${i}"
+            done
+        elif [[ "$*" == "--query-gpu=driver_version --format=csv,noheader" ]]; then
+            echo "535.129.03"
+        elif [[ "$1" == "-q" ]]; then
+            echo "MIG Mode: Disabled"
+        fi
+    }
+
+    # Mock a 2-node NUMA layout for this test only.
+    numactl() {
+        if [[ "$1" == "--hardware" ]]; then
+            echo "node 0 cpus: 0 1 2 3"
+            echo "node 1 cpus: 4 5 6 7"
+        fi
+    }
+
+    source "$TOPO_SCRIPT"
+    local result rc=0
+    result=$(detect_nvidia_topo) || rc=$?
+    # Bash functions are global: drop the numactl mock immediately so it
+    # cannot leak into later tests and skew their NUMA detection.
+    unset -f numactl
+    if [[ "$rc" -ne 0 ]]; then
+        echo -e "${RED}✗ FAIL: detect_nvidia_topo exited non-zero${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+        return
+    fi
+
+    # Assertions
+    local gpu_count numa_nodes nv12_count
+    gpu_count=$(echo "$result" | jq -r '.gpu_count')
+    numa_nodes=$(echo "$result" | jq -r '.numa.nodes')
+    nv12_count=$(echo "$result" | jq -r '[.links[] | select(.link_type == "NV12")] | length')
+
+    if [[ "$gpu_count" == "8" ]] && [[ "$numa_nodes" == "2" ]] && [[ "$nv12_count" -gt "20" ]]; then
+        echo -e "${GRN}✓ PASS: 8 GPUs, 2 NUMA nodes, $nv12_count NV12 connections${NC}"
+        TESTS_PASSED=$((TESTS_PASSED + 1))
+    else
+        echo -e "${RED}✗ FAIL: Expected 8 GPUs, 2 NUMA nodes with NV12, got $gpu_count GPUs, $numa_nodes NUMA nodes, $nv12_count NV12 links${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+    fi
+}
+
+# Test fixture: nvidia_smi_topo_matrix_8gpus_nv1_nv2_partial_mesh.txt
+# Eight V100s with a partial NV1/NV2 mesh: expect some NV* links.
+test_8gpus_nv1_nv2_partial_mesh() {
+    echo -e "${BLU}Testing: nvidia_smi_topo_matrix_8gpus_nv1_nv2_partial_mesh.txt${NC}"
+    TESTS_RUN=$((TESTS_RUN + 1))
+
+    nvidia-smi() {
+        if [[ "$1" == "topo" && "$2" == "-m" ]]; then
+            cat "$FIXTURES_DIR/nvidia_smi_topo_matrix_8gpus_nv1_nv2_partial_mesh.txt"
+        elif [[ "$*" == "--query-gpu=index,name,memory.total,pcie.link.gen.current,pcie.link.width.current,uuid --format=csv,noheader,nounits" ]]; then
+            for i in {0..7}; do
+                echo "$i, NVIDIA V100-SXM2-32GB, 32768, 3, 16, GPU-${i}${i}${i}${i}${i}${i}${i}${i}-${i}${i}${i}${i}-${i}${i}${i}${i}-${i}${i}${i}${i}-${i}${i}${i}${i}${i}${i}${i}${i}${i}${i}${i}${i}"
+            done
+        elif [[ "$*" == "--query-gpu=driver_version --format=csv,noheader" ]]; then
+            echo "535.129.03"
+        elif [[ "$1" == "-q" ]]; then
+            echo "MIG Mode: Disabled"
+        fi
+    }
+
+    source "$TOPO_SCRIPT"
+    local result
+    if ! result=$(detect_nvidia_topo); then
+        echo -e "${RED}✗ FAIL: detect_nvidia_topo exited non-zero${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+        return
+    fi
+
+    # Assertions
+    local gpu_count nvlink_count
+    gpu_count=$(echo "$result" | jq -r '.gpu_count')
+    nvlink_count=$(echo "$result" | jq -r '[.links[] | select(.link_type | startswith("NV"))] | length')
+
+    if [[ "$gpu_count" == "8" ]] && [[ "$nvlink_count" -gt "0" ]]; then
+        echo -e "${GRN}✓ PASS: 8 GPUs, $nvlink_count NVLink connections (partial mesh)${NC}"
+        TESTS_PASSED=$((TESTS_PASSED + 1))
+    else
+        echo -e "${RED}✗ FAIL: Expected 8 GPUs with NVLink, got $gpu_count GPUs, $nvlink_count NVLink connections${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+    fi
+}
+
+# Main test runner
+echo -e "${MAG}=== NVIDIA Topology Detection Tests ===${NC}\n"
+
+test_1gpu_pcie
+test_4gpus_soc
+test_4gpus_sys_separated_nv_pairs
+test_5gpus_nv12_with_mlx5
+test_8gpus_nv12_full_mesh
+test_8gpus_nv12_full_mesh_with_numa
+test_8gpus_nv1_nv2_partial_mesh
+
+echo -e "\n${MAG}=== Test Summary ===${NC}"
+echo -e "Tests run: $TESTS_RUN"
+echo -e "${GRN}Tests passed: $TESTS_PASSED${NC}"
+if [[ $TESTS_FAILED -gt 0 ]]; then
+    echo -e "${RED}Tests failed: $TESTS_FAILED${NC}"
+    exit 1
+else
+    echo -e "${GRN}All tests passed!${NC}"
+    exit 0
+fi