diff --git a/dream-server/extensions/services/dashboard-api/routers/updates.py b/dream-server/extensions/services/dashboard-api/routers/updates.py
index de7d7559..ff140375 100644
--- a/dream-server/extensions/services/dashboard-api/routers/updates.py
+++ b/dream-server/extensions/services/dashboard-api/routers/updates.py
@@ -18,6 +18,8 @@
 
 router = APIRouter(tags=["updates"])
 
+_VALID_ACTIONS = {"check", "backup", "update"}
+
 _GITHUB_HEADERS = {"Accept": "application/vnd.github.v3+json"}
 
 
diff --git a/dream-server/scripts/assign_gpus.py b/dream-server/scripts/assign_gpus.py
index 4f63cc21..c504180b 100644
--- a/dream-server/scripts/assign_gpus.py
+++ b/dream-server/scripts/assign_gpus.py
@@ -19,14 +19,14 @@
 from typing import Optional
 
 
-# Constants
+# Constants
 
 HIGH_BW_THRESHOLD = 80  # min rank for NVLink / XGMI
 DEFAULT_SERVICES = ["llama_server", "whisper", "comfyui", "embeddings"]
 NON_LLAMA = ["whisper", "comfyui", "embeddings"]
 
 
-# Data Models
+# Data Models
 
 @dataclass
 class GPU:
@@ -69,7 +69,7 @@ class AssignmentResult:
     services: dict
 
 
-# Phase 1: Topology Analysis
+# Phase 1: Topology Analysis
 
 def parse_gpus(topology: dict) -> list:
     gpus = []
@@ -156,7 +156,7 @@ def enumerate_subsets(gpus: list, rank_matrix: dict) -> list:
     )
 
 
-# Phase 2: GPU Assignment
+# Phase 2: GPU Assignment
 
 def find_llama_subset(ordered_subsets: list, model_size_mb: float) -> Subset:
     """
@@ -257,7 +257,7 @@ def assign_services(all_gpus: list, llama_gpus: list, rank_matrix: dict, enabled
     return assignments, final_llama_gpus, strategy
 
 
-# Phase 3: Llama Parallelism
+# Phase 3: Llama Parallelism
 
 def largest_pow2_divisor(n: int) -> int:
     """
@@ -369,7 +369,7 @@ def select_parallelism(subset: Subset) -> LlamaParallelism:
     )
 
 
-# Phase 4: Build Output JSON
+# Phase 4: Build Output JSON
 
 def build_output(result: AssignmentResult) -> dict:
     services = {}
@@ -400,7 +400,7 @@
     }
 
 
-# Entry Point
+# Entry Point
 
 def main():
     parser = argparse.ArgumentParser(description="GPU assignment algorithm for DreamServer")
@@ -429,7 +429,7 @@
         print("ERROR: no GPUs found in topology", file=sys.stderr)
         sys.exit(1)
 
-    # Early exit: single GPU
+    # Early exit: single GPU
     if gpu_count == 1:
         gpu = parse_gpus(topology)[0]
         if model_size_mb > gpu.memory_mb:
@@ -453,13 +453,13 @@
         print(json.dumps(build_output(result), indent=2))
         return
 
-    # Phase 1: Topology analysis
+    # Phase 1: Topology analysis
     gpus = parse_gpus(topology)
     links = parse_links(topology)
     rank_matrix = build_rank_matrix(links)
     ordered = enumerate_subsets(gpus, rank_matrix)
 
-    # Phase 2: GPU assignment
+    # Phase 2: GPU assignment
     try:
         llama_subset = find_llama_subset(ordered, model_size_mb)
         if llama_subset is None:
@@ -472,12 +472,12 @@
         gpus, llama_subset.gpus, rank_matrix, enabled_services
     )
 
-    # Phase 3: Llama parallelism
+    # Phase 3: Llama parallelism
    final_subset = compute_subset(final_llama_gpus, rank_matrix)
     parallelism = select_parallelism(final_subset)
     service_assignments["llama_server"].parallelism = parallelism
 
-    # Phase 4: Emit JSON
+    # Phase 4: Emit JSON
     result = AssignmentResult(strategy=strategy, services=service_assignments)
     print(json.dumps(build_output(result), indent=2))
 
diff --git a/dream-server/tests/bats-tests/tier-map.bats b/dream-server/tests/bats-tests/tier-map.bats
index 48342f03..90c566d2 100644
--- a/dream-server/tests/bats-tests/tier-map.bats
+++ b/dream-server/tests/bats-tests/tier-map.bats
@@ -20,29 +20,29 @@ setup() {
 
 # ── resolve_tier_config ─────────────────────────────────────────────────────
 
-@test "resolve_tier_config: tier 1 sets Entry Level with qwen3-8b" {
+@test "resolve_tier_config: tier 1 sets Entry Level with qwen3.5-9b" {
   TIER=1
   resolve_tier_config
   assert_equal "$TIER_NAME" "Entry Level"
-  assert_equal "$LLM_MODEL" "qwen3-8b"
-  assert_equal "$GGUF_FILE" "Qwen3-8B-Q4_K_M.gguf"
+  assert_equal "$LLM_MODEL" "qwen3.5-9b"
+  assert_equal "$GGUF_FILE" "Qwen3.5-9B-Q4_K_M.gguf"
   assert_equal "$MAX_CONTEXT" "16384"
 }
 
-@test "resolve_tier_config: tier 2 sets Prosumer with qwen3-8b" {
+@test "resolve_tier_config: tier 2 sets Prosumer with qwen3.5-9b" {
   TIER=2
   resolve_tier_config
   assert_equal "$TIER_NAME" "Prosumer"
-  assert_equal "$LLM_MODEL" "qwen3-8b"
+  assert_equal "$LLM_MODEL" "qwen3.5-9b"
   assert_equal "$MAX_CONTEXT" "32768"
 }
 
-@test "resolve_tier_config: tier 3 sets Pro with qwen3-14b" {
+@test "resolve_tier_config: tier 3 sets Pro with qwen3.5-27b" {
   TIER=3
   resolve_tier_config
   assert_equal "$TIER_NAME" "Pro"
-  assert_equal "$LLM_MODEL" "qwen3-14b"
-  assert_equal "$GGUF_FILE" "Qwen3-14B-Q4_K_M.gguf"
+  assert_equal "$LLM_MODEL" "qwen3.5-27b"
+  assert_equal "$GGUF_FILE" "Qwen3.5-27B-Q4_K_M.gguf"
   assert_equal "$MAX_CONTEXT" "32768"
 }
 
@@ -101,13 +101,13 @@
 @test "tier_to_model: maps all numeric tiers correctly" {
   run tier_to_model 1
-  assert_output "qwen3-8b"
+  assert_output "qwen3.5-9b"
 
   run tier_to_model 2
-  assert_output "qwen3-8b"
+  assert_output "qwen3.5-9b"
 
   run tier_to_model 3
-  assert_output "qwen3-14b"
+  assert_output "qwen3.5-27b"
 
   run tier_to_model 4
   assert_output "qwen3-30b-a3b"
 }
@@ -115,13 +115,13 @@
 @test "tier_to_model: maps T-prefix aliases correctly" {
   run tier_to_model T1
-  assert_output "qwen3-8b"
+  assert_output "qwen3.5-9b"
 
   run tier_to_model T2
-  assert_output "qwen3-8b"
+  assert_output "qwen3.5-9b"
 
   run tier_to_model T3
-  assert_output "qwen3-14b"
+  assert_output "qwen3.5-27b"
 
   run tier_to_model T4
   assert_output "qwen3-30b-a3b"
 }
diff --git a/dream-server/tests/test-assign-gpus.py b/dream-server/tests/test-assign-gpus.py
index 006cfc9d..02f1ccf9 100755
--- a/dream-server/tests/test-assign-gpus.py
+++ b/dream-server/tests/test-assign-gpus.py
@@ -557,4 +557,4 @@ def test_mem_util_hybrid_is_093(self):
 
     def test_mem_util_pipeline_is_095(self):
         _, out, _ = run(fixture_path("nvidia_smi_topo_matrix_4gpus_soc.json"), 100000)
-        assert parallelism(out)["gpu_memory_utilization"] == 0.95
\ No newline at end of file
+        assert parallelism(out)["gpu_memory_utilization"] == 0.95
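
Note: the updates.py hunk introduces the `_VALID_ACTIONS` whitelist, but its call site falls outside the diff context. Below is a minimal sketch of the validation pattern such a set usually backs; the `/updates/{action}` route and the handler name are assumptions for illustration, not the repo's actual API.

    from fastapi import APIRouter, HTTPException

    router = APIRouter(tags=["updates"])

    _VALID_ACTIONS = {"check", "backup", "update"}

    @router.post("/updates/{action}")
    async def run_update_action(action: str) -> dict:
        # Hypothetical route/handler: reject anything outside the
        # whitelist before any work is dispatched.
        if action not in _VALID_ACTIONS:
            raise HTTPException(status_code=400, detail=f"unknown action: {action}")
        return {"action": action, "status": "accepted"}

Keeping the verbs in a module-level set makes the membership check O(1) and leaves a single greppable source of truth for the accepted actions.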
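
The assign_gpus.py hunks only touch section banners, but `largest_pow2_divisor` under "Phase 3: Llama Parallelism" hints at the sizing rule: parallel group sizes are presumably kept at the largest power of two that divides the GPU count. The repo's implementation isn't shown in this diff; a standard bit-trick version, as a sketch only, would be:

    def largest_pow2_divisor(n: int) -> int:
        # For a positive integer, n & -n isolates the lowest set bit,
        # which is exactly the largest power of two dividing n.
        return n & -n

For example, largest_pow2_divisor(12) == 4 and largest_pow2_divisor(8) == 8, so a 12-GPU subset would split into groups of at most 4.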