Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion dream-server/extensions/services/dashboard-api/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
# poll cycle and prevents file-descriptor exhaustion.

_aio_session: Optional[aiohttp.ClientSession] = None
_HEALTH_TIMEOUT = aiohttp.ClientTimeout(total=5)
_HEALTH_TIMEOUT = aiohttp.ClientTimeout(total=30)


async def _get_aio_session() -> aiohttp.ClientSession:
Expand Down Expand Up @@ -167,6 +167,25 @@ async def get_llama_context_size(model_hint: Optional[str] = None) -> Optional[i
return None


# --- Service Health Cache ---
# Written by background poll loop in main.py, read by API endpoints.
# Keeps health checking decoupled from request handling so slow DNS
# lookups (Docker Desktop) never block API responses.

_services_cache: Optional[list] = None # list[ServiceStatus], set by poll loop


def set_services_cache(statuses: list) -> None:
    """Record the most recent health-check results.

    Called by the background poll loop in main.py; API handlers read the
    value back via get_cached_services().
    """
    global _services_cache
    _services_cache = statuses


def get_cached_services() -> Optional[list]:
    """Return the last poll's results, or None until the first poll completes."""
    return _services_cache


# --- Service Health ---

async def check_service_health(service_id: str, config: dict) -> ServiceStatus:
Expand Down Expand Up @@ -218,6 +237,8 @@ async def _check_host_service_health(service_id: str, config: dict) -> ServiceSt
async with session.get(url) as resp:
response_time = (asyncio.get_event_loop().time() - start) * 1000
status = "healthy" if resp.status < 400 else "unhealthy"
except asyncio.TimeoutError:
status = "down"
except aiohttp.ClientConnectorError:
status = "down"
except (aiohttp.ClientError, OSError) as e:
Expand Down
42 changes: 37 additions & 5 deletions dream-server/extensions/services/dashboard-api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from security import verify_api_key
from gpu import get_gpu_info
from helpers import (
get_all_services,
get_all_services, get_cached_services, set_services_cache,
get_disk_usage, get_model_info, get_bootstrap_status,
get_uptime, get_cpu_metrics, get_ram_metrics,
get_llama_metrics, get_loaded_model, get_llama_context_size,
Expand Down Expand Up @@ -74,6 +74,7 @@ def set(self, key: str, value: object, ttl: float):
_GPU_CACHE_TTL = 3.0
_STATUS_CACHE_TTL = 2.0
_STORAGE_CACHE_TTL = 30.0
_SERVICE_POLL_INTERVAL = 10.0 # background health check interval

# --- Router imports ---
from routers import workflows, features, setup, updates, agents, privacy
Expand Down Expand Up @@ -243,7 +244,10 @@ async def gpu(api_key: str = Depends(verify_api_key)):

@app.get("/services", response_model=list[ServiceStatus])
async def services(api_key: str = Depends(verify_api_key)):
    """Get all service health statuses (from background poll cache).

    Delegates to _get_services() so the cache-then-live-check fallback
    logic lives in exactly one place instead of being duplicated here.
    """
    return await _get_services()


Expand All @@ -266,7 +270,7 @@ async def bootstrap(api_key: str = Depends(verify_api_key)):
async def status(api_key: str = Depends(verify_api_key)):
"""Get full system status. Runs sync helpers in thread pool concurrently."""
service_statuses, gpu_info, disk_info, model_info, bootstrap_info, uptime = await asyncio.gather(
get_all_services(),
_get_services(),
asyncio.to_thread(get_gpu_info),
asyncio.to_thread(get_disk_usage),
asyncio.to_thread(get_model_info),
Expand Down Expand Up @@ -324,7 +328,7 @@ async def _build_api_status() -> dict:
asyncio.to_thread(get_uptime),
asyncio.to_thread(get_cpu_metrics),
asyncio.to_thread(get_ram_metrics),
get_all_services(),
_get_services(),
get_loaded_model(),
)

Expand Down Expand Up @@ -483,12 +487,40 @@ def dir_size_gb(path: Path) -> float:
return result


# --- Service Health Polling ---

async def _get_services() -> list[ServiceStatus]:
    """Return cached service health, falling back to live check.

    The cache is populated by the background poll task; before the first
    poll completes it is None, in which case we run the check inline.
    """
    statuses = get_cached_services()
    return statuses if statuses is not None else await get_all_services()


async def _poll_service_health():
    """Background task: refresh service health on a fixed timer.

    Stores each round of results via set_services_cache() so API
    endpoints read the cache instead of running live checks; the poll
    itself has no callers waiting on it and may take as long as needed.
    """
    # Give services a moment to come up before the first check.
    await asyncio.sleep(2)
    while True:
        try:
            set_services_cache(await get_all_services())
        except Exception:
            # Keep polling even if one round fails; log with traceback.
            logger.exception("Service health poll failed")
        await asyncio.sleep(_SERVICE_POLL_INTERVAL)


# --- Startup ---

# Strong references to background tasks: asyncio.create_task() only keeps a
# weak reference, so a task with no other reference can be garbage-collected
# before it finishes (see the note in the asyncio create_task docs).
_background_tasks: set = set()


@app.on_event("startup")
async def startup_event():
    """Start background tasks (metrics collection and service health polling)."""
    for coro in (collect_metrics(), _poll_service_health()):
        task = asyncio.create_task(coro)
        _background_tasks.add(task)
        # Drop the reference once the task completes so the set doesn't grow.
        task.add_done_callback(_background_tasks.discard)


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ def calculate_feature_status(feature: dict, services: list, gpu_info: Optional[G
async def api_features(api_key: str = Depends(verify_api_key)):
"""Get feature discovery data."""
import asyncio
from helpers import get_all_services
gpu_info, service_list = await asyncio.gather(
asyncio.to_thread(get_gpu_info),
get_all_services(),
)
from helpers import get_all_services, get_cached_services
service_list = get_cached_services()
if service_list is None:
service_list = await get_all_services()
gpu_info = await asyncio.to_thread(get_gpu_info)

feature_statuses = [calculate_feature_status(f, service_list, gpu_info) for f in FEATURES]
feature_statuses.sort(key=lambda x: x["priority"])
Expand Down
Loading