StarTrail-org · raoabinav · Jun 4, 2026 · Jun 3, 2026 · Jun 4, 2026
diff --git a/packages/leann-core/src/leann/chat.py b/packages/leann-core/src/leann/chat.py
@@ -8,6 +8,7 @@
 import logging
 import os
 from abc import ABC, abstractmethod
+from collections.abc import Callable
 from typing import Any, Optional, cast
 
 from .settings import (
@@ -27,6 +28,36 @@
 logger = logging.getLogger(__name__)
 
 
+def _run_with_optional_posix_alarm(
+    operation: Callable[[], Any], timeout_seconds: int, timeout_message: str
+) -> Any:
+    """Run ``operation`` under a SIGALRM-based timeout when the platform allows it.
+
+    The timeout is best-effort. ``operation`` runs with no timeout at all when
+    the ``signal`` module cannot be imported, when it lacks ``SIGALRM`` or
+    ``alarm``, or when ``signal.signal`` rejects the handler with ``ValueError``.
+
+    Args:
+        operation: Zero-argument callable to execute.
+        timeout_seconds: Delay, in seconds, handed to ``signal.alarm``.
+        timeout_message: Message carried by the ``TimeoutError`` raised on expiry.
+
+    Returns:
+        Whatever ``operation`` returns.
+
+    Raises:
+        TimeoutError: If the alarm fires while ``operation`` is still running.
+    """
+    try:
+        import signal
+    except ImportError:
+        return operation()
+
+    if not hasattr(signal, "SIGALRM") or not hasattr(signal, "alarm"):
+        return operation()
+
+    def timeout_handler(signum, frame):
+        raise TimeoutError(timeout_message)
+
+    try:
+        old_handler = signal.signal(signal.SIGALRM, timeout_handler)
+    except ValueError:
+        # signal.signal only works from the main thread. Loading without an
+        # alarm is preferable to failing before model loading starts.
+        return operation()
+
+    # signal.signal returns None when the previous handler was not installed
+    # from Python. Passing None back would raise TypeError inside ``finally``
+    # and mask the operation's result, so restore the default disposition.
+    restore_handler = signal.SIG_DFL if old_handler is None else old_handler
+
+    signal.alarm(timeout_seconds)
+    try:
+        return operation()
+    finally:
+        # Disarm first so a late alarm cannot fire into the restored handler.
+        signal.alarm(0)
+        signal.signal(signal.SIGALRM, restore_handler)
+
+
 def check_ollama_models(host: str) -> list[str]:
     """Check available Ollama models and return a list"""
     try:
@@ -606,20 +637,12 @@ def __init__(
             self.device = "cpu"
             logger.info("No GPU detected. Using CPU.")
 
-        # Load tokenizer and model with timeout protection
+        # Load tokenizer and model with timeout protection when POSIX alarms are available.
         try:
-            import signal
-
-            def timeout_handler(signum, frame):
-                raise TimeoutError("Model download/loading timed out")
-
-            # Set timeout for model loading (60 seconds)
-            old_handler = signal.signal(signal.SIGALRM, timeout_handler)
-            signal.alarm(60)
 
-            try:
+            def load_model_assets():
                 logger.info(f"Loading tokenizer for {model_name}...")
-                self.tokenizer = AutoTokenizer.from_pretrained(
+                tokenizer = AutoTokenizer.from_pretrained(
                     model_name, trust_remote_code=self.trust_remote_code
                 )
 
@@ -634,16 +657,20 @@ def timeout_handler(signum, frame):
                     # Auto mode: let HuggingFace distribute across available GPUs
                     device_map = "auto"
 
-                self.model = AutoModelForCausalLM.from_pretrained(
+                model = AutoModelForCausalLM.from_pretrained(
                     model_name,
                     torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
                     device_map=device_map,
                     trust_remote_code=self.trust_remote_code,
                 )
                 logger.info(f"Successfully loaded {model_name}")
-            finally:
-                signal.alarm(0)  # Cancel the alarm
-                signal.signal(signal.SIGALRM, old_handler)  # Restore old handler
+                return tokenizer, model
+
+            self.tokenizer, self.model = _run_with_optional_posix_alarm(
+                load_model_assets,
+                timeout_seconds=60,
+                timeout_message="Model download/loading timed out",
+            )
 
         except TimeoutError:
             logger.error(f"Model loading timed out for {model_name}")

diff --git a/packages/leann-core/src/leann/embedding_compute.py b/packages/leann-core/src/leann/embedding_compute.py
@@ -278,7 +278,7 @@ def _query_lmstudio_context_limit(model_name: str, base_url: str) -> Optional[in
                 # Append to existing NODE_PATH if present
                 existing_node_path = env.get("NODE_PATH", "")
                 env["NODE_PATH"] = (
-                    f"{global_modules}:{existing_node_path}"
+                    os.pathsep.join([global_modules, existing_node_path])
                     if existing_node_path
                     else global_modules
                 )
@@ -495,7 +495,8 @@ def compute_embeddings_sentence_transformers(
             # TODO: Haven't tested this yet
             torch.set_num_threads(min(8, os.cpu_count() or 4))
             try:
-                torch.backends.mkldnn.enabled = True
+                mkldnn_backend = cast(Any, torch.backends.mkldnn)
+                mkldnn_backend.enabled = True
             except AttributeError:
                 pass
 

diff --git a/tests/test_windows_native_polish.py b/tests/test_windows_native_polish.py
@@ -0,0 +1,54 @@
+import sys
+from types import SimpleNamespace
+from unittest.mock import Mock
+
+from leann import embedding_compute
+from leann.chat import _run_with_optional_posix_alarm
+from leann.embedding_compute import _query_lmstudio_context_limit
+
+
+def test_hf_loader_timeout_helper_runs_without_sigalrm(monkeypatch):
+    """The helper falls back to a plain call when ``signal`` exposes no alarm API."""
+    # An attribute-less stand-in has neither SIGALRM nor alarm.
+    signal_without_alarm = SimpleNamespace()
+    monkeypatch.setitem(sys.modules, "signal", signal_without_alarm)
+
+    outcome = _run_with_optional_posix_alarm(lambda: "loaded", 60, "timeout")
+
+    assert outcome == "loaded"
+
+
+def test_hf_loader_timeout_helper_runs_when_signal_registration_is_unavailable(monkeypatch):
+    """A ``ValueError`` from ``signal.signal`` must not stop the operation from running."""
+    # Registration is refused, so the alarm must never be armed afterwards.
+    reject_registration = Mock(side_effect=ValueError("signal only works in main thread"))
+    forbid_alarm = Mock(side_effect=AssertionError("alarm should not be called"))
+    fake_signal = SimpleNamespace(SIGALRM=14, alarm=forbid_alarm, signal=reject_registration)
+    monkeypatch.setitem(sys.modules, "signal", fake_signal)
+
+    outcome = _run_with_optional_posix_alarm(lambda: "loaded", 60, "timeout")
+
+    assert outcome == "loaded"
+
+
+def test_lmstudio_node_path_uses_platform_separator(monkeypatch):
+    """NODE_PATH handed to ``node`` is joined with the patched ``os.pathsep``."""
+    # Holds the NODE_PATH of the most recent ``node`` invocation (None if never run).
+    captured = {"node_path": None}
+
+    def completed(stdout):
+        # Minimal stand-in for a successful subprocess.run result.
+        return Mock(returncode=0, stdout=stdout, stderr="")
+
+    def mock_run(cmd, **kwargs):
+        if cmd == ["npm", "root", "-g"]:
+            return completed(r"C:\npm\node_modules" + "\n")
+
+        assert cmd[0] == "node"
+        captured["node_path"] = kwargs["env"]["NODE_PATH"]
+        return completed('{"contextLength": 8192, "identifier": "custom-model"}')
+
+    monkeypatch.setattr(embedding_compute.subprocess, "run", mock_run)
+    monkeypatch.setattr(embedding_compute.os, "pathsep", ";")
+    monkeypatch.setenv("NODE_PATH", r"C:\existing;D:\more")
+
+    limit = _query_lmstudio_context_limit(model_name="custom-model", base_url="ws://localhost:1234")
+
+    assert limit == 8192
+    assert captured["node_path"] == r"C:\npm\node_modules;C:\existing;D:\more"
diff --git a/uv.lock b/uv.lock