GradientHQ · gufengc · May 22, 2026 · May 22, 2026 · May 22, 2026
diff --git a/pyproject.toml b/pyproject.toml
@@ -43,23 +43,23 @@ parallax = "parallax.cli:main"
 [project.optional-dependencies]
 
 mac = [
-  "nanobind==2.10.2",
+  "nanobind==2.12.0",
   "torch==2.8.0",
-  "mlx-lm==0.30.6",
-  "mlx==0.30.4",
+  "mlx-lm==0.31.3",
+  "mlx==0.31.2",
 ]
 
 gpu = [
   "sglang[all]==0.5.12",
   "accelerate",
-  "mlx-lm==0.28.4",
-  "mlx[cpu]==0.30.0",
+  "mlx-lm==0.31.3",
+  "mlx[cpu]==0.31.2",
 ]
 
 vllm = [
   "vllm==0.14.0",
-  "mlx-lm==0.28.4",
-  "mlx[cpu]==0.30.0",
+  "mlx-lm==0.31.3",
+  "mlx[cpu]==0.31.2",
 ]
 
 benchmark = [

diff --git a/src/parallax/utils/utils.py b/src/parallax/utils/utils.py
@@ -27,10 +27,7 @@ def is_mps_available():
 def is_metal_available():
     """Check if MLX Metal backend is available"""
     try:
-        import mlx.core as mx
-
-        mx.metal.device_info()
-        return True
+        return mx.metal.is_available()  # NOTE(review): relies on a module-level mx (no local import now); a NameError is not caught below - confirm
     except (RuntimeError, AttributeError, ImportError):
         return False
 
@@ -43,7 +40,7 @@ def get_current_device():
     device = "cpu"
     if is_cuda_available():
         device = "cuda"
-    if is_mps_available():
+    if is_metal_available():
         device = "mlx"
     return device
 

diff --git a/src/parallax_extensions/kernels/paged_attention.cpp b/src/parallax_extensions/kernels/paged_attention.cpp
@@ -120,7 +120,7 @@ void PagedAttentionV1::eval_gpu(
     auto kernel = d.get_kernel(kname, lib, hash_name, func_consts);
 
     // Prepare to encode kernel
-    auto& compute_encoder = d.get_command_encoder(s.index);
+    auto& compute_encoder = mx::metal::get_command_encoder(s);
     compute_encoder.set_compute_pipeline_state(kernel);
 
     // Shared Memory

diff --git a/src/parallax_extensions/kernels/reshape_and_cache.cpp b/src/parallax_extensions/kernels/reshape_and_cache.cpp
@@ -15,8 +15,8 @@ namespace parallax_ext {
 mx::array reshape_and_cache(
     const mx::array& key,          // [num_tokens, num_heads, head_size]
     const mx::array& value,        // [num_tokens, num_heads, head_size]
-    mx::array& key_cache,          // [num_blocks, num_heads, head_size/x, block_size, x]
-    mx::array& value_cache,        // [num_blocks, num_heads, head_size/x, block_size]
+    const mx::array& key_cache,    // [num_blocks, num_heads, head_size/x, block_size, x]
+    const mx::array& value_cache,  // [num_blocks, num_heads, head_size/x, block_size]
     const mx::array& slot_mapping, // [num_tokens]
     mx::StreamOrDevice s /* = {} */ // Stream on which to schedule the operation
 ) {
@@ -88,7 +88,7 @@ void ReshapeAndCache::eval_gpu(
     auto kernel = d.get_kernel(kname, lib, hash_name, func_consts);
 
     // Prepare to encode kernel
-    auto& compute_encoder = d.get_command_encoder(s.index);
+    auto& compute_encoder = mx::metal::get_command_encoder(s);
     compute_encoder.set_compute_pipeline_state(kernel);
 
     // Calculate parameters

diff --git a/src/parallax_extensions/kernels/reshape_and_cache.h b/src/parallax_extensions/kernels/reshape_and_cache.h
@@ -8,8 +8,8 @@ namespace parallax_ext {
 mx::array reshape_and_cache(
     const mx::array& key,           // [num_tokens, num_heads, head_size]
     const mx::array& value,         // [num_tokens, num_heads, head_size]
-    mx::array& key_cache,           // [num_blocks, num_heads, head_size/x, block_size, x]
-    mx::array& value_cache,         // [num_blocks, num_heads, head_size/x, block_size]
+    const mx::array& key_cache,     // [num_blocks, num_heads, head_size/x, block_size, x]
+    const mx::array& value_cache,   // [num_blocks, num_heads, head_size/x, block_size]
     const mx::array& slot_mapping,  // [num_tokens]
     mx::StreamOrDevice s /* = {} */ // Stream on which to schedule the operation
 );

diff --git a/src/parallax_extensions/lib/_ext.cpython-311-darwin.so b/src/parallax_extensions/lib/_ext.cpython-311-darwin.so
diff --git a/src/parallax_extensions/lib/_ext.cpython-312-darwin.so b/src/parallax_extensions/lib/_ext.cpython-312-darwin.so
diff --git a/src/parallax_extensions/lib/_ext.cpython-313-darwin.so b/src/parallax_extensions/lib/_ext.cpython-313-darwin.so
diff --git a/src/parallax_extensions/lib/libparallax_ext.dylib b/src/parallax_extensions/lib/libparallax_ext.dylib
diff --git a/src/parallax_extensions/lib/parallax_ext.metallib b/src/parallax_extensions/lib/parallax_ext.metallib
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -0,0 +1,33 @@
+from types import SimpleNamespace
+
+from parallax.utils import utils
+
+
+def test_is_metal_available_uses_mlx_metal_is_available(monkeypatch):
+    """The helper returns what the stubbed ``mx.metal.is_available()`` reports."""
+    metal_stub = SimpleNamespace(is_available=lambda: True)
+    monkeypatch.setattr(utils, "mx", SimpleNamespace(metal=metal_stub))
+
+    assert utils.is_metal_available() is True
+
+
+def test_is_metal_available_returns_false_when_metal_api_missing(monkeypatch):
+    """An ``mx`` stand-in without a ``metal`` attribute must yield False."""
+    monkeypatch.setattr(utils, "mx", SimpleNamespace())
+
+    available = utils.is_metal_available()
+    assert available is False
+
+
+def test_get_current_device_prefers_mlx_when_metal_available(monkeypatch):
+    """With the CUDA probe False and the Metal probe True, the device is ``mlx``."""
+    monkeypatch.setattr(utils, "is_metal_available", lambda: True)
+    monkeypatch.setattr(utils, "is_cuda_available", lambda: False)
+    assert utils.get_current_device() == "mlx"
+
+
+def test_get_current_device_prefers_mlx_when_both_backends_report_available(monkeypatch):
+    """When both availability probes return True, the device is still ``mlx``."""
+    for probe in ("is_cuda_available", "is_metal_available"):
+        monkeypatch.setattr(utils, probe, lambda: True)
+    assert utils.get_current_device() == "mlx"