refactor(mlx): move mlx_cache.py from common to mlx backend

blightbow · claude · blightbow · commit 06f012b366ef · 2025-12-14T00:40:36.000-05:00
ThreadSafeLRUPromptCache is used only by the mlx backend. After evaluating mlx-vlm, it was determined that the cache cannot be shared, because mlx-vlm's generate/stream_generate functions do not support the prompt_cache parameter that mlx_lm provides.

- Move mlx_cache.py from backend/python/common/ to backend/python/mlx/
- Remove sys.path manipulation from backend.py and test.py
- Fix test assertion to expect "MLX model loaded successfully"

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: Blightbow <blightbow@users.noreply.github.com>
diff --git a/backend/python/mlx/backend.py b/backend/python/mlx/backend.py
@@ -19,8 +19,6 @@
 import base64
 import io
 
-# Add common module to path for shared cache
-sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'common'))
 from mlx_cache import ThreadSafeLRUPromptCache
 
 _ONE_DAY_IN_SECONDS = 60 * 60 * 24
diff --git a/backend/python/mlx/mlx_cache.py b/backend/python/mlx/mlx_cache.py
diff --git a/backend/python/mlx/test.py b/backend/python/mlx/test.py
@@ -49,7 +49,7 @@ def test_load_model(self):
                 stub = backend_pb2_grpc.BackendStub(channel)
                 response = stub.LoadModel(backend_pb2.ModelOptions(Model="mlx-community/Llama-3.2-1B-Instruct-4bit"))
                 self.assertTrue(response.success)
-                self.assertEqual(response.message, "Model loaded successfully")
+                self.assertEqual(response.message, "MLX model loaded successfully")
         except Exception as err:
             print(err)
             self.fail("LoadModel service failed")
@@ -245,9 +245,6 @@ class TestThreadSafeLRUPromptCache(unittest.TestCase):
     """
 
     def setUp(self):
-        import sys
-        import os
-        sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'common'))
         from mlx_cache import ThreadSafeLRUPromptCache
         self.cache = ThreadSafeLRUPromptCache(max_size=3)