MLX backend POC #1365
base: main
.gitignore
@@ -27,7 +27,6 @@ __pycache__
\#*\#
build
compiled/*.cpp
core.*
cutils_ext.cpp
dist
doc/.build/
Large diffs are not rendered by default.
pytensor/link/mlx/__init__.py
@@ -0,0 +1 @@
from pytensor.link.mlx.linker import MLXLinker
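For context, a minimal sketch of how the new linker could be wired into a compile mode. This is not from the diff: it assumes MLXLinker follows the same JitLinker pattern as PyTensor's existing JAXLinker, and the mlx_mode name is hypothetical (the PR may instead register a named "MLX" mode).

import pytensor
import pytensor.tensor as pt
from pytensor.compile.mode import Mode
from pytensor.link.mlx.linker import MLXLinker

# Hypothetical wiring: build a Mode around the new linker and compile with it
x = pt.vector("x")
mlx_mode = Mode(linker=MLXLinker())
f = pytensor.function([x], pt.exp(x), mode=mlx_mode)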
pytensor/link/mlx/dispatch/__init__.py
@@ -0,0 +1,13 @@
# isort: off
from pytensor.link.mlx.dispatch.basic import mlx_funcify, mlx_typify

import pytensor.link.mlx.dispatch.math
import pytensor.link.mlx.dispatch.basic
import pytensor.link.mlx.dispatch.elemwise
import pytensor.link.mlx.dispatch.shape
import pytensor.link.mlx.dispatch.subtensor
import pytensor.link.mlx.dispatch.core
import pytensor.link.mlx.dispatch.signal
import pytensor.link.mlx.dispatch.signal.conv
import pytensor.link.mlx.dispatch.blockwise
# isort: on
pytensor/link/mlx/dispatch/basic.py
@@ -0,0 +1,84 @@
import warnings
from copy import deepcopy
from functools import singledispatch
from types import NoneType

import mlx.core as mx
import numpy as np

from pytensor.compile.ops import DeepCopyOp
from pytensor.graph.fg import FunctionGraph
from pytensor.link.utils import fgraph_to_python
from pytensor.raise_op import Assert, CheckAndRaise


@singledispatch
def mlx_typify(data, **kwargs):
    raise NotImplementedError(f"mlx_typify is not implemented for {type(data)}")


@mlx_typify.register(np.ndarray)
def mlx_typify_tensor(data, dtype=None, **kwargs):
    return mx.array(data, dtype=dtype)


@mlx_typify.register(slice)
@mlx_typify.register(NoneType)
@mlx_typify.register(np.number)
@mlx_typify.register(mx.array)
def mlx_typify_no_conversion_needed(data, **kwargs):
    return data


@mlx_typify.register(int)
@mlx_typify.register(float)
def mlx_typify_python_scalar(data, **kwargs):
    return mx.array(data)


@singledispatch
def mlx_funcify(op, node=None, storage_map=None, **kwargs):
    """Create an MLX-compatible function from a PyTensor `Op`."""
    raise NotImplementedError(
        f"No MLX conversion for the given `Op`: {op}.\n"
        "Check out `https://github.com/pymc-devs/pytensor/issues/1350` for progress "
        "or to request that we prioritize this operation."
    )


@mlx_funcify.register(FunctionGraph)
def mlx_funcify_FunctionGraph(
    fgraph,
    node=None,
    fgraph_name="mlx_funcified_fgraph",
    conversion_func=mlx_funcify,
    **kwargs,
):
    built_kwargs = {"conversion_func": conversion_func, **kwargs}
    return fgraph_to_python(
        fgraph,
        conversion_func,
        type_conversion_fn=mlx_typify,
        fgraph_name=fgraph_name,
        **built_kwargs,
    )


@mlx_funcify.register(DeepCopyOp)
def mlx_funcify_DeepCopyOp(op, **kwargs):
    def deepcopyop(x):
        return deepcopy(x)

    return deepcopyop


@mlx_funcify.register(Assert)
@mlx_funcify.register(CheckAndRaise)
def mlx_funcify_CheckAndRaise(op, **kwargs):
    warnings.warn(
        f"Skipping `CheckAndRaise` Op (assertion: {op.msg}) as MLX tracing would remove it.",
        stacklevel=2,
    )

    def assert_fn(x, *inputs):
        return x

    return assert_fn
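To see how these pieces compose, here is a small usage sketch (not part of the diff) that funcifies a graph by hand. It assumes the elemwise/math dispatchers imported by the package __init__ cover exp and add.

import numpy as np
import pytensor.tensor as pt
from pytensor.graph.fg import FunctionGraph
from pytensor.link.mlx.dispatch import mlx_funcify, mlx_typify  # import registers all dispatchers

# y = exp(x) + 1, wrapped as a raw FunctionGraph
x = pt.vector("x")
fgraph = FunctionGraph(inputs=[x], outputs=[pt.exp(x) + 1.0])

# mlx_funcify(FunctionGraph) returns a Python callable whose body calls
# MLX ops; outputs come back as a tuple
fn = mlx_funcify(fgraph)
(out,) = fn(mlx_typify(np.array([0.0, 1.0], dtype="float32")))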
pytensor/link/mlx/dispatch/blockwise.py
@@ -0,0 +1,107 @@
import mlx.core as mx

from pytensor.link.mlx.dispatch import mlx_funcify
from pytensor.tensor.blockwise import Blockwise
from pytensor.tensor.signal.conv import Conv1d


def blockwise_conv1d(op, node, **kwargs):
Review thread on blockwise_conv1d:
    reviewer: Not needed anymore since they fixed upstream, right?
    author: I think we still need it. Where do you see that it's fixed? We are using this blockwise conv1d.
    reviewer: Sure, but Blockwise will call vmap on the core op, so we only need to dispatch the core Conv1d to MLX's conv1d; the blockwise variant will then work automatically.

    """
    Custom implementation of Blockwise.conv1d for MLX.
    """
    def batched_conv1d(
        x: mx.array,
        kernels: mx.array,
        mode: str = op.core_op.mode,
        stride: int = 1,
        dilation: int = 1,
    ) -> mx.array:
        """
        Apply B separate 1D convolutions (full or valid) to B sequences in parallel.

        Parameters
        ----------
        x : array of shape (B, T)
            B sequences of length T.
        kernels : array of shape (B, K)
            B kernels of length K.
        mode : {"valid", "full"}
            "valid" → no padding, output length = T - K + 1
            "full" → zero-pad so output length = T + K - 1
        stride : int, convolution stride (default=1)
        dilation : int, convolution dilation (default=1)

        Returns
        -------
        out : array of shape (B, L)
            where L = T - K + 1 if mode="valid", or T + K - 1 if mode="full".
        """
        # --- 1) shape checks ---
        B, T = x.shape
        Bk, K = kernels.shape
        if B != Bk:
            raise ValueError(f"Batch mismatch: x has {B}, kernels has {Bk}")

        # --- 2) flip kernels for convolution ---
        kernels_flipped = kernels[:, ::-1]  # shape (B, K)

        # --- 3) decide padding ---
        if mode == "valid":
            pad = 0
        elif mode == "full":
            pad = (K - 1) * dilation
        else:
            raise ValueError(f"Unsupported mode {mode!r}: choose 'valid' or 'full'")

        # --- 4) reshape into MLX conv1d form ---
        # input: (N=1, H=T, C_in=B)
        x_in = x.T[None, :, :]

        # weight: (C_out=B, H_f=K, C_in=1)
        w = kernels_flipped[:, :, None]

        # --- 5) run grouped conv1d ---
        y = mx.conv1d(x_in, w, stride=stride, padding=pad, dilation=dilation, groups=B)
        # y shape: (1, H_out, B)

        # --- 6) return shape (B, H_out) ---
        return y[0].T

    return batched_conv1d
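The grouped-convolution trick above (mapping the batch onto channels with groups=B) can be sanity-checked against NumPy outside of PyTensor. This check is illustrative only, not part of the diff:

import mlx.core as mx
import numpy as np

B, T, K = 2, 8, 3
x = np.random.randn(B, T).astype("float32")
k = np.random.randn(B, K).astype("float32")

# Reference: one np.convolve per batch row, "full" mode
expected = np.stack([np.convolve(x[i], k[i], mode="full") for i in range(B)])

# Same computation as a single grouped mx.conv1d call, as in batched_conv1d
x_in = mx.array(x).T[None, :, :]             # (1, T, B)
w = mx.array(k[:, ::-1].copy())[:, :, None]  # (B, K, 1), kernels flipped
y = mx.conv1d(x_in, w, padding=K - 1, groups=B)[0].T  # (B, T + K - 1)

np.testing.assert_allclose(np.array(y), expected, rtol=1e-5)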
@mlx_funcify.register(Blockwise)
def funcify_Blockwise(op: Blockwise, node, **kwargs):
    # 1) If it's a Conv1d Blockwise, use the custom implementation
    if isinstance(op.core_op, Conv1d):
        return blockwise_conv1d(op, node, **kwargs)

Review thread on lines +78 to +80 (the Conv1d special case):
    reviewer: Here, we don't need this special casing anymore.
    author: Are you sure? Without it the test fails.
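For reference, the reviewer's alternative would look roughly like the sketch below: dispatch the core Conv1d op itself and let the generic mx.vmap path in funcify_Blockwise handle the batching. This is hypothetical code assuming the core op exposes the same mode attribute used above; it is not part of the diff.

@mlx_funcify.register(Conv1d)
def mlx_funcify_Conv1d(op, node=None, **kwargs):
    mode = op.mode  # "valid" or "full", as in batched_conv1d above

    def conv1d(data, kernel):
        # Single 1D sequence: lift into mx.conv1d's (N, H, C) layout
        pad = 0 if mode == "valid" else kernel.shape[-1] - 1
        x_in = data[None, :, None]       # (1, T, 1)
        w = kernel[::-1][None, :, None]  # (1, K, 1), flipped for convolution
        return mx.conv1d(x_in, w, padding=pad)[0, :, 0]

    return conv1d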
    # 2) Otherwise, get the core python function for this Blockwise
    core_node = op._create_dummy_core_node(node.inputs)
    core_f = mlx_funcify(op.core_op, core_node)

    # 3) Number of batch dimensions for this node
    n_batch = op.batch_ndim(node)

    # 4) Build in_axes: vmap the first n_batch inputs along axis 0, keep the rest static
    in_axes = tuple(0 if i < n_batch else None for i in range(len(node.inputs)))

    # 5) Handle the case where no vectorization is needed
    if n_batch == 0 or all(axis is None for axis in in_axes):
        # No batch dimensions: just return the core function
        def blockwise_fun(*inputs):
            return core_f(*inputs)

        return blockwise_fun

    # 6) Vectorize (vmap) with in_axes
    blockwise_f = mx.vmap(core_f, in_axes=in_axes)

    # 7) Return the mapped function
    def blockwise_fun(*inputs):
        return blockwise_f(*inputs)

    return blockwise_fun
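Steps 4-6 rely on mx.vmap's in_axes semantics: axis 0 means "map over this argument's first axis", None means "pass the argument through unbatched". A standalone illustration, independent of the diff:

import mlx.core as mx

def core(a, b):
    return a * b  # core function with no batch handling of its own

batched = mx.vmap(core, in_axes=(0, None))  # map over a's first axis, broadcast b
a = mx.ones((4, 3))   # batch of 4 vectors
b = mx.arange(3)
out = batched(a, b)   # shape (4, 3): core applied to each row of a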