From 1d3a6152cac4a5798cf1bd1d04198284cddadb09 Mon Sep 17 00:00:00 2001 From: Aman Gupta Date: Sun, 2 Nov 2025 19:49:47 +0800 Subject: [PATCH] CUDA: avoid mul + bias fusion when doing fusion --- ggml/src/ggml-cuda/ggml-cuda.cu | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 61a8f1df87de1..fb4ee785d1502 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -2115,6 +2115,14 @@ static bool ggml_cuda_should_fuse_mul_mat_vec_f(const ggml_tensor * tensor) { const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc; use_mul_mat_vec_f = use_mul_mat_vec_f && ggml_cuda_should_use_mmvf(src0->type, cc, src0->ne, is_mul_mat_id ? src1->ne[2] : src1->ne[1]); + const bool split = ggml_backend_buft_is_cuda_split(src0->buffer->buft) || + ggml_backend_buft_is_cuda_split(src1->buffer->buft); + + //TODO: add support for fusion for split buffers + if (split) { + return false; + } + //we only support fusion for ncols_dst = 1 if (tensor->op == GGML_OP_MUL_MAT && dst->ne[1] != 1) { return false; @@ -2154,6 +2162,15 @@ static bool ggml_cuda_should_fuse_mul_mat_vec_q(const ggml_tensor * tensor) { return false; } + + const bool split = ggml_backend_buft_is_cuda_split(src0->buffer->buft) || + ggml_backend_buft_is_cuda_split(src1->buffer->buft); + + //TODO: add support for fusion for split buffers + if (split) { + return false; + } + return use_mul_mat_vec_q; }