Skip to content

Commit 944e1fe

Browse files
committed
musa: use fixed warp size (32) in mul_mat_vec_q_cuda
Signed-off-by: Xiaodong Ye <[email protected]>
1 parent fd123cf commit 944e1fe

File tree

1 file changed

+5
-0
lines changed

1 file changed

+5
-0
lines changed

ggml/src/ggml-cuda/mmvq.cu

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -219,7 +219,12 @@ static void mul_mat_vec_q_cuda(
219 219
GGML_ASSERT(ncols_y <= MMVQ_MAX_BATCH_SIZE);
220 220

221 221
const int device = ggml_cuda_get_device();
222+
#ifndef GGML_USE_MUSA
222 223
const int warp_size = ggml_cuda_info().devices[device].warp_size;
224+
#else // GGML_USE_MUSA
225+
const int warp_size = WARP_SIZE;
226+
#endif // GGML_USE_MUSA
227+
223 228
const mmvq_parameter_table_id table_id = get_device_table_id(ggml_cuda_info().devices[device].cc);
224 229

225 230
switch (ncols_y) {

0 commit comments

Comments
 (0)