From 761cfc78ed3c3bfd515ebe4fd02871e0a2c7986f Mon Sep 17 00:00:00 2001 From: brijrajk <22271048+brijrajk@users.noreply.github.com> Date: Tue, 23 Jun 2026 00:34:53 +0530 Subject: [PATCH] [GLUTEN-10992][VL] Fix MatchError for KeyGroupedPartitioning in native shuffle When Spark 4.0's V2 bucketing shuffle (spark.sql.v2.bucketing.shuffle.enabled=true) is used in a join where only one side reports partitioning, Spark generates a ShuffleExchangeExec with KeyGroupedPartitioning as its output partitioning. The default case in VeloxSparkPlanExecApi.genColumnarShuffleExchange created a ColumnarShuffleExchangeExec for this node, which then crashed with a scala.MatchError in ExecUtil.genShuffleDependency because KeyGroupedPartitioning was not handled in the native partitioning match. Fix by adding an explicit KeyGroupedPartitioning case to genColumnarShuffleExchange that marks the shuffle for fallback to vanilla Spark. Also harden ExecUtil.genShuffleDependency with an explicit catch-all case that throws GlutenNotSupportException instead of a cryptic MatchError for any future unknown partitioning types. 
Co-Authored-By: Claude Sonnet 4.6 --- .../gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala | 6 ++++++ .../org/apache/spark/sql/execution/utils/ExecUtil.scala | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala index be21337d994..b4f027acc68 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala @@ -470,6 +470,12 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi with Logging { } } } + case _: KeyGroupedPartitioning => + FallbackTags.add( + shuffle, + ValidationResult.failed( + "KeyGroupedPartitioning is not supported by Gluten native shuffle")) + shuffle.withNewChildren(child :: Nil) case _ => ColumnarShuffleExchangeExec(shuffle, child, null) } diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala index dcfa0ee525c..e55927b9141 100644 --- a/backends-velox/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala +++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.utils import org.apache.gluten.backendsapi.BackendsApiManager import org.apache.gluten.columnarbatch.{ColumnarBatches, VeloxColumnarBatches} import org.apache.gluten.config.ShuffleWriterType +import org.apache.gluten.exception.GlutenNotSupportException import org.apache.gluten.iterator.Iterators import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators import org.apache.gluten.runtime.Runtimes @@ -172,6 +173,9 @@ object ExecUtil { // range partitioning fall back to row-based partition id computation case 
RangePartitioning(orders, n) => new NativePartitioning(GlutenShuffleUtils.RangePartitioningShortName, n) + case other => + throw new GlutenNotSupportException( + s"Partitioning ${other.getClass.getSimpleName} is not supported by native shuffle") } val isRoundRobin = newPartitioning.isInstanceOf[RoundRobinPartitioning] &&