From 761cfc78ed3c3bfd515ebe4fd02871e0a2c7986f Mon Sep 17 00:00:00 2001
From: brijrajk <22271048+brijrajk@users.noreply.github.com>
Date: Tue, 23 Jun 2026 00:34:53 +0530
Subject: [PATCH] [GLUTEN-10992][VL] Fix MatchError for KeyGroupedPartitioning
 in native shuffle

When Spark 4.0's V2 bucketing shuffle (spark.sql.v2.bucketing.shuffle.enabled=true)
is used in a join where only one side reports partitioning, Spark generates a
ShuffleExchangeExec whose output partitioning is KeyGroupedPartitioning. The
default case in VeloxSparkPlanExecApi.genColumnarShuffleExchange created a
ColumnarShuffleExchangeExec for this node, which then crashed with a
scala.MatchError in ExecUtil.genShuffleDependency because KeyGroupedPartitioning
was not handled in the native partitioning match.

Fix by adding an explicit KeyGroupedPartitioning case to genColumnarShuffleExchange
that tags the shuffle for fallback to vanilla Spark. Also harden
ExecUtil.genShuffleDependency with an explicit catch-all case that throws
GlutenNotSupportException instead of a cryptic MatchError for any future
unhandled partitioning types.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala    | 6 ++++++
 .../org/apache/spark/sql/execution/utils/ExecUtil.scala     | 4 ++++
 2 files changed, 10 insertions(+)

diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index be21337d994..b4f027acc68 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -470,6 +470,12 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi with Logging {
             }
           }
         }
+      case _: KeyGroupedPartitioning =>
+        FallbackTags.add(
+          shuffle,
+          ValidationResult.failed(
+            "KeyGroupedPartitioning is not supported by Gluten native shuffle"))
+        shuffle.withNewChildren(child :: Nil)
       case _ =>
         ColumnarShuffleExchangeExec(shuffle, child, null)
     }
diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala
index dcfa0ee525c..e55927b9141 100644
--- a/backends-velox/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala
+++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.utils
 import org.apache.gluten.backendsapi.BackendsApiManager
 import org.apache.gluten.columnarbatch.{ColumnarBatches, VeloxColumnarBatches}
 import org.apache.gluten.config.ShuffleWriterType
+import org.apache.gluten.exception.GlutenNotSupportException
 import org.apache.gluten.iterator.Iterators
 import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators
 import org.apache.gluten.runtime.Runtimes
@@ -172,6 +173,9 @@ object ExecUtil {
       // range partitioning fall back to row-based partition id computation
       case RangePartitioning(orders, n) =>
         new NativePartitioning(GlutenShuffleUtils.RangePartitioningShortName, n)
+      case other =>
+        throw new GlutenNotSupportException(
+          s"Partitioning ${other.getClass.getSimpleName} is not supported by native shuffle")
     }
 
     val isRoundRobin = newPartitioning.isInstanceOf[RoundRobinPartitioning] &&