diff --git a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 29a1bfd68517..168d1592a21d 100644
--- a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -547,7 +547,6 @@ class VeloxTestSettings extends BackendTestSettings {
     // TODO: fix on Spark-4.1
     .excludeByPrefix("SPARK-53535") // see https://issues.apache.org/jira/browse/SPARK-53535
     .excludeByPrefix("vectorized reader: missing all struct fields")
-    .excludeByPrefix("SPARK-54220") // https://issues.apache.org/jira/browse/SPARK-54220
   enableSuite[GlutenParquetV1PartitionDiscoverySuite]
   enableSuite[GlutenParquetV2PartitionDiscoverySuite]
   enableSuite[GlutenParquetProtobufCompatibilitySuite]
diff --git a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetIOSuite.scala b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetIOSuite.scala
index ad1ae40f928c..2ccf0070bb89 100644
--- a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetIOSuite.scala
+++ b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetIOSuite.scala
@@ -17,9 +17,14 @@
 package org.apache.spark.sql.execution.datasources.parquet
 
 import org.apache.spark.sql._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types.{IntegerType, StringType, StructType}
 
 /** A test suite that tests basic Parquet I/O. */
 class GlutenParquetIOSuite extends ParquetIOSuite with GlutenSQLTestsBaseTrait {
+  override def testNameBlackList: Seq[String] =
+    Seq("SPARK-54220: vectorized reader: missing all struct fields, struct with NullType only")
+
   override protected def testFile(fileName: String): String = {
     getWorkspaceFilePath("sql", "core", "src", "test", "resources").toString + "/" + fileName
   }
@@ -27,4 +32,36 @@ class GlutenParquetIOSuite extends ParquetIOSuite with GlutenSQLTestsBaseTrait {
   override protected def readResourceParquetFile(name: String): DataFrame = {
     spark.read.parquet(testFile(name))
   }
+
+  testGluten(
+    "SPARK-54220: vectorized reader: missing all struct fields, struct with NullType only") {
+    val data = Seq(
+      Tuple1((null, null)),
+      Tuple1((null, null)),
+      Tuple1(null)
+    )
+    val readSchema = new StructType().add(
+      "_1",
+      new StructType()
+        .add("_3", IntegerType, nullable = true)
+        .add("_4", StringType, nullable = true),
+      nullable = true)
+    val expectedAnswer = Row(Row(null, null)) :: Row(Row(null, null)) :: Row(null) :: Nil
+
+    withParquetFile(data) {
+      file =>
+        for (offheapEnabled <- Seq(true, false)) {
+          withSQLConf(
+            SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true",
+            SQLConf.LEGACY_PARQUET_RETURN_NULL_STRUCT_IF_ALL_FIELDS_MISSING.key -> "false",
+            SQLConf.COLUMN_VECTOR_OFFHEAP_ENABLED.key -> offheapEnabled.toString
+          ) {
+            withAllParquetReaders {
+              val df = spark.read.schema(readSchema).parquet(file)
+              checkAnswer(df, expectedAnswer)
+            }
+          }
+        }
+    }
+  }
 }