-
Notifications
You must be signed in to change notification settings - Fork 234
Feat: support array_except function #1343
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 9 commits
3abe5c4
b411ab3
b083d76
61f70db
b661960
9697eae
56c68e1
615f048
ffaf5a7
f0955c1
41a9804
785c548
9251dc1
1f5782f
dabeed2
2f138b7
281b454
9798361
9042243
4c9daef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -292,4 +292,89 @@ class CometArrayExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper | |
} | ||
} | ||
|
||
// Verifies array_except on integer-typed columns read back from Parquet,
// with and without dictionary encoding. The final query wraps one operand
// in a CASE expression so a null array input is exercised as well.
test("array_except - basic test (only integer values)") {
  withSQLConf(CometConf.COMET_EXPR_ALLOW_INCOMPATIBLE.key -> "true") {
    for (dictionaryEnabled <- Seq(true, false)) {
      withTempDir { dir =>
        val parquetPath = new Path(dir.toURI.toString, "test.parquet")
        makeParquetFileAllTypes(parquetPath, dictionaryEnabled, 10000)
        spark.read.parquet(parquetPath.toString).createOrReplaceTempView("t1")

        // Each query must both match Spark's answer and run fully on the
        // native Comet operator.
        val queries = Seq(
          "SELECT array_except(array(_2, _3, _4), array(_3, _4)) from t1",
          "SELECT array_except(array(_18), array(_19)) from t1",
          "SELECT array_except((CASE WHEN _2 = _3 THEN array(_2, _3, _4) END), array(_4)) FROM t1")
        queries.foreach(q => checkSparkAnswerAndOperator(spark.sql(q)))
      }
    }
  }
}
|
||
// Runs array_except over a two-element array built from every column of a
// randomly generated Parquet file, reading the file through Comet's native
// Parquet reader. Nested types are excluded from generation here.
test("array_except - test all types (native Parquet reader)") {
  withTempDir { dir =>
    val filename = new Path(dir.toURI.toString, "test.parquet").toString
    val rng = new Random(42) // fixed seed keeps the generated data reproducible
    // Write the input file with Comet disabled so it is produced by vanilla Spark.
    withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
      ParquetGenerator.makeParquetFile(
        rng,
        spark,
        filename,
        100,
        DataGenOptions(
          allowNull = true,
          generateNegativeZero = true,
          generateArray = false,
          generateStruct = false,
          generateMap = false))
    }
    val table = spark.read.parquet(filename)
    table.createOrReplaceTempView("t1")
    // test with array of each column
    table.schema.fieldNames.foreach { fieldName =>
      sql(s"SELECT array($fieldName, $fieldName) as a, array(a[0], $fieldName) as b FROM t1")
        .createOrReplaceTempView("t2")
      checkSparkAnswerAndOperator(sql("SELECT array_except(a, b) FROM t2"))
    }
  }
}
|
||
// Exercises array_except over every generated column type, including nested
// arrays and structs, going through the Spark-to-Arrow conversion path
// rather than the native scan.
test("array_except - test all types (convert from Parquet)") {
  withTempDir { dir =>
    val filename = new Path(dir.toURI.toString, "test.parquet").toString
    val rng = new Random(42) // fixed seed keeps the generated data reproducible
    // Generate the input file with Comet turned off.
    withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
      val options = DataGenOptions(
        allowNull = true,
        generateNegativeZero = true,
        generateArray = true,
        generateStruct = true,
        generateMap = false)
      ParquetGenerator.makeParquetFile(rng, spark, filename, 100, options)
    }
    // Force the convert-from-Parquet path: native scan off, Arrow conversion on.
    withSQLConf(
      CometConf.COMET_NATIVE_SCAN_ENABLED.key -> "false",
      CometConf.COMET_SPARK_TO_ARROW_ENABLED.key -> "true",
      CometConf.COMET_CONVERT_FROM_PARQUET_ENABLED.key -> "true") {
      val table = spark.read.parquet(filename)
      table.createOrReplaceTempView("t1")
      // test with array of each column
      table.schema.fields.foreach { field =>
        val fieldName = field.name
        sql(s"SELECT array($fieldName, $fieldName) as a, array(a[0], $fieldName) as b FROM t1")
          .createOrReplaceTempView("t2")
        val df = sql("SELECT array_except(a, b) FROM t2")
        field.dataType match {
          case _: StructType =>
          // skip due to https://github.com/apache/datafusion-comet/issues/1314
          case _ =>
            checkSparkAnswer(df)
        }
      }
    }
  }
}
|
||
} |
Uh oh!
There was an error while loading. Please reload this page.