Skip to content

Commit ed92a5c

Browse files
davidm-dbcloud-fan
authored andcommitted
[SPARK-54609][SQL] Disable TIME type by default
Introducing a new SQL config for TIME type: `spark.sql.timeType.enabled`. The default value is `false` and it is enabled only in tests. TIME data type support is not complete, so we need to guard it before it is completed, especially ahead of Spark 4.1 release. No. Need to add tests for disabled config. No. Closes #53344 from davidm-db/davidm-db/time-config. Lead-authored-by: David Milicevic <[email protected]> Co-authored-by: Wenchen Fan <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]> (cherry picked from commit 18a9435) Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent ebe1fcc commit ed92a5c

File tree

9 files changed

+69
-15
lines changed

9 files changed

+69
-15
lines changed

common/utils/src/main/resources/error/error-conditions.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7141,6 +7141,12 @@
71417141
],
71427142
"sqlState" : "0A001"
71437143
},
7144+
"UNSUPPORTED_TIME_TYPE" : {
7145+
"message" : [
7146+
"The data type TIME is not supported."
7147+
],
7148+
"sqlState" : "0A000"
7149+
},
71447150
"UNSUPPORTED_TYPED_LITERAL" : {
71457151
"message" : [
71467152
"Literals of the type <unsupportedType> are not supported. Supported types are <supportedTypes>."

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,11 @@ import java.time.{Duration, Instant, LocalDate, LocalDateTime, LocalTime, Period
2525
import java.util.{Map => JavaMap}
2626
import javax.annotation.Nullable
2727

28-
import scala.language.existentials
29-
3028
import org.apache.spark.SparkIllegalArgumentException
3129
import org.apache.spark.sql.Row
3230
import org.apache.spark.sql.catalyst.expressions._
3331
import org.apache.spark.sql.catalyst.util._
32+
import org.apache.spark.sql.errors.QueryCompilationErrors
3433
import org.apache.spark.sql.internal.SQLConf
3534
import org.apache.spark.sql.types._
3635
import org.apache.spark.sql.types.DayTimeIntervalType._
@@ -79,6 +78,8 @@ object CatalystTypeConverters {
7978
new GeometryConverter(g)
8079
case DateType if SQLConf.get.datetimeJava8ApiEnabled => LocalDateConverter
8180
case DateType => DateConverter
81+
case _: TimeType if !SQLConf.get.isTimeTypeEnabled =>
82+
QueryCompilationErrors.unsupportedTimeTypeError()
8283
case _: TimeType => TimeConverter
8384
case TimestampType if SQLConf.get.datetimeJava8ApiEnabled => InstantConverter
8485
case TimestampType => TimestampConverter

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.util._
3535
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
3636
import org.apache.spark.sql.catalyst.util.DateTimeUtils._
3737
import org.apache.spark.sql.catalyst.util.IntervalUtils.{dayTimeIntervalToByte, dayTimeIntervalToDecimal, dayTimeIntervalToInt, dayTimeIntervalToLong, dayTimeIntervalToShort, yearMonthIntervalToByte, yearMonthIntervalToInt, yearMonthIntervalToShort}
38-
import org.apache.spark.sql.errors.{QueryErrorsBase, QueryExecutionErrors}
38+
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase, QueryExecutionErrors}
3939
import org.apache.spark.sql.internal.SQLConf
4040
import org.apache.spark.sql.types._
4141
import org.apache.spark.unsafe.types.{GeographyVal, UTF8String, VariantVal}
@@ -602,6 +602,12 @@ case class Cast(
602602
}
603603

604604
override def checkInputDataTypes(): TypeCheckResult = {
605+
dataType match {
606+
// If the cast is to a TIME type, first check if TIME type is enabled.
607+
case _: TimeType if !SQLConf.get.isTimeTypeEnabled =>
608+
throw QueryCompilationErrors.unsupportedTimeTypeError()
609+
case _ =>
610+
}
605611
val canCast = evalMode match {
606612
case EvalMode.LEGACY => Cast.canCast(child.dataType, dataType)
607613
case EvalMode.ANSI => Cast.canAnsiCast(child.dataType, dataType)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/timeExpressions.scala

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,22 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils
3232
import org.apache.spark.sql.catalyst.util.TimeFormatter
3333
import org.apache.spark.sql.catalyst.util.TypeUtils.ordinalNumber
3434
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
35+
import org.apache.spark.sql.internal.SQLConf
3536
import org.apache.spark.sql.internal.types.StringTypeWithCollation
3637
import org.apache.spark.sql.types.{AbstractDataType, AnyTimeType, ByteType, DataType, DayTimeIntervalType, DecimalType, IntegerType, LongType, ObjectType, TimeType}
3738
import org.apache.spark.sql.types.DayTimeIntervalType.{HOUR, SECOND}
3839
import org.apache.spark.unsafe.types.UTF8String
3940

41+
trait TimeExpression extends Expression {
42+
override def checkInputDataTypes(): TypeCheckResult = {
43+
if (SQLConf.get.isTimeTypeEnabled) {
44+
super.checkInputDataTypes()
45+
} else {
46+
throw QueryCompilationErrors.unsupportedTimeTypeError()
47+
}
48+
}
49+
}
50+
4051
/**
4152
* Parses a column to a time based on the given format.
4253
*/
@@ -64,7 +75,7 @@ import org.apache.spark.unsafe.types.UTF8String
6475
since = "4.1.0")
6576
// scalastyle:on line.size.limit
6677
case class ToTime(str: Expression, format: Option[Expression])
67-
extends RuntimeReplaceable with ExpectsInputTypes {
78+
extends RuntimeReplaceable with ExpectsInputTypes with TimeExpression {
6879

6980
def this(str: Expression, format: Expression) = this(str, Option(format))
7081
def this(str: Expression) = this(str, None)
@@ -200,7 +211,7 @@ object TryToTimeExpressionBuilder extends ExpressionBuilder {
200211
// scalastyle:on line.size.limit
201212
case class MinutesOfTime(child: Expression)
202213
extends RuntimeReplaceable
203-
with ExpectsInputTypes {
214+
with ExpectsInputTypes with TimeExpression {
204215

205216
override def replacement: Expression = StaticInvoke(
206217
classOf[DateTimeUtils.type],
@@ -259,7 +270,7 @@ object MinuteExpressionBuilder extends ExpressionBuilder {
259270

260271
case class HoursOfTime(child: Expression)
261272
extends RuntimeReplaceable
262-
with ExpectsInputTypes {
273+
with ExpectsInputTypes with TimeExpression {
263274

264275
override def replacement: Expression = StaticInvoke(
265276
classOf[DateTimeUtils.type],
@@ -316,7 +327,7 @@ object HourExpressionBuilder extends ExpressionBuilder {
316327

317328
case class SecondsOfTimeWithFraction(child: Expression)
318329
extends RuntimeReplaceable
319-
with ExpectsInputTypes {
330+
with ExpectsInputTypes with TimeExpression {
320331
override def replacement: Expression = {
321332
val precision = child.dataType match {
322333
case TimeType(p) => p
@@ -342,7 +353,7 @@ case class SecondsOfTimeWithFraction(child: Expression)
342353

343354
case class SecondsOfTime(child: Expression)
344355
extends RuntimeReplaceable
345-
with ExpectsInputTypes {
356+
with ExpectsInputTypes with TimeExpression {
346357

347358
override def replacement: Expression = StaticInvoke(
348359
classOf[DateTimeUtils.type],
@@ -433,7 +444,8 @@ object SecondExpressionBuilder extends ExpressionBuilder {
433444
case class CurrentTime(
434445
child: Expression = Literal(TimeType.MICROS_PRECISION),
435446
timeZoneId: Option[String] = None) extends UnaryExpression
436-
with TimeZoneAwareExpression with ImplicitCastInputTypes with CodegenFallback {
447+
with TimeZoneAwareExpression with ImplicitCastInputTypes with CodegenFallback
448+
with TimeExpression {
437449

438450
def this() = {
439451
this(Literal(TimeType.MICROS_PRECISION), None)
@@ -545,7 +557,7 @@ case class MakeTime(
545557
secsAndMicros: Expression)
546558
extends RuntimeReplaceable
547559
with ImplicitCastInputTypes
548-
with ExpectsInputTypes {
560+
with ExpectsInputTypes with TimeExpression {
549561

550562
// Accept `sec` as DecimalType to avoid loosing precision of microseconds while converting
551563
// it to the fractional part of `sec`. If `sec` is an IntegerType, it can be cast into decimal
@@ -570,7 +582,8 @@ case class MakeTime(
570582
* Adds day-time interval to time.
571583
*/
572584
case class TimeAddInterval(time: Expression, interval: Expression)
573-
extends BinaryExpression with RuntimeReplaceable with ExpectsInputTypes {
585+
extends BinaryExpression with RuntimeReplaceable with ExpectsInputTypes
586+
with TimeExpression {
574587
override def nullIntolerant: Boolean = true
575588

576589
override def left: Expression = time
@@ -611,7 +624,8 @@ case class TimeAddInterval(time: Expression, interval: Expression)
611624
* Returns a day-time interval between time values.
612625
*/
613626
case class SubtractTimes(left: Expression, right: Expression)
614-
extends BinaryExpression with RuntimeReplaceable with ExpectsInputTypes {
627+
extends BinaryExpression with RuntimeReplaceable with ExpectsInputTypes
628+
with TimeExpression {
615629
override def nullIntolerant: Boolean = true
616630
override def inputTypes: Seq[AbstractDataType] = Seq(AnyTimeType, AnyTimeType)
617631

@@ -668,7 +682,8 @@ case class TimeDiff(
668682
end: Expression)
669683
extends TernaryExpression
670684
with RuntimeReplaceable
671-
with ImplicitCastInputTypes {
685+
with ImplicitCastInputTypes
686+
with TimeExpression {
672687

673688
override def first: Expression = unit
674689
override def second: Expression = start
@@ -723,7 +738,8 @@ case class TimeDiff(
723738
since = "4.1.0")
724739
// scalastyle:on line.size.limit
725740
case class TimeTrunc(unit: Expression, time: Expression)
726-
extends BinaryExpression with RuntimeReplaceable with ImplicitCastInputTypes {
741+
extends BinaryExpression with RuntimeReplaceable with ImplicitCastInputTypes
742+
with TimeExpression {
727743

728744
override def left: Expression = unit
729745
override def right: Expression = time

sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4492,4 +4492,10 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
44924492
"colType" -> "metadata",
44934493
"errors" -> errors.mkString("- ", "\n- ", "")))
44944494
}
4495+
4496+
def unsupportedTimeTypeError(): Throwable = {
4497+
new AnalysisException(
4498+
errorClass = "UNSUPPORTED_TIME_TYPE",
4499+
messageParameters = Map.empty)
4500+
}
44954501
}

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6621,6 +6621,13 @@ object SQLConf {
66216621
.booleanConf
66226622
.createWithDefault(false)
66236623

6624+
val TIME_TYPE_ENABLED =
6625+
buildConf("spark.sql.timeType.enabled")
6626+
.doc("When true, the TIME data type is supported.")
6627+
.version("4.1.0")
6628+
.booleanConf
6629+
.createWithDefault(Utils.isTesting)
6630+
66246631
/**
66256632
* Holds information about keys that have been deprecated.
66266633
*
@@ -7781,6 +7788,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
77817788
def coerceMergeNestedTypes: Boolean =
77827789
getConf(SQLConf.MERGE_INTO_NESTED_TYPE_COERCION_ENABLED)
77837790

7791+
def isTimeTypeEnabled: Boolean = getConf(SQLConf.TIME_TYPE_ENABLED)
7792+
77847793
/** ********************** SQLConf functionality methods ************ */
77857794

77867795
/** Set Spark SQL configuration properties. */

sql/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,11 @@ import org.apache.spark.sql.connect.config.Connect.{CONNECT_GRPC_ARROW_MAX_BATCH
3737
import org.apache.spark.sql.connect.planner.{InvalidInputErrors, SparkConnectPlanner}
3838
import org.apache.spark.sql.connect.service.ExecuteHolder
3939
import org.apache.spark.sql.connect.utils.MetricGenerator
40+
import org.apache.spark.sql.errors.QueryCompilationErrors
4041
import org.apache.spark.sql.execution.{DoNotCleanup, LocalTableScanExec, QueryExecution, RemoveShuffleFiles, SkipMigration, SQLExecution}
4142
import org.apache.spark.sql.execution.arrow.ArrowConverters
4243
import org.apache.spark.sql.internal.SQLConf
43-
import org.apache.spark.sql.types.{DataType, StructType}
44+
import org.apache.spark.sql.types.{DataType, StructType, TimeType}
4445
import org.apache.spark.util.ThreadUtils
4546

4647
/**
@@ -133,6 +134,10 @@ private[execution] class SparkConnectPlanExecution(executeHolder: ExecuteHolder)
133134
errorClass = "UNSUPPORTED_FEATURE.GEOSPATIAL_DISABLED",
134135
messageParameters = scala.collection.immutable.Map.empty)
135136
}
137+
val timeTypeEnabled = spark.sessionState.conf.isTimeTypeEnabled
138+
if (!timeTypeEnabled && schema.existsRecursively(_.isInstanceOf[TimeType])) {
139+
throw QueryCompilationErrors.unsupportedTimeTypeError()
140+
}
136141
val maxRecordsPerBatch = spark.sessionState.conf.arrowMaxRecordsPerBatch
137142
val timeZoneId = spark.sessionState.conf.sessionLocalTimeZone
138143
val largeVarTypes = spark.sessionState.conf.arrowUseLargeVarTypes

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ object DataSourceUtils extends PredicateHelper {
9393
* in a driver side.
9494
*/
9595
def verifySchema(format: FileFormat, schema: StructType, readOnly: Boolean = false): Unit = {
96+
if (!SQLConf.get.isTimeTypeEnabled && schema.existsRecursively(_.isInstanceOf[TimeType])) {
97+
throw QueryCompilationErrors.unsupportedTimeTypeError()
98+
}
9699
schema.foreach { field =>
97100
val supported = if (readOnly) {
98101
format.supportReadDataType(field.dataType)

sql/gen-sql-functions-docs.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,8 @@ def generate_functions_examples_html(jvm, jspark, html_output_dir):
240240
</pre></div>
241241
242242
"""
243+
print("Enabling TIME data type")
244+
jspark.sql("SET spark.sql.timeType.enabled = true")
243245
print("Running SQL examples to generate formatted output.")
244246
for key, infos in _list_grouped_function_infos(jvm):
245247
examples = _make_pretty_examples(jspark, infos)

0 commit comments

Comments
 (0)