Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,12 @@
],
"sqlState" : "22KD3"
},
"CANNOT_USE_MULTI_ALIASES_IN_WATERMARK_CLAUSE" : {
"message" : [
"Multiple aliases are not supported in watermark clause."
],
"sqlState" : "42000"
},
"CANNOT_WRITE_STATE_STORE" : {
"message" : [
"Error writing state store files for provider <providerClass>."
Expand Down
2 changes: 2 additions & 0 deletions docs/sql-ref-ansi-compliance.md
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,7 @@ Below is a list of all the keywords in Spark SQL.
|DEFAULT|non-reserved|non-reserved|non-reserved|
|DEFINED|non-reserved|non-reserved|non-reserved|
|DEFINER|non-reserved|non-reserved|non-reserved|
|DELAY|non-reserved|non-reserved|non-reserved|
|DELETE|non-reserved|non-reserved|reserved|
|DELIMITED|non-reserved|non-reserved|non-reserved|
|DESC|non-reserved|non-reserved|non-reserved|
Expand Down Expand Up @@ -791,6 +792,7 @@ Below is a list of all the keywords in Spark SQL.
|VIEW|non-reserved|non-reserved|non-reserved|
|VIEWS|non-reserved|non-reserved|non-reserved|
|VOID|non-reserved|non-reserved|non-reserved|
|WATERMARK|non-reserved|non-reserved|non-reserved|
|WEEK|non-reserved|non-reserved|non-reserved|
|WEEKS|non-reserved|non-reserved|non-reserved|
|WHEN|reserved|non-reserved|reserved|
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,7 @@ DECLARE: 'DECLARE';
DEFAULT: 'DEFAULT';
DEFINED: 'DEFINED';
DEFINER: 'DEFINER';
DELAY: 'DELAY';
DELETE: 'DELETE';
DELIMITED: 'DELIMITED';
DESC: 'DESC';
Expand Down Expand Up @@ -499,6 +500,7 @@ VERSION: 'VERSION';
VIEW: 'VIEW';
VIEWS: 'VIEWS';
VOID: 'VOID';
WATERMARK: 'WATERMARK';
WEEK: 'WEEK';
WEEKS: 'WEEKS';
WHEN: 'WHEN';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,10 @@ createPipelineDatasetHeader
;

streamRelationPrimary
: STREAM multipartIdentifier optionsClause? tableAlias #streamTableName
| STREAM LEFT_PAREN multipartIdentifier RIGHT_PAREN optionsClause? tableAlias #streamTableName
: STREAM multipartIdentifier optionsClause? watermarkClause?
tableAlias #streamTableName
| STREAM LEFT_PAREN multipartIdentifier RIGHT_PAREN
optionsClause? watermarkClause? tableAlias #streamTableName
;

setResetStatement
Expand Down Expand Up @@ -921,6 +923,10 @@ lateralView
: LATERAL VIEW (OUTER)? qualifiedName LEFT_PAREN (expression (COMMA expression)*)? RIGHT_PAREN tblName=identifier (AS? colName+=identifier (COMMA colName+=identifier)*)?
;

// Declares an event time watermark on the relation this clause is attached to:
//   WATERMARK <namedExpression> DELAY OF <interval>
// `colName` is the event time expression (optionally aliased) and `delay` is the delay
// threshold; the AST builder rejects a negative delay.
watermarkClause
: WATERMARK colName=namedExpression DELAY OF delay=interval
;

setQuantifier
: DISTINCT
| ALL
Expand Down Expand Up @@ -995,9 +1001,11 @@ identifierComment
relationPrimary
: streamRelationPrimary #streamRelation
| identifierReference temporalClause?
optionsClause? sample? tableAlias #tableName
| LEFT_PAREN query RIGHT_PAREN sample? tableAlias #aliasedQuery
| LEFT_PAREN relation RIGHT_PAREN sample? tableAlias #aliasedRelation
optionsClause? sample? watermarkClause? tableAlias #tableName
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

watermarkClause is already defined in streamRelationPrimary. Do we still need it here? Is it also applied for non-stream relation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

relation here can be a temp view which could be technically streaming without STREAM keyword

| LEFT_PAREN query RIGHT_PAREN sample? watermarkClause?
tableAlias #aliasedQuery
| LEFT_PAREN relation RIGHT_PAREN sample?
watermarkClause? tableAlias #aliasedRelation
| inlineTable #inlineTableDefault2
| functionTable #tableValuedFunction
;
Expand All @@ -1006,6 +1014,8 @@ optionsClause
: WITH options=propertyList
;

// Unlike all other types of expression for relation, we do not support watermarkClause for
// inlineTable.
inlineTable
: VALUES expression (COMMA expression)* tableAlias
;
Expand Down Expand Up @@ -1042,10 +1052,13 @@ functionTableArgument
| functionArgument
;

// This is only used in relationPrimary, where having watermarkClause makes sense. If this rule
// becomes referenced by another clause, please check whether watermarkClause makes sense for
// that clause. If not, consider separating this rule.
functionTable
: funcName=functionName LEFT_PAREN
(functionTableArgument (COMMA functionTableArgument)*)?
RIGHT_PAREN tableAlias
RIGHT_PAREN watermarkClause? tableAlias
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the doc https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-qry-select-watermark, it looks like table_valued_function has no watermark_clause support, but we want to have it here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ouch I think it's missed in that doc.

;

tableAlias
Expand Down Expand Up @@ -1793,6 +1806,7 @@ ansiNonReserved
| DEFAULT
| DEFINED
| DEFINER
| DELAY
| DELETE
| DELIMITED
| DESC
Expand Down Expand Up @@ -2035,6 +2049,7 @@ ansiNonReserved
| WEEK
| WEEKS
| WHILE
| WATERMARK
| WINDOW
| WITHOUT
| YEAR
Expand Down Expand Up @@ -2160,6 +2175,7 @@ nonReserved
| DEFAULT
| DEFINED
| DEFINER
| DELAY
| DELETE
| DELIMITED
| DESC
Expand Down Expand Up @@ -2439,6 +2455,7 @@ nonReserved
| VIEW
| VIEWS
| VOID
| WATERMARK
| WEEK
| WEEKS
| WHILE
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.{Expression, NamedExpression}
import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.trees.TreePattern

/**
 * Resolve [[UnresolvedEventTimeWatermark]] to [[EventTimeWatermark]].
 *
 * The rule only fires once both the event time expression and the children of the unresolved
 * node are resolved. When the event time expression is already part of the child's output, the
 * watermark is placed directly on top of the child. Otherwise a [[Project]] that exposes the
 * expression (followed by all columns of the child) is injected, and the watermark is defined on
 * the first column of that projection.
 *
 * Fails with `CANNOT_USE_MULTI_ALIASES_IN_WATERMARK_CLAUSE` when the event time expression is a
 * [[MultiAlias]].
 */
object ResolveEventTimeWatermark extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUpWithPruning(
    _.containsPattern(TreePattern.UNRESOLVED_EVENT_TIME_WATERMARK), ruleId) {

    case u: UnresolvedEventTimeWatermark if u.eventTimeColExpr.resolved && u.childrenResolved =>
      // Validate and normalize the event time expression in a single match instead of a
      // separate isInstanceOf check followed by a match.
      val namedExpression: NamedExpression = u.eventTimeColExpr match {
        // NOTE(review): this case is only reachable if a MultiAlias can report `resolved`
        // (see the guard above) - TODO confirm.
        case _: MultiAlias =>
          throw new AnalysisException(
            errorClass = "CANNOT_USE_MULTI_ALIASES_IN_WATERMARK_CLAUSE",
            messageParameters = Map()
          )
        case named: NamedExpression => named
        // NOTE(review): `toAttribute` is invoked on this UnresolvedAlias below - confirm that
        // UnresolvedAlias supports it, or require an explicit alias instead. TODO confirm.
        case other => UnresolvedAlias(other)
      }

      // Generated only after validation so the error path does not create an unused UUID.
      val uuid = java.util.UUID.randomUUID()

      if (u.child.outputSet.contains(namedExpression)) {
        // We don't need to have projection since the attribute being referenced will be available.
        EventTimeWatermark(uuid, namedExpression.toAttribute, u.delay, u.child)
      } else {
        // We need to inject projection as we can't find the matching column directly in the
        // child output.
        val proj = Project(Seq(namedExpression, UnresolvedStar(None)), u.child)
        val attrRef = proj.projectList.head.toAttribute
        EventTimeWatermark(uuid, attrRef, u.delay, proj)
      }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import org.apache.spark.sql.connector.catalog.TableWritePrivilege
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
import org.apache.spark.sql.types.{DataType, Metadata, StructType}
import org.apache.spark.sql.util.{CaseInsensitiveStringMap, SchemaUtils}
import org.apache.spark.unsafe.types.CalendarInterval
import org.apache.spark.util.ArrayImplicits._

/**
Expand Down Expand Up @@ -1228,3 +1229,15 @@ case class UnresolvedExecuteImmediate(

final override val nodePatterns: Seq[TreePattern] = Seq(EXECUTE_IMMEDIATE)
}

/**
 * Unresolved logical node carrying an event time watermark definition on top of `child`.
 * It is tagged with the `UNRESOLVED_EVENT_TIME_WATERMARK` tree pattern so that analyzer rules
 * can prune on it.
 *
 * @param eventTimeColExpr expression designating the event time column
 * @param delay delay threshold of the watermark
 * @param child the plan the watermark is defined on
 */
case class UnresolvedEventTimeWatermark(
    eventTimeColExpr: Expression,
    delay: CalendarInterval,
    child: LogicalPlan) extends UnresolvedUnaryNode {

  final override val nodePatterns: Seq[TreePattern] = UNRESOLVED_EVENT_TIME_WATERMARK :: Nil

  override protected def withNewChildInternal(
      newChild: LogicalPlan): UnresolvedEventTimeWatermark = {
    copy(child = newChild)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.trees.CurrentOrigin
import org.apache.spark.sql.catalyst.types.DataTypeUtils
import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String}

/**
* A collection of implicit conversions that create a DSL for constructing catalyst data structures.
Expand Down Expand Up @@ -566,6 +566,17 @@ package object dsl extends SQLConfHelper {
}

def deduplicate(colNames: Attribute*): LogicalPlan = Deduplicate(colNames, logicalPlan)

/**
 * Defines an event time watermark on this plan. The event time expression is exposed through
 * a [[Project]] that lists it first, followed by all columns of the plan, and the resulting
 * [[EventTimeWatermark]] (with a freshly generated UUID) refers to that first column.
 *
 * @param expr event time expression; wrapped in [[UnresolvedAlias]] when it is not named
 * @param delayThreshold delay threshold of the watermark
 */
def watermark(expr: Expression, delayThreshold: CalendarInterval): LogicalPlan = {
  val eventTimeColumn: NamedExpression = expr match {
    case named: NamedExpression => named
    case other => UnresolvedAlias(other)
  }
  val projection = Project(Seq(eventTimeColumn, UnresolvedStar(None)), logicalPlan)
  val eventTimeAttr = projection.projectList.head.toAttribute

  EventTimeWatermark(
    java.util.UUID.randomUUID(),
    eventTimeAttr,
    delayThreshold,
    projection)
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2076,6 +2076,22 @@ class AstBuilder extends DataTypeAstBuilder
query)
}

/**
 * Wrap a logical plan in an [[UnresolvedEventTimeWatermark]] built from a WATERMARK clause.
 *
 * The delay interval of the clause is rendered to its string form and parsed back into a
 * calendar interval; a negative delay is rejected via `require`.
 */
private def withWatermark(
    ctx: WatermarkClauseContext,
    query: LogicalPlan): LogicalPlan = withOrigin(ctx) {
  val eventTimeExpr = visitNamedExpression(ctx.namedExpression())
  val intervalText = visitInterval(ctx.delay).toString

  val parsedDelay = IntervalUtils.fromIntervalString(intervalText)
  require(
    !IntervalUtils.isNegative(parsedDelay),
    s"delay threshold ($intervalText) should not be negative.")

  UnresolvedEventTimeWatermark(eventTimeExpr, parsedDelay, query)
}

/**
* Create a single relation referenced in a FROM clause. This method is used when a part of the
* join condition is nested, for example:
Expand Down Expand Up @@ -2252,7 +2268,8 @@ class AstBuilder extends DataTypeAstBuilder
val relation = createUnresolvedRelation(ctx.identifierReference, Option(ctx.optionsClause))
val table = mayApplyAliasPlan(
ctx.tableAlias, relation.optionalMap(ctx.temporalClause)(withTimeTravel))
table.optionalMap(ctx.sample)(withSample)
val sample = table.optionalMap(ctx.sample)(withSample)
sample.optionalMap(ctx.watermarkClause)(withWatermark)
}

override def visitVersion(ctx: VersionContext): Option[String] = {
Expand Down Expand Up @@ -2392,7 +2409,9 @@ class AstBuilder extends DataTypeAstBuilder

val tvfAliases = if (aliases.nonEmpty) UnresolvedTVFAliases(ident, tvf, aliases) else tvf

tvfAliases.optionalMap(func.tableAlias.strictIdentifier)(aliasPlan)
val watermarkClause = func.watermarkClause()
val tvfWithWatermark = tvfAliases.optionalMap(watermarkClause)(withWatermark)
tvfWithWatermark.optionalMap(func.tableAlias.strictIdentifier)(aliasPlan)
})
}

Expand All @@ -2404,7 +2423,9 @@ class AstBuilder extends DataTypeAstBuilder
optionsClause = Option(ctx.optionsClause),
writePrivileges = Seq.empty,
isStreaming = true)
mayApplyAliasPlan(ctx.tableAlias, tableStreamingRelation)

val tableWithWatermark = tableStreamingRelation.optionalMap(ctx.watermarkClause)(withWatermark)
mayApplyAliasPlan(ctx.tableAlias, tableWithWatermark)
}

/**
Expand Down Expand Up @@ -2447,7 +2468,8 @@ class AstBuilder extends DataTypeAstBuilder
*/
override def visitAliasedRelation(ctx: AliasedRelationContext): LogicalPlan = withOrigin(ctx) {
val relation = plan(ctx.relation).optionalMap(ctx.sample)(withSample)
mayApplyAliasPlan(ctx.tableAlias, relation)
val watermark = relation.optionalMap(ctx.watermarkClause)(withWatermark)
mayApplyAliasPlan(ctx.tableAlias, watermark)
}

/**
Expand All @@ -2460,7 +2482,7 @@ class AstBuilder extends DataTypeAstBuilder
*/
override def visitAliasedQuery(ctx: AliasedQueryContext): LogicalPlan = withOrigin(ctx) {
val relation = plan(ctx.query).optionalMap(ctx.sample)(withSample)
if (ctx.tableAlias.strictIdentifier == null) {
val alias = if (ctx.tableAlias.strictIdentifier == null) {
// For un-aliased subqueries, use a default alias name that is not likely to conflict with
// normal subquery names, so that parent operators can only access the columns in subquery by
// unqualified names. Users can still use this special qualifier to access columns if they
Expand All @@ -2469,6 +2491,7 @@ class AstBuilder extends DataTypeAstBuilder
} else {
mayApplyAliasPlan(ctx.tableAlias, relation)
}
alias.optionalMap(ctx.watermarkClause)(withWatermark)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ object RuleIdCollection {
"org.apache.spark.sql.catalyst.analysis.ResolveRowLevelCommandAssignments" ::
"org.apache.spark.sql.catalyst.analysis.ResolveSetVariable" ::
"org.apache.spark.sql.catalyst.analysis.ResolveExecuteImmediate" ::
"org.apache.spark.sql.catalyst.analysis.ResolveEventTimeWatermark" ::
"org.apache.spark.sql.catalyst.analysis.ResolveTableSpec" ::
"org.apache.spark.sql.catalyst.analysis.ResolveTimeZone" ::
"org.apache.spark.sql.catalyst.analysis.ResolveUnion" ::
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ object TreePattern extends Enumeration {

// Unresolved Plan patterns (Alphabetically ordered)
val PLAN_WITH_UNRESOLVED_IDENTIFIER: Value = Value
val UNRESOLVED_EVENT_TIME_WATERMARK: Value = Value
val UNRESOLVED_HAVING: Value = Value
val UNRESOLVED_HINT: Value = Value
val UNRESOLVED_FUNC: Value = Value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -595,9 +595,8 @@ class Dataset[T] private[sql](
val parsedDelay = IntervalUtils.fromIntervalString(delayThreshold)
require(!IntervalUtils.isNegative(parsedDelay),
s"delay threshold ($delayThreshold) should not be negative.")
EliminateEventTimeWatermark(
EventTimeWatermark(util.UUID.randomUUID(), UnresolvedAttribute(eventTime),
parsedDelay, logicalPlan))
EventTimeWatermark(util.UUID.randomUUID(), UnresolvedAttribute(eventTime),
parsedDelay, logicalPlan)
}

/** @inheritdoc */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package org.apache.spark.sql.internal
import org.apache.spark.annotation.Unstable
import org.apache.spark.sql.{DataSourceRegistration, ExperimentalMethods, SparkSessionExtensions, UDTFRegistration}
import org.apache.spark.sql.artifact.ArtifactManager
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EvalSubqueriesForTimeTravel, FunctionRegistry, InvokeProcedures, ReplaceCharWithVarchar, ResolveDataSource, ResolveExecuteImmediate, ResolveSessionCatalog, ResolveTranspose, TableFunctionRegistry}
import org.apache.spark.sql.catalyst.analysis.{Analyzer, EvalSubqueriesForTimeTravel, FunctionRegistry, InvokeProcedures, ReplaceCharWithVarchar, ResolveDataSource, ResolveEventTimeWatermark, ResolveExecuteImmediate, ResolveSessionCatalog, ResolveTranspose, TableFunctionRegistry}
import org.apache.spark.sql.catalyst.analysis.resolver.ResolverExtension
import org.apache.spark.sql.catalyst.catalog.{FunctionExpressionBuilder, SessionCatalog}
import org.apache.spark.sql.catalyst.expressions.{Expression, ExtractSemiStructuredFields}
Expand Down Expand Up @@ -246,6 +246,7 @@ abstract class BaseSessionStateBuilder(
new InvokeProcedures(session) +:
ResolveExecuteImmediate(session, this.catalogManager) +:
ExtractSemiStructuredFields +:
ResolveEventTimeWatermark +:
customResolutionRules

override val postHocResolutionRules: Seq[Rule[LogicalPlan]] =
Expand Down
Loading