Skip to content

Add configuration for eliminating sort in subquery #15993

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,9 @@ config_namespace! {

/// Specifies the recursion depth limit when parsing complex SQL Queries
pub recursion_limit: usize, default = 50

/// When set to true, optimizer will try to eliminate sort in subquery.
pub enable_eliminate_subquery_sort: bool, default = true
}
}

Expand Down
2 changes: 2 additions & 0 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,8 @@ impl SessionState {
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
map_varchar_to_utf8view: sql_parser_options.map_varchar_to_utf8view,
collect_spans: sql_parser_options.collect_spans,
enable_eliminate_subquery_sort: sql_parser_options
.enable_eliminate_subquery_sort,
}
}

Expand Down
4 changes: 4 additions & 0 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ pub struct ParserOptions {
pub collect_spans: bool,
/// Whether `VARCHAR` is mapped to `Utf8View` during SQL planning.
pub map_varchar_to_utf8view: bool,
/// Whether removing sorting in subqueries without LIMIT/OFFSET.
pub enable_eliminate_subquery_sort: bool,
}

impl ParserOptions {
Expand All @@ -75,6 +77,7 @@ impl ParserOptions {
map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
enable_eliminate_subquery_sort: true,
}
}

Expand Down Expand Up @@ -147,6 +150,7 @@ impl From<&SqlParserOptions> for ParserOptions {
enable_options_value_normalization: options
.enable_options_value_normalization,
collect_spans: options.collect_spans,
enable_eliminate_subquery_sort: options.enable_eliminate_subquery_sort,
}
}
}
Expand Down
11 changes: 8 additions & 3 deletions datafusion/sql/src/relation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
}
};

let optimized_plan = optimize_subquery_sort(plan)?.data;
let optimized_plan =
optimize_subquery_sort(plan, self.options.enable_eliminate_subquery_sort)?
.data;
if let Some(alias) = alias {
self.apply_table_alias(optimized_plan, alias)
} else {
Expand Down Expand Up @@ -226,7 +228,10 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
}
}

fn optimize_subquery_sort(plan: LogicalPlan) -> Result<Transformed<LogicalPlan>> {
fn optimize_subquery_sort(
plan: LogicalPlan,
enable_eliminate: bool,
) -> Result<Transformed<LogicalPlan>> {
// When initializing subqueries, we examine sort options since they might be unnecessary.
// They are only important if the subquery result is affected by the ORDER BY statement,
// which can happen when we have:
Expand All @@ -241,7 +246,7 @@ fn optimize_subquery_sort(plan: LogicalPlan) -> Result<Transformed<LogicalPlan>>
}
match c {
LogicalPlan::Sort(s) => {
if !has_limit {
if !has_limit && enable_eliminate {
has_limit = false;
return Ok(Transformed::yes(s.input.as_ref().clone()));
}
Expand Down
3 changes: 3 additions & 0 deletions datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3363,6 +3363,7 @@ fn parse_decimals_parser_options() -> ParserOptions {
map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
enable_eliminate_subquery_sort: true,
}
}

Expand All @@ -3374,6 +3375,7 @@ fn ident_normalization_parser_options_no_ident_normalization() -> ParserOptions
map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
enable_eliminate_subquery_sort: true,
}
}

Expand All @@ -3385,6 +3387,7 @@ fn ident_normalization_parser_options_ident_normalization() -> ParserOptions {
map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
enable_eliminate_subquery_sort: true,
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be a good idea to add tests for

enable_eliminate_subquery_sort: false

too?


Expand Down
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ datafusion.optimizer.skip_failed_rules false
datafusion.optimizer.top_down_join_key_reordering true
datafusion.sql_parser.collect_spans false
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_eliminate_subquery_sort true
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization false
datafusion.sql_parser.map_varchar_to_utf8view false
Expand Down Expand Up @@ -415,6 +416,7 @@ datafusion.optimizer.skip_failed_rules false When set to true, the logical plan
datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes.
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
datafusion.sql_parser.enable_eliminate_subquery_sort true When set to true, optimizer will try to eliminate sort in subquery.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
datafusion.sql_parser.map_varchar_to_utf8view false If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false.
Expand Down
32 changes: 32 additions & 0 deletions datafusion/sqllogictest/test_files/subquery_sort.slt
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,35 @@ b 5
c 4
d 1
e 1

statement ok
set datafusion.sql_parser.enable_eliminate_subquery_sort=false

query TT
EXPLAIN SELECT c1 FROM (SELECT c1 FROM sink_table ORDER BY c2) AS ttt
----
logical_plan
01)SubqueryAlias: ttt
02)--Projection: sink_table.c1
03)----Sort: sink_table.c2 ASC NULLS LAST
04)------TableScan: sink_table projection=[c1, c2]
physical_plan
01)ProjectionExec: expr=[c1@0 as c1]
02)--SortExec: expr=[c2@1 ASC NULLS LAST], preserve_partitioning=[false]
03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2], file_type=csv, has_header=true

query TT
explain with t as (select c1 from sink_table order by c2 nulls last) select * from t;
----
logical_plan
01)SubqueryAlias: t
02)--Projection: sink_table.c1
03)----Sort: sink_table.c2 ASC NULLS LAST
04)------TableScan: sink_table projection=[c1, c2]
physical_plan
01)ProjectionExec: expr=[c1@0 as c1]
02)--SortExec: expr=[c2@1 ASC NULLS LAST], preserve_partitioning=[false]
03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2], file_type=csv, has_header=true

statement ok
set datafusion.sql_parser.enable_eliminate_subquery_sort=true
1 change: 1 addition & 0 deletions docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.sql_parser.map_varchar_to_utf8view | false | If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false. |
| datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. |
| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |
| datafusion.sql_parser.enable_eliminate_subquery_sort | true | When set to true, optimizer will try to eliminate sort in subquery. |
| datafusion.format.safe | true | If set to `true` any formatting errors will be written to the output instead of being converted into a [`std::fmt::Error`] |
| datafusion.format.null | | Format string for nulls |
| datafusion.format.date_format | %Y-%m-%d | Date format for date arrays |
Expand Down