apache · irenjj · May 8, 2025 · May 8, 2025 · May 9, 2025 · May 9, 2025
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
@@ -271,6 +271,9 @@ config_namespace! {
 
         /// Specifies the recursion depth limit when parsing complex SQL Queries
         pub recursion_limit: usize, default = 50
+
+        /// When set to true, optimizer will try to eliminate sort in subquery.
+        pub enable_eliminate_subquery_sort: bool, default = true
     }
 }
 

diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs
@@ -496,6 +496,8 @@ impl SessionState {
             support_varchar_with_length: sql_parser_options.support_varchar_with_length,
             map_varchar_to_utf8view: sql_parser_options.map_varchar_to_utf8view,
             collect_spans: sql_parser_options.collect_spans,
+            enable_eliminate_subquery_sort: sql_parser_options
+                .enable_eliminate_subquery_sort,
         }
     }
 

diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
@@ -54,6 +54,8 @@ pub struct ParserOptions {
     pub collect_spans: bool,
     /// Whether `VARCHAR` is mapped to `Utf8View` during SQL planning.
     pub map_varchar_to_utf8view: bool,
+    /// Whether to remove sorts from subqueries that have no LIMIT/OFFSET.
+    pub enable_eliminate_subquery_sort: bool,
 }
 
 impl ParserOptions {
@@ -75,6 +77,7 @@ impl ParserOptions {
             map_varchar_to_utf8view: false,
             enable_options_value_normalization: false,
             collect_spans: false,
+            enable_eliminate_subquery_sort: true,
         }
     }
 
@@ -147,6 +150,7 @@ impl From<&SqlParserOptions> for ParserOptions {
             enable_options_value_normalization: options
                 .enable_options_value_normalization,
             collect_spans: options.collect_spans,
+            enable_eliminate_subquery_sort: options.enable_eliminate_subquery_sort,
         }
     }
 }

diff --git a/datafusion/sql/src/relation/mod.rs b/datafusion/sql/src/relation/mod.rs
@@ -162,7 +162,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             }
         };
 
-        let optimized_plan = optimize_subquery_sort(plan)?.data;
+        let optimized_plan =
+            optimize_subquery_sort(plan, self.options.enable_eliminate_subquery_sort)?
+                .data;
         if let Some(alias) = alias {
             self.apply_table_alias(optimized_plan, alias)
         } else {
@@ -226,7 +228,10 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
     }
 }
 
-fn optimize_subquery_sort(plan: LogicalPlan) -> Result<Transformed<LogicalPlan>> {
+fn optimize_subquery_sort(
+    plan: LogicalPlan,
+    enable_eliminate: bool,
+) -> Result<Transformed<LogicalPlan>> {
     // When initializing subqueries, we examine sort options since they might be unnecessary.
     // They are only important if the subquery result is affected by the ORDER BY statement,
     // which can happen when we have:
@@ -241,7 +246,7 @@ fn optimize_subquery_sort(plan: LogicalPlan) -> Result<Transformed<LogicalPlan>>
         }
         match c {
             LogicalPlan::Sort(s) => {
-                if !has_limit {
+                if !has_limit && enable_eliminate {
                     has_limit = false;
                     return Ok(Transformed::yes(s.input.as_ref().clone()));
                 }

diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs
@@ -3363,6 +3363,7 @@ fn parse_decimals_parser_options() -> ParserOptions {
         map_varchar_to_utf8view: false,
         enable_options_value_normalization: false,
         collect_spans: false,
+        enable_eliminate_subquery_sort: true,
     }
 }
 
@@ -3374,6 +3375,7 @@ fn ident_normalization_parser_options_no_ident_normalization() -> ParserOptions
         map_varchar_to_utf8view: false,
         enable_options_value_normalization: false,
         collect_spans: false,
+        enable_eliminate_subquery_sort: true,
     }
 }
 
@@ -3385,6 +3387,7 @@ fn ident_normalization_parser_options_ident_normalization() -> ParserOptions {
         map_varchar_to_utf8view: false,
         enable_options_value_normalization: false,
         collect_spans: false,
+        enable_eliminate_subquery_sort: true,
     }
 }
 

diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -305,6 +305,7 @@ datafusion.optimizer.skip_failed_rules false
 datafusion.optimizer.top_down_join_key_reordering true
 datafusion.sql_parser.collect_spans false
 datafusion.sql_parser.dialect generic
+datafusion.sql_parser.enable_eliminate_subquery_sort true
 datafusion.sql_parser.enable_ident_normalization true
 datafusion.sql_parser.enable_options_value_normalization false
 datafusion.sql_parser.map_varchar_to_utf8view false
@@ -415,6 +416,7 @@ datafusion.optimizer.skip_failed_rules false When set to true, the logical plan
 datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
 datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes.
 datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
+datafusion.sql_parser.enable_eliminate_subquery_sort true When set to true, optimizer will try to eliminate sort in subquery.
 datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
 datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
 datafusion.sql_parser.map_varchar_to_utf8view false If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8`  during SQL planning. Default is false.

diff --git a/datafusion/sqllogictest/test_files/subquery_sort.slt b/datafusion/sqllogictest/test_files/subquery_sort.slt
@@ -176,3 +176,35 @@ b 5
 c 4
 d 1
 e 1
+
+statement ok
+set datafusion.sql_parser.enable_eliminate_subquery_sort=false
+
+query TT
+EXPLAIN SELECT c1 FROM (SELECT c1 FROM sink_table ORDER BY c2) AS ttt
+----
+logical_plan
+01)SubqueryAlias: ttt
+02)--Projection: sink_table.c1
+03)----Sort: sink_table.c2 ASC NULLS LAST
+04)------TableScan: sink_table projection=[c1, c2]
+physical_plan
+01)ProjectionExec: expr=[c1@0 as c1]
+02)--SortExec: expr=[c2@1 ASC NULLS LAST], preserve_partitioning=[false]
+03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2], file_type=csv, has_header=true
+
+query TT
+explain with t as (select c1 from sink_table order by c2 nulls last) select * from t;
+----
+logical_plan
+01)SubqueryAlias: t
+02)--Projection: sink_table.c1
+03)----Sort: sink_table.c2 ASC NULLS LAST
+04)------TableScan: sink_table projection=[c1, c2]
+physical_plan
+01)ProjectionExec: expr=[c1@0 as c1]
+02)--SortExec: expr=[c2@1 ASC NULLS LAST], preserve_partitioning=[false]
+03)----DataSourceExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2], file_type=csv, has_header=true
+
+statement ok
+set datafusion.sql_parser.enable_eliminate_subquery_sort=true
diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md
@@ -133,6 +133,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus
 | datafusion.sql_parser.map_varchar_to_utf8view                           | false                     | If true, `VARCHAR` is mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. Default is false.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
 | datafusion.sql_parser.collect_spans                                     | false                     | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
 | datafusion.sql_parser.recursion_limit                                   | 50                        | Specifies the recursion depth limit when parsing complex SQL Queries                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    |
+| datafusion.sql_parser.enable_eliminate_subquery_sort                    | true                      | When set to true, optimizer will try to eliminate sort in subquery.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |
 | datafusion.format.safe                                                  | true                      | If set to `true` any formatting errors will be written to the output instead of being converted into a [`std::fmt::Error`]                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
 | datafusion.format.null                                                  |                           | Format string for nulls                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
 | datafusion.format.date_format                                           | %Y-%m-%d                  | Date format for date arrays                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |