Skip to content

Commit 7fa6378

Browse files
alamb and Jefffrey authored
Consolidate EliminateNestedUnion and EliminateOneUnion optimizer rules (#18678)
## Which issue does this PR close?

- Related to #17261

## Rationale for this change

Each time a LogicalPlan is rewritten to eliminate a Union, we traverse the entire plan tree and copy some non-trivial parts of it. Thus it is faster to plan when we have fewer passes over the plan tree.

The EliminateNestedUnion and EliminateOneUnion rules both do similar things, and the EliminateNestedUnion rule is very simple. So let's combine them into a single rule that does both things in one pass over the plan tree.

## What changes are included in this PR?

Consolidate `EliminateNestedUnion` and `EliminateOneUnion` optimizer rules into a single pass.

## Are these changes tested?

Yes, with existing tests. I will also run planning benchmarks.

## Are there any user-facing changes?

No

---------

Co-authored-by: Jeffrey Vo <[email protected]>
1 parent b05d811 commit 7fa6378

File tree

7 files changed

+53
-147
lines changed

7 files changed

+53
-147
lines changed

datafusion/optimizer/src/eliminate_one_union.rs

Lines changed: 0 additions & 121 deletions
This file was deleted.

datafusion/optimizer/src/lib.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,12 +51,17 @@ pub mod eliminate_filter;
5151
pub mod eliminate_group_by_constant;
5252
pub mod eliminate_join;
5353
pub mod eliminate_limit;
54-
pub mod eliminate_nested_union;
55-
pub mod eliminate_one_union;
54+
#[deprecated(since = "52.0.0", note = "Please use OptimizeUnions instead")]
55+
pub mod eliminate_nested_union {
56+
use crate::optimize_unions::OptimizeUnions;
57+
#[deprecated(since = "52.0.0", note = "Please use OptimizeUnions instead")]
58+
pub type EliminateNestedUnion = OptimizeUnions;
59+
}
5660
pub mod eliminate_outer_join;
5761
pub mod extract_equijoin_predicate;
5862
pub mod filter_null_join_keys;
5963
pub mod optimize_projections;
64+
pub mod optimize_unions;
6065
pub mod optimizer;
6166
pub mod propagate_empty_relation;
6267
pub mod push_down_filter;

datafusion/optimizer/src/eliminate_nested_union.rs renamed to datafusion/optimizer/src/optimize_unions.rs

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
//! [`EliminateNestedUnion`]: flattens nested `Union` to a single `Union`
18+
//! [`OptimizeUnions`]: flattens nested `Union` nodes and removes `Union` nodes with a single input.
1919
use crate::optimizer::ApplyOrder;
2020
use crate::{OptimizerConfig, OptimizerRule};
2121
use datafusion_common::tree_node::Transformed;
@@ -26,19 +26,21 @@ use itertools::Itertools;
2626
use std::sync::Arc;
2727

2828
#[derive(Default, Debug)]
29-
/// An optimization rule that replaces nested unions with a single union.
30-
pub struct EliminateNestedUnion;
29+
/// An optimization rule that
30+
/// 1. replaces nested unions with a single union.
31+
/// 2. removes unions with a single input.
32+
pub struct OptimizeUnions;
3133

32-
impl EliminateNestedUnion {
34+
impl OptimizeUnions {
3335
#[allow(missing_docs)]
3436
pub fn new() -> Self {
3537
Self {}
3638
}
3739
}
3840

39-
impl OptimizerRule for EliminateNestedUnion {
41+
impl OptimizerRule for OptimizeUnions {
4042
fn name(&self) -> &str {
41-
"eliminate_nested_union"
43+
"optimize_unions"
4244
}
4345

4446
fn apply_order(&self) -> Option<ApplyOrder> {
@@ -55,6 +57,9 @@ impl OptimizerRule for EliminateNestedUnion {
5557
_config: &dyn OptimizerConfig,
5658
) -> Result<Transformed<LogicalPlan>> {
5759
match plan {
60+
LogicalPlan::Union(Union { mut inputs, .. }) if inputs.len() == 1 => Ok(
61+
Transformed::yes(Arc::unwrap_or_clone(inputs.pop().unwrap())),
62+
),
5863
LogicalPlan::Union(Union { inputs, schema }) => {
5964
let inputs = inputs
6065
.into_iter()
@@ -139,7 +144,7 @@ mod tests {
139144
let analyzed_plan = Analyzer::with_rules(vec![Arc::new(TypeCoercion::new())])
140145
.execute_and_check($plan, &options, |_, _| {})?;
141146
let optimizer_ctx = OptimizerContext::new().with_max_passes(1);
142-
let rules: Vec<Arc<dyn crate::OptimizerRule + Send + Sync>> = vec![Arc::new(EliminateNestedUnion::new())];
147+
let rules: Vec<Arc<dyn crate::OptimizerRule + Send + Sync>> = vec![Arc::new(OptimizeUnions::new())];
143148
assert_optimized_plan_eq_snapshot!(
144149
optimizer_ctx,
145150
rules,
@@ -420,4 +425,28 @@ mod tests {
420425
TableScan: table_1
421426
")
422427
}
428+
429+
#[test]
430+
fn eliminate_one_union() -> Result<()> {
431+
let plan = table_scan(Some("table"), &schema(), None)?.build()?;
432+
let schema = Arc::clone(plan.schema());
433+
// note it is not possible to create a single input union via
434+
// LogicalPlanBuilder so create it manually here
435+
let plan = LogicalPlan::Union(Union {
436+
inputs: vec![Arc::new(plan)],
437+
schema,
438+
});
439+
440+
// Note we can't use the same assert_optimized_plan_equal as creating a
441+
// single input union is not possible via LogicalPlanBuilder and other passes
442+
// throw errors / don't handle the schema correctly.
443+
assert_optimized_plan_eq_snapshot!(
444+
OptimizerContext::new().with_max_passes(1),
445+
vec![Arc::new(OptimizeUnions::new())],
446+
plan,
447+
@r"
448+
TableScan: table
449+
"
450+
)
451+
}
423452
}

datafusion/optimizer/src/optimizer.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,11 @@ use crate::eliminate_filter::EliminateFilter;
4141
use crate::eliminate_group_by_constant::EliminateGroupByConstant;
4242
use crate::eliminate_join::EliminateJoin;
4343
use crate::eliminate_limit::EliminateLimit;
44-
use crate::eliminate_nested_union::EliminateNestedUnion;
45-
use crate::eliminate_one_union::EliminateOneUnion;
4644
use crate::eliminate_outer_join::EliminateOuterJoin;
4745
use crate::extract_equijoin_predicate::ExtractEquijoinPredicate;
4846
use crate::filter_null_join_keys::FilterNullJoinKeys;
4947
use crate::optimize_projections::OptimizeProjections;
48+
use crate::optimize_unions::OptimizeUnions;
5049
use crate::plan_signature::LogicalPlanSignature;
5150
use crate::propagate_empty_relation::PropagateEmptyRelation;
5251
use crate::push_down_filter::PushDownFilter;
@@ -228,7 +227,7 @@ impl Optimizer {
228227
/// Create a new optimizer using the recommended list of rules
229228
pub fn new() -> Self {
230229
let rules: Vec<Arc<dyn OptimizerRule + Sync + Send>> = vec![
231-
Arc::new(EliminateNestedUnion::new()),
230+
Arc::new(OptimizeUnions::new()),
232231
Arc::new(SimplifyExpressions::new()),
233232
Arc::new(ReplaceDistinctWithAggregate::new()),
234233
Arc::new(EliminateJoin::new()),
@@ -241,8 +240,6 @@ impl Optimizer {
241240
Arc::new(EliminateCrossJoin::new()),
242241
Arc::new(EliminateLimit::new()),
243242
Arc::new(PropagateEmptyRelation::new()),
244-
// Must be after PropagateEmptyRelation
245-
Arc::new(EliminateOneUnion::new()),
246243
Arc::new(FilterNullJoinKeys::default()),
247244
Arc::new(EliminateOuterJoin::new()),
248245
// Filters can't be pushed down past Limits, we should do PushDownFilter after PushDownLimit

datafusion/optimizer/src/propagate_empty_relation.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ mod tests {
244244

245245
use crate::assert_optimized_plan_eq_snapshot;
246246
use crate::eliminate_filter::EliminateFilter;
247-
use crate::eliminate_nested_union::EliminateNestedUnion;
247+
use crate::optimize_unions::OptimizeUnions;
248248
use crate::test::{
249249
assert_optimized_plan_with_rules, test_table_scan, test_table_scan_fields,
250250
test_table_scan_with_name,
@@ -277,7 +277,7 @@ mod tests {
277277
assert_optimized_plan_with_rules(
278278
vec![
279279
Arc::new(EliminateFilter::new()),
280-
Arc::new(EliminateNestedUnion::new()),
280+
Arc::new(OptimizeUnions::new()),
281281
Arc::new(PropagateEmptyRelation::new()),
282282
],
283283
plan,

datafusion/proto/tests/cases/roundtrip_logical_plan.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ use datafusion::datasource::listing::{
2929
ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
3030
};
3131
use datafusion::execution::options::ArrowReadOptions;
32-
use datafusion::optimizer::eliminate_nested_union::EliminateNestedUnion;
32+
use datafusion::optimizer::optimize_unions::OptimizeUnions;
3333
use datafusion::optimizer::Optimizer;
3434
use datafusion_common::parsers::CompressionTypeVariant;
3535
use datafusion_functions_aggregate::sum::sum_distinct;
@@ -2744,7 +2744,7 @@ async fn roundtrip_union_query() -> Result<()> {
27442744
let logical_round_trip = logical_plan_from_bytes(&bytes, &ctx.task_ctx())?;
27452745
// proto deserialization only supports 2-way union, hence this plan has nested unions
27462746
// apply the flatten unions optimizer rule to be able to compare
2747-
let optimizer = Optimizer::with_rules(vec![Arc::new(EliminateNestedUnion::new())]);
2747+
let optimizer = Optimizer::with_rules(vec![Arc::new(OptimizeUnions::new())]);
27482748
let unnested = optimizer.optimize(logical_round_trip, &(ctx.state()), |_x, _y| {})?;
27492749
assert_eq!(
27502750
format!("{}", plan.display_indent_schema()),

datafusion/sqllogictest/test_files/explain.slt

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ initial_logical_plan
176176
logical_plan after resolve_grouping_function SAME TEXT AS ABOVE
177177
logical_plan after type_coercion SAME TEXT AS ABOVE
178178
analyzed_logical_plan SAME TEXT AS ABOVE
179-
logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
179+
logical_plan after optimize_unions SAME TEXT AS ABOVE
180180
logical_plan after simplify_expressions SAME TEXT AS ABOVE
181181
logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
182182
logical_plan after eliminate_join SAME TEXT AS ABOVE
@@ -189,7 +189,6 @@ logical_plan after eliminate_filter SAME TEXT AS ABOVE
189189
logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
190190
logical_plan after eliminate_limit SAME TEXT AS ABOVE
191191
logical_plan after propagate_empty_relation SAME TEXT AS ABOVE
192-
logical_plan after eliminate_one_union SAME TEXT AS ABOVE
193192
logical_plan after filter_null_join_keys SAME TEXT AS ABOVE
194193
logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
195194
logical_plan after push_down_limit SAME TEXT AS ABOVE
@@ -198,7 +197,7 @@ logical_plan after single_distinct_aggregation_to_group_by SAME TEXT AS ABOVE
198197
logical_plan after eliminate_group_by_constant SAME TEXT AS ABOVE
199198
logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
200199
logical_plan after optimize_projections TableScan: simple_explain_test projection=[a, b, c]
201-
logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
200+
logical_plan after optimize_unions SAME TEXT AS ABOVE
202201
logical_plan after simplify_expressions SAME TEXT AS ABOVE
203202
logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
204203
logical_plan after eliminate_join SAME TEXT AS ABOVE
@@ -211,7 +210,6 @@ logical_plan after eliminate_filter SAME TEXT AS ABOVE
211210
logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
212211
logical_plan after eliminate_limit SAME TEXT AS ABOVE
213212
logical_plan after propagate_empty_relation SAME TEXT AS ABOVE
214-
logical_plan after eliminate_one_union SAME TEXT AS ABOVE
215213
logical_plan after filter_null_join_keys SAME TEXT AS ABOVE
216214
logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
217215
logical_plan after push_down_limit SAME TEXT AS ABOVE
@@ -537,7 +535,7 @@ initial_logical_plan
537535
logical_plan after resolve_grouping_function SAME TEXT AS ABOVE
538536
logical_plan after type_coercion SAME TEXT AS ABOVE
539537
analyzed_logical_plan SAME TEXT AS ABOVE
540-
logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
538+
logical_plan after optimize_unions SAME TEXT AS ABOVE
541539
logical_plan after simplify_expressions SAME TEXT AS ABOVE
542540
logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
543541
logical_plan after eliminate_join SAME TEXT AS ABOVE
@@ -550,7 +548,6 @@ logical_plan after eliminate_filter SAME TEXT AS ABOVE
550548
logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
551549
logical_plan after eliminate_limit SAME TEXT AS ABOVE
552550
logical_plan after propagate_empty_relation SAME TEXT AS ABOVE
553-
logical_plan after eliminate_one_union SAME TEXT AS ABOVE
554551
logical_plan after filter_null_join_keys SAME TEXT AS ABOVE
555552
logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
556553
logical_plan after push_down_limit SAME TEXT AS ABOVE
@@ -559,7 +556,7 @@ logical_plan after single_distinct_aggregation_to_group_by SAME TEXT AS ABOVE
559556
logical_plan after eliminate_group_by_constant SAME TEXT AS ABOVE
560557
logical_plan after common_sub_expression_eliminate SAME TEXT AS ABOVE
561558
logical_plan after optimize_projections TableScan: simple_explain_test projection=[a, b, c]
562-
logical_plan after eliminate_nested_union SAME TEXT AS ABOVE
559+
logical_plan after optimize_unions SAME TEXT AS ABOVE
563560
logical_plan after simplify_expressions SAME TEXT AS ABOVE
564561
logical_plan after replace_distinct_aggregate SAME TEXT AS ABOVE
565562
logical_plan after eliminate_join SAME TEXT AS ABOVE
@@ -572,7 +569,6 @@ logical_plan after eliminate_filter SAME TEXT AS ABOVE
572569
logical_plan after eliminate_cross_join SAME TEXT AS ABOVE
573570
logical_plan after eliminate_limit SAME TEXT AS ABOVE
574571
logical_plan after propagate_empty_relation SAME TEXT AS ABOVE
575-
logical_plan after eliminate_one_union SAME TEXT AS ABOVE
576572
logical_plan after filter_null_join_keys SAME TEXT AS ABOVE
577573
logical_plan after eliminate_outer_join SAME TEXT AS ABOVE
578574
logical_plan after push_down_limit SAME TEXT AS ABOVE

0 commit comments

Comments
 (0)