@@ -53,8 +53,8 @@ use datafusion_common::display::ToStringifiedPlan;
5353use datafusion_common:: file_options:: file_type:: FileType ;
5454use datafusion_common:: {
5555 exec_err, get_target_functional_dependencies, internal_err, not_impl_err,
56- plan_datafusion_err, plan_err, Column , DFSchema , DFSchemaRef , DataFusionError ,
57- Result , ScalarValue , TableReference , ToDFSchema , UnnestOptions ,
56+ plan_datafusion_err, plan_err, Column , Constraints , DFSchema , DFSchemaRef ,
57+ DataFusionError , Result , ScalarValue , TableReference , ToDFSchema , UnnestOptions ,
5858} ;
5959use datafusion_expr_common:: type_coercion:: binary:: type_union_resolution;
6060
@@ -63,6 +63,26 @@ use indexmap::IndexSet;
/// Default table name for unnamed table
pub const UNNAMED_TABLE: &str = "?table?";
6565
/// Options for [`LogicalPlanBuilder`]
#[derive(Default, Debug, Clone)]
pub struct LogicalPlanBuilderOptions {
    /// Flag indicating whether the plan builder should add
    /// functionally dependent expressions as additional aggregation groupings.
    add_implicit_group_by_exprs: bool,
}

impl LogicalPlanBuilderOptions {
    /// Create options with every setting at its default value, i.e. with
    /// implicit group-by expressions disabled.
    pub fn new() -> Self {
        Default::default()
    }

    /// Should the builder add functionally dependent expressions as additional aggregation groupings.
    pub fn with_add_implicit_group_by_exprs(mut self, add: bool) -> Self {
        self.add_implicit_group_by_exprs = add;
        self
    }
}
85+
6686/// Builder for logical plans
6787///
6888/// # Example building a simple plan
@@ -103,19 +123,29 @@ pub const UNNAMED_TABLE: &str = "?table?";
103123#[ derive( Debug , Clone ) ]
104124pub struct LogicalPlanBuilder {
105125 plan : Arc < LogicalPlan > ,
126+ options : LogicalPlanBuilderOptions ,
106127}
107128
108129impl LogicalPlanBuilder {
109130 /// Create a builder from an existing plan
110131 pub fn new ( plan : LogicalPlan ) -> Self {
111132 Self {
112133 plan : Arc :: new ( plan) ,
134+ options : LogicalPlanBuilderOptions :: default ( ) ,
113135 }
114136 }
115137
116138 /// Create a builder from an existing plan
117139 pub fn new_from_arc ( plan : Arc < LogicalPlan > ) -> Self {
118- Self { plan }
140+ Self {
141+ plan,
142+ options : LogicalPlanBuilderOptions :: default ( ) ,
143+ }
144+ }
145+
146+ pub fn with_options ( mut self , options : LogicalPlanBuilderOptions ) -> Self {
147+ self . options = options;
148+ self
119149 }
120150
121151 /// Return the output schema of the plan build so far
@@ -1138,8 +1168,12 @@ impl LogicalPlanBuilder {
11381168 let group_expr = normalize_cols ( group_expr, & self . plan ) ?;
11391169 let aggr_expr = normalize_cols ( aggr_expr, & self . plan ) ?;
11401170
1141- let group_expr =
1142- add_group_by_exprs_from_dependencies ( group_expr, self . plan . schema ( ) ) ?;
1171+ let group_expr = if self . options . add_implicit_group_by_exprs {
1172+ add_group_by_exprs_from_dependencies ( group_expr, self . plan . schema ( ) ) ?
1173+ } else {
1174+ group_expr
1175+ } ;
1176+
11431177 Aggregate :: try_new ( self . plan , group_expr, aggr_expr)
11441178 . map ( LogicalPlan :: Aggregate )
11451179 . map ( Self :: new)
@@ -1550,6 +1584,7 @@ pub fn add_group_by_exprs_from_dependencies(
15501584 }
15511585 Ok ( group_expr)
15521586}
1587+
15531588/// Errors if one or more expressions have equal names.
15541589pub fn validate_unique_names < ' a > (
15551590 node_name : & str ,
@@ -1685,7 +1720,21 @@ pub fn table_scan_with_filter_and_fetch(
16851720
16861721pub fn table_source ( table_schema : & Schema ) -> Arc < dyn TableSource > {
16871722 let table_schema = Arc :: new ( table_schema. clone ( ) ) ;
1688- Arc :: new ( LogicalTableSource { table_schema } )
1723+ Arc :: new ( LogicalTableSource {
1724+ table_schema,
1725+ constraints : Default :: default ( ) ,
1726+ } )
1727+ }
1728+
1729+ pub fn table_source_with_constraints (
1730+ table_schema : & Schema ,
1731+ constraints : Constraints ,
1732+ ) -> Arc < dyn TableSource > {
1733+ let table_schema = Arc :: new ( table_schema. clone ( ) ) ;
1734+ Arc :: new ( LogicalTableSource {
1735+ table_schema,
1736+ constraints,
1737+ } )
16891738}
16901739
16911740/// Wrap projection for a plan, if the join keys contains normal expression.
@@ -1756,12 +1805,21 @@ pub fn wrap_projection_for_join_if_necessary(
17561805/// DefaultTableSource.
17571806pub struct LogicalTableSource {
17581807 table_schema : SchemaRef ,
1808+ constraints : Constraints ,
17591809}
17601810
17611811impl LogicalTableSource {
17621812 /// Create a new LogicalTableSource
17631813 pub fn new ( table_schema : SchemaRef ) -> Self {
1764- Self { table_schema }
1814+ Self {
1815+ table_schema,
1816+ constraints : Constraints :: default ( ) ,
1817+ }
1818+ }
1819+
1820+ pub fn with_constraints ( mut self , constraints : Constraints ) -> Self {
1821+ self . constraints = constraints;
1822+ self
17651823 }
17661824}
17671825
@@ -1774,6 +1832,10 @@ impl TableSource for LogicalTableSource {
17741832 Arc :: clone ( & self . table_schema )
17751833 }
17761834
1835+ fn constraints ( & self ) -> Option < & Constraints > {
1836+ Some ( & self . constraints )
1837+ }
1838+
17771839 fn supports_filters_pushdown (
17781840 & self ,
17791841 filters : & [ & Expr ] ,
@@ -2023,12 +2085,12 @@ pub fn unnest_with_options(
20232085
20242086#[ cfg( test) ]
20252087mod tests {
2026-
20272088 use super :: * ;
20282089 use crate :: logical_plan:: StringifiedPlan ;
20292090 use crate :: { col, expr, expr_fn:: exists, in_subquery, lit, scalar_subquery} ;
20302091
2031- use datafusion_common:: { RecursionUnnestOption , SchemaError } ;
2092+ use crate :: test:: function_stub:: sum;
2093+ use datafusion_common:: { Constraint , RecursionUnnestOption , SchemaError } ;
20322094
20332095 #[ test]
20342096 fn plan_builder_simple ( ) -> Result < ( ) > {
@@ -2575,4 +2637,45 @@ mod tests {
25752637
25762638 Ok ( ( ) )
25772639 }
2640+
/// With default builder options, a primary key on the scanned table must not
/// add any extra group-by expressions to the aggregate.
#[test]
fn plan_builder_aggregate_without_implicit_group_by_exprs() -> Result<()> {
    // Column 0 (`id`) is declared as the primary key.
    let constraints =
        Constraints::new_unverified(vec![Constraint::PrimaryKey(vec![0])]);
    let table_source = table_source_with_constraints(&employee_schema(), constraints);

    let plan =
        LogicalPlanBuilder::scan("employee_csv", table_source, Some(vec![0, 3, 4]))?
            .aggregate(vec![col("id")], vec![sum(col("salary"))])?
            .build()?;

    // Child plan nodes are rendered with two spaces of indentation per level.
    let expected =
        "Aggregate: groupBy=[[employee_csv.id]], aggr=[[sum(employee_csv.salary)]]\
        \n  TableScan: employee_csv projection=[id, state, salary]";
    assert_eq!(expected, format!("{plan}"));

    Ok(())
}
2659+
/// With `add_implicit_group_by_exprs` enabled, grouping by the primary key
/// must pull the functionally dependent columns into the group-by list.
#[test]
fn plan_builder_aggregate_with_implicit_group_by_exprs() -> Result<()> {
    // Column 0 (`id`) is declared as the primary key.
    let constraints =
        Constraints::new_unverified(vec![Constraint::PrimaryKey(vec![0])]);
    let table_source = table_source_with_constraints(&employee_schema(), constraints);

    let options =
        LogicalPlanBuilderOptions::new().with_add_implicit_group_by_exprs(true);
    let plan =
        LogicalPlanBuilder::scan("employee_csv", table_source, Some(vec![0, 3, 4]))?
            .with_options(options)
            .aggregate(vec![col("id")], vec![sum(col("salary"))])?
            .build()?;

    // Child plan nodes are rendered with two spaces of indentation per level.
    let expected =
        "Aggregate: groupBy=[[employee_csv.id, employee_csv.state, employee_csv.salary]], aggr=[[sum(employee_csv.salary)]]\
        \n  TableScan: employee_csv projection=[id, state, salary]";
    assert_eq!(expected, format!("{plan}"));

    Ok(())
}
25782681}
0 commit comments