@@ -26,7 +26,7 @@ use crate::error::DataFusionError;
26
26
use crate :: logical_plan:: dfschema:: DFSchemaRef ;
27
27
use crate :: sql:: parser:: FileType ;
28
28
use arrow:: datatypes:: { DataType , Field , Schema , SchemaRef } ;
29
- use datafusion_common:: DFSchema ;
29
+ use datafusion_common:: { DFField , DFSchema } ;
30
30
use std:: fmt:: Formatter ;
31
31
use std:: {
32
32
collections:: HashSet ,
@@ -267,14 +267,37 @@ pub struct Limit {
267
267
/// Evaluates correlated sub queries
268
268
#[ derive( Clone ) ]
269
269
pub struct Subquery {
270
- /// The list of sub queries
270
+ /// The list of sub queries (SubqueryNode)
271
271
pub subqueries : Vec < LogicalPlan > ,
272
272
/// The incoming logical plan
273
273
pub input : Arc < LogicalPlan > ,
274
274
/// The schema description of the output
275
275
pub schema : DFSchemaRef ,
276
276
}
277
277
278
+ /// Subquery node defines single subquery with its type
279
+ #[ derive( Clone ) ]
280
+ pub struct SubqueryNode {
281
+ /// The logical plan of subquery
282
+ pub input : Arc < LogicalPlan > ,
283
+ /// The subquery type
284
+ pub typ : SubqueryType ,
285
+ /// The schema description of the output
286
+ pub schema : DFSchemaRef ,
287
+ }
288
+
289
+ /// Subquery type
290
+ #[ derive( Debug , Clone , Copy , PartialEq , Eq , PartialOrd ) ]
291
+ pub enum SubqueryType {
292
+ /// Scalar (SELECT, WHERE) evaluating to one value
293
+ Scalar ,
294
+ /// EXISTS(...) evaluating to true if at least one row was produced
295
+ Exists ,
296
+ /// ANY(...) / ALL(...)
297
+ AnyAll ,
298
+ // [NOT] IN(...) is not defined as it is implicitly evaluated as ANY = (...) / ALL <> (...)
299
+ }
300
+
278
301
impl Subquery {
279
302
/// Merge schema of main input and correlated subquery columns
280
303
pub fn merged_schema ( input : & LogicalPlan , subqueries : & [ LogicalPlan ] ) -> DFSchema {
@@ -284,6 +307,72 @@ impl Subquery {
284
307
res
285
308
} )
286
309
}
310
+
311
+ /// Transform DataFusion schema according to subquery type
312
+ pub fn transform_dfschema ( schema : & DFSchema , typ : SubqueryType ) -> DFSchema {
313
+ match typ {
314
+ SubqueryType :: Scalar => schema. clone ( ) ,
315
+ SubqueryType :: Exists | SubqueryType :: AnyAll => {
316
+ let new_fields = schema
317
+ . fields ( )
318
+ . iter ( )
319
+ . map ( |field| {
320
+ let new_field = Subquery :: transform_field ( field. field ( ) , typ) ;
321
+ if let Some ( qualifier) = field. qualifier ( ) {
322
+ DFField :: from_qualified ( qualifier, new_field)
323
+ } else {
324
+ DFField :: from ( new_field)
325
+ }
326
+ } )
327
+ . collect ( ) ;
328
+ DFSchema :: new_with_metadata ( new_fields, schema. metadata ( ) . clone ( ) )
329
+ . unwrap ( )
330
+ }
331
+ }
332
+ }
333
+
334
+ /// Transform Arrow field according to subquery type
335
+ pub fn transform_field ( field : & Field , typ : SubqueryType ) -> Field {
336
+ match typ {
337
+ SubqueryType :: Scalar => field. clone ( ) ,
338
+ SubqueryType :: Exists => Field :: new ( field. name ( ) , DataType :: Boolean , false ) ,
339
+ // ANY/ALL subquery converts subquery result rows into a list
340
+ // and uses existing code evaluating ANY with a list to evaluate the result
341
+ SubqueryType :: AnyAll => {
342
+ let item = Field :: new_dict (
343
+ "item" ,
344
+ field. data_type ( ) . clone ( ) ,
345
+ true ,
346
+ field. dict_id ( ) . unwrap_or ( 0 ) ,
347
+ field. dict_is_ordered ( ) . unwrap_or ( false ) ,
348
+ ) ;
349
+ Field :: new ( field. name ( ) , DataType :: List ( Box :: new ( item) ) , false )
350
+ }
351
+ }
352
+ }
353
+ }
354
+
355
+ impl SubqueryNode {
356
+ /// Creates a new SubqueryNode evaluating the schema based on subquery type
357
+ pub fn new ( input : LogicalPlan , typ : SubqueryType ) -> Self {
358
+ let schema = Subquery :: transform_dfschema ( input. schema ( ) , typ) ;
359
+ Self {
360
+ input : Arc :: new ( input) ,
361
+ typ,
362
+ schema : Arc :: new ( schema) ,
363
+ }
364
+ }
365
+ }
366
+
367
+ impl Display for SubqueryType {
368
+ fn fmt ( & self , f : & mut Formatter < ' _ > ) -> fmt:: Result {
369
+ let name = match self {
370
+ Self :: Scalar => "Scalar" ,
371
+ Self :: Exists => "Exists" ,
372
+ Self :: AnyAll => "AnyAll" ,
373
+ } ;
374
+ write ! ( f, "{}" , name)
375
+ }
287
376
}
288
377
289
378
/// Values expression. See
@@ -402,6 +491,8 @@ pub enum LogicalPlan {
402
491
Limit ( Limit ) ,
403
492
/// Evaluates correlated sub queries
404
493
Subquery ( Subquery ) ,
494
+ /// Single subquery node with subquery type
495
+ SubqueryNode ( SubqueryNode ) ,
405
496
/// Creates an external table.
406
497
CreateExternalTable ( CreateExternalTable ) ,
407
498
/// Creates an in memory table.
@@ -439,6 +530,7 @@ impl LogicalPlan {
439
530
} ) => projected_schema,
440
531
LogicalPlan :: Projection ( Projection { schema, .. } ) => schema,
441
532
LogicalPlan :: Subquery ( Subquery { schema, .. } ) => schema,
533
+ LogicalPlan :: SubqueryNode ( SubqueryNode { schema, .. } ) => schema,
442
534
LogicalPlan :: Filter ( Filter { input, .. } ) => input. schema ( ) ,
443
535
LogicalPlan :: Distinct ( Distinct { input } ) => input. schema ( ) ,
444
536
LogicalPlan :: Window ( Window { schema, .. } ) => schema,
@@ -498,7 +590,8 @@ impl LogicalPlan {
498
590
schemas. insert ( 0 , schema) ;
499
591
schemas
500
592
}
501
- LogicalPlan :: Union ( Union { schema, .. } ) => {
593
+ LogicalPlan :: Union ( Union { schema, .. } )
594
+ | LogicalPlan :: SubqueryNode ( SubqueryNode { schema, .. } ) => {
502
595
vec ! [ schema]
503
596
}
504
597
LogicalPlan :: Extension ( extension) => vec ! [ extension. node. schema( ) ] ,
@@ -569,6 +662,7 @@ impl LogicalPlan {
569
662
| LogicalPlan :: Analyze { .. }
570
663
| LogicalPlan :: Explain { .. }
571
664
| LogicalPlan :: Subquery ( _)
665
+ | LogicalPlan :: SubqueryNode ( _)
572
666
| LogicalPlan :: Union ( _)
573
667
| LogicalPlan :: Distinct ( _) => {
574
668
vec ! [ ]
@@ -587,6 +681,7 @@ impl LogicalPlan {
587
681
. into_iter ( )
588
682
. chain ( subqueries. iter ( ) )
589
683
. collect ( ) ,
684
+ LogicalPlan :: SubqueryNode ( SubqueryNode { input, .. } ) => vec ! [ input] ,
590
685
LogicalPlan :: Filter ( Filter { input, .. } ) => vec ! [ input] ,
591
686
LogicalPlan :: Repartition ( Repartition { input, .. } ) => vec ! [ input] ,
592
687
LogicalPlan :: Window ( Window { input, .. } ) => vec ! [ input] ,
@@ -735,6 +830,9 @@ impl LogicalPlan {
735
830
}
736
831
true
737
832
}
833
+ LogicalPlan :: SubqueryNode ( SubqueryNode { input, .. } ) => {
834
+ input. accept ( visitor) ?
835
+ }
738
836
LogicalPlan :: Filter ( Filter { input, .. } ) => input. accept ( visitor) ?,
739
837
LogicalPlan :: Repartition ( Repartition { input, .. } ) => {
740
838
input. accept ( visitor) ?
@@ -1064,6 +1162,9 @@ impl LogicalPlan {
1064
1162
Ok ( ( ) )
1065
1163
}
1066
1164
LogicalPlan :: Subquery ( Subquery { .. } ) => write ! ( f, "Subquery" ) ,
1165
+ LogicalPlan :: SubqueryNode ( SubqueryNode { typ, .. } ) => {
1166
+ write ! ( f, "SubqueryNode: type={:?}" , typ)
1167
+ }
1067
1168
LogicalPlan :: Filter ( Filter {
1068
1169
predicate : ref expr,
1069
1170
..
0 commit comments