@@ -25,7 +25,7 @@ use arrow::datatypes::SchemaRef;
2525use async_trait:: async_trait;
2626use datafusion_common:: Result ;
2727use datafusion_common:: { not_impl_err, Constraints , Statistics } ;
28- use datafusion_expr:: Expr ;
28+ use datafusion_expr:: { Expr , SortExpr } ;
2929
3030use datafusion_expr:: dml:: InsertOp ;
3131use datafusion_expr:: {
@@ -171,6 +171,41 @@ pub trait TableProvider: Debug + Sync + Send {
171171 limit : Option < usize > ,
172172 ) -> Result < Arc < dyn ExecutionPlan > > ;
173173
174+ /// Create an [`ExecutionPlan`] for scanning the table using structured arguments.
175+ ///
176+ /// This method uses [`ScanArgs`] to pass scan parameters in a structured way
177+ /// and returns a [`ScanResult`] containing the execution plan. This approach
178+ /// allows for extensible parameter passing and result handling.
179+ ///
180+ /// Table providers can override this method to take advantage of additional
181+ /// parameters like `preferred_ordering` that may not be available through
182+ /// other scan methods.
183+ ///
184+ /// # Arguments
185+ /// * `state` - The session state containing configuration and context
186+ /// * `args` - Structured scan arguments including projection, filters, limit, and ordering preferences
187+ ///
188+ /// # Returns
189+ /// A [`ScanResult`] containing the [`ExecutionPlan`] for scanning the table
190+ ///
191+ /// See [`Self::scan`] for detailed documentation about projection, filters, and limits.
192+ async fn scan_with_args (
193+ & self ,
194+ state : & dyn Session ,
195+ args : ScanArgs ,
196+ ) -> Result < ScanResult > {
197+ let ScanArgs {
198+ filters,
199+ projection,
200+ limit,
201+ } = args;
202+ let filters = filters. unwrap_or_default ( ) ;
203+ let plan = self
204+ . scan ( state, projection. as_ref ( ) , & filters, limit)
205+ . await ?;
206+ Ok ( ScanResult :: new ( plan) )
207+ }
208+
174209 /// Specify if DataFusion should provide filter expressions to the
175210 /// TableProvider to apply *during* the scan.
176211 ///
@@ -299,6 +334,119 @@ pub trait TableProvider: Debug + Sync + Send {
299334 }
300335}
301336
337+ /// Arguments for scanning a table with [`TableProvider::scan_with_args`].
338+ ///
339+ /// `ScanArgs` provides a structured way to pass scan parameters to table providers,
340+ /// replacing the multiple individual parameters used by [`TableProvider::scan`].
341+ /// This struct uses the builder pattern for convenient construction.
342+ ///
343+ /// # Examples
344+ ///
345+ /// ```
346+ /// # use datafusion_catalog::ScanArgs;
347+ /// # use datafusion_expr::Expr;
348+ /// let args = ScanArgs::default()
349+ /// .with_projection(Some(vec![0, 2, 4]))
350+ /// .with_limit(Some(1000));
351+ /// ```
352+ #[ derive( Debug , Clone , Default ) ]
353+ pub struct ScanArgs {
354+ filters : Option < Vec < Expr > > ,
355+ projection : Option < Vec < usize > > ,
356+ limit : Option < usize > ,
357+ }
358+
359+ impl ScanArgs {
360+ /// Set the column projection for the scan.
361+ ///
362+ /// The projection is a list of column indices from [`TableProvider::schema`]
363+ /// that should be included in the scan results. If `None`, all columns are included.
364+ ///
365+ /// # Arguments
366+ /// * `projection` - Optional list of column indices to project
367+ pub fn with_projection ( mut self , projection : Option < Vec < usize > > ) -> Self {
368+ self . projection = projection;
369+ self
370+ }
371+
372+ /// Get the column projection for the scan.
373+ ///
374+ /// Returns a cloned copy of the projection column indices, or `None` if
375+ /// no projection was specified (meaning all columns should be included).
376+ pub fn projection ( & self ) -> Option < Vec < usize > > {
377+ self . projection . clone ( )
378+ }
379+
380+ /// Set the filter expressions for the scan.
381+ ///
382+ /// Filters are boolean expressions that should be evaluated during the scan
383+ /// to reduce the number of rows returned. All expressions are combined with AND logic.
384+ /// Whether filters are actually pushed down depends on [`TableProvider::supports_filters_pushdown`].
385+ ///
386+ /// # Arguments
387+ /// * `filters` - Optional list of filter expressions
388+ pub fn with_filters ( mut self , filters : Option < Vec < Expr > > ) -> Self {
389+ self . filters = filters;
390+ self
391+ }
392+
393+ /// Get the filter expressions for the scan.
394+ ///
395+ /// Returns a reference to the filter expressions, or `None` if no filters were specified.
396+ pub fn filters ( & self ) -> Option < & [ Expr ] > {
397+ self . filters . as_deref ( )
398+ }
399+
400+ /// Set the maximum number of rows to return from the scan.
401+ ///
402+ /// If specified, the scan should return at most this many rows. This is typically
403+ /// used to optimize queries with `LIMIT` clauses.
404+ ///
405+ /// # Arguments
406+ /// * `limit` - Optional maximum number of rows to return
407+ pub fn with_limit ( mut self , limit : Option < usize > ) -> Self {
408+ self . limit = limit;
409+ self
410+ }
411+
412+ /// Get the maximum number of rows to return from the scan.
413+ ///
414+ /// Returns the row limit, or `None` if no limit was specified.
415+ pub fn limit ( & self ) -> Option < usize > {
416+ self . limit
417+ }
418+ }
419+
420+ /// Result of a table scan operation from [`TableProvider::scan_with_args`].
421+ ///
422+ /// `ScanResult` encapsulates the [`ExecutionPlan`] produced by a table scan,
423+ /// providing a typed return value instead of returning the plan directly.
424+ /// This allows for future extensibility of scan results without breaking
425+ /// the API.
426+ #[ derive( Debug , Clone ) ]
427+ pub struct ScanResult {
428+ /// The ExecutionPlan to run.
429+ plan : Arc < dyn ExecutionPlan > ,
430+ }
431+
432+ impl ScanResult {
433+ /// Create a new `ScanResult` with the given execution plan.
434+ ///
435+ /// # Arguments
436+ /// * `plan` - The execution plan that will perform the table scan
437+ pub fn new ( plan : Arc < dyn ExecutionPlan > ) -> Self {
438+ Self { plan }
439+ }
440+
441+ /// Get the execution plan for this scan result.
442+ ///
443+ /// Returns a cloned reference to the [`ExecutionPlan`] that will perform
444+ /// the actual table scanning and data retrieval.
445+ pub fn plan ( & self ) -> Arc < dyn ExecutionPlan > {
446+ Arc :: clone ( & self . plan )
447+ }
448+ }
449+
302450/// A factory which creates [`TableProvider`]s at runtime given a URL.
303451///
304452/// For example, this can be used to create a table "on the fly"
0 commit comments