diff --git a/NAMESPACE b/NAMESPACE index 437ef7677..3567af045 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,7 +8,7 @@ S3method(add_count,duckplyr_df) S3method(anti_join,duckplyr_df) S3method(arrange,duckplyr_df) S3method(as.data.frame,duckplyr_df) -S3method(as.data.frame,funneled_duckplyr_df) +S3method(as.data.frame,frugal_duckplyr_df) S3method(as_duckdb_tibble,data.frame) S3method(as_duckdb_tibble,default) S3method(as_duckdb_tibble,duckplyr_df) @@ -19,7 +19,7 @@ S3method(as_duckdb_tibble,tbl_duckdb_connection) S3method(as_tibble,duckplyr_df) S3method(auto_copy,duckplyr_df) S3method(collect,duckplyr_df) -S3method(collect,funneled_duckplyr_df) +S3method(collect,frugal_duckplyr_df) S3method(compute,duckplyr_df) S3method(count,duckplyr_df) S3method(cross_join,duckplyr_df) diff --git a/R/add_count.R b/R/add_count.R index f787b8f23..235b096ec 100644 --- a/R/add_count.R +++ b/R/add_count.R @@ -10,7 +10,7 @@ add_count.duckplyr_df <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) add_count <- dplyr$add_count.data.frame out <- add_count(x, ..., wt = {{ wt }}, sort = sort, name = name, .drop = .drop) diff --git a/R/anti_join.R b/R/anti_join.R index 9fecab641..85dccb9bf 100644 --- a/R/anti_join.R +++ b/R/anti_join.R @@ -19,7 +19,7 @@ anti_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, ..., na_matches ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) anti_join <- dplyr$anti_join.data.frame out <- anti_join(x, y, by, copy = FALSE, ..., na_matches = na_matches) diff --git a/R/arrange.R b/R/arrange.R index df5ac406b..b3a2fec14 100644 --- a/R/arrange.R +++ b/R/arrange.R @@ -52,7 +52,7 @@ arrange.duckplyr_df <- function(.data, ..., .by_group = FALSE, .locale = NULL) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) arrange <- dplyr$arrange.data.frame out <- arrange(.data, ..., 
.by_group = .by_group, .locale = .locale) diff --git a/R/compute-rd.R b/R/compute-rd.R index 26e33dff5..e77bdfceb 100644 --- a/R/compute-rd.R +++ b/R/compute-rd.R @@ -1,7 +1,7 @@ #' @title Compute results #' #' @description This is a method for the [dplyr::compute()] generic. -#' For a (funneled) duckplyr frame, +#' For a (frugal) duckplyr frame, #' `compute()` executes a query but stores it in a (temporary) table, #' or in a Parquet or CSV file. #' The result is a duckplyr frame that can be used with subsequent dplyr verbs. @@ -11,7 +11,7 @@ #' @param name The name of the table to store the result in. #' @param schema_name The schema to store the result in, defaults to the current schema. #' @param temporary Set to `FALSE` to store the result in a permanent table. -#' @inheritSection duckdb_tibble Funneling +#' @inheritSection duckdb_tibble Prudence #' @examples #' library(duckplyr) #' df <- duckdb_tibble(x = c(1, 2)) diff --git a/R/compute.R b/R/compute.R index 84222b39b..8f1fc246d 100644 --- a/R/compute.R +++ b/R/compute.R @@ -4,13 +4,13 @@ compute.duckplyr_df <- function( x, ..., - funnel = NULL, + collect = NULL, name = NULL, schema_name = NULL, temporary = TRUE ) { - if (is.null(funnel)) { - funnel <- get_funnel_duckplyr_df(x) + if (is.null(collect)) { + collect <- get_collect_duckplyr_df(x) } if (is.null(schema_name)) { schema_name <- "" @@ -35,8 +35,8 @@ compute.duckplyr_df <- function( out <- duckplyr_reconstruct(out_rel, x) - if (get_funnel_duckplyr_df(out) != funnel) { - out <- as_duckdb_tibble(out, funnel = funnel) + if (get_collect_duckplyr_df(out) != collect) { + out <- as_duckdb_tibble(out, collect = collect) } return(out) @@ -44,7 +44,7 @@ compute.duckplyr_df <- function( ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) compute <- dplyr$compute.data.frame out <- compute(x, ...) 
diff --git a/R/compute_file.R b/R/compute_file.R index d8d1c1be5..45a5a1179 100644 --- a/R/compute_file.R +++ b/R/compute_file.R @@ -1,7 +1,7 @@ #' @title Compute results to a file #' #' @description -#' These functions apply to (funneled) duckplyr frames. +#' These functions apply to (frugal) duckplyr frames. #' They executes a query and stores the results in a flat file. #' The result is a duckplyr frame that can be used with subsequent dplyr verbs. #' @@ -9,7 +9,7 @@ #' #' @inheritParams rlang::args_dots_empty #' @inheritParams compute.duckplyr_df -#' @inheritSection duckdb_tibble Funneling +#' @inheritSection duckdb_tibble Prudence #' @param path The path to store the result in. #' @param options A list of additional options to pass to create the storage format, #' see @@ -26,15 +26,15 @@ #' explain(df) #' @seealso [compute.duckplyr_df()], [dplyr::collect()] #' @name compute_file -compute_parquet <- function(x, path, ..., funnel = NULL, options = NULL) { +compute_parquet <- function(x, path, ..., collect = NULL, options = NULL) { check_dots_empty() if (is.null(options)) { options <- list() } - if (is.null(funnel)) { - funnel <- get_funnel_duckplyr_df(x) + if (is.null(collect)) { + collect <- get_collect_duckplyr_df(x) } rel <- duckdb_rel_from_df(x) @@ -46,7 +46,7 @@ compute_parquet <- function(x, path, ..., funnel = NULL, options = NULL) { path <- file.path(path, "**", "**.parquet") } - read_parquet_duckdb(path, funnel = funnel) + read_parquet_duckdb(path, collect = collect) } #' compute_csv() @@ -54,15 +54,15 @@ compute_parquet <- function(x, path, ..., funnel = NULL, options = NULL) { #' `compute_csv()` creates a CSV file. 
#' @rdname compute_file #' @export -compute_csv <- function(x, path, ..., funnel = NULL, options = NULL) { +compute_csv <- function(x, path, ..., collect = NULL, options = NULL) { check_dots_empty() if (is.null(options)) { options <- list() } - if (is.null(funnel)) { - funnel <- get_funnel_duckplyr_df(x) + if (is.null(collect)) { + collect <- get_collect_duckplyr_df(x) } rel <- duckdb_rel_from_df(x) @@ -74,5 +74,5 @@ compute_csv <- function(x, path, ..., funnel = NULL, options = NULL) { path <- file.path(path, "**", "**.csv") } - read_csv_duckdb(path, funnel = funnel) + read_csv_duckdb(path, collect = collect) } diff --git a/R/count.R b/R/count.R index eec706310..c04831396 100644 --- a/R/count.R +++ b/R/count.R @@ -59,7 +59,7 @@ count.duckplyr_df <- function(x, ..., wt = NULL, sort = FALSE, name = NULL, .dro # out <- count(x_df, !!!quos, wt = {{ wt }}, sort = sort, name = name, .drop = .drop) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) count <- dplyr$count.data.frame out <- count(x, ..., wt = {{ wt }}, sort = sort, name = name, .drop = .drop) diff --git a/R/cross_join.R b/R/cross_join.R index a64200874..e83e80feb 100644 --- a/R/cross_join.R +++ b/R/cross_join.R @@ -10,7 +10,7 @@ cross_join.duckplyr_df <- function(x, y, ..., copy = FALSE, suffix = c(".x", ".y ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) cross_join <- dplyr$cross_join.data.frame out <- cross_join(x, y, ..., copy = copy, suffix = suffix) diff --git a/R/distinct.R b/R/distinct.R index 022991a21..1614e6291 100644 --- a/R/distinct.R +++ b/R/distinct.R @@ -69,7 +69,7 @@ distinct.duckplyr_df <- function(.data, ..., .keep_all = FALSE) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) distinct <- dplyr$distinct.data.frame out <- distinct(.data, ..., .keep_all = .keep_all) diff --git a/R/do.R b/R/do.R index 33e2553a1..98784ff9c 100644 --- a/R/do.R +++ b/R/do.R @@ -10,7 
+10,7 @@ do.duckplyr_df <- function(.data, ...) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) do <- dplyr$do.data.frame out <- do(.data, ...) diff --git a/R/ducktbl.R b/R/ducktbl.R index 9ad57a475..4d36523a8 100644 --- a/R/ducktbl.R +++ b/R/ducktbl.R @@ -7,10 +7,10 @@ #' For such objects, #' dplyr verbs such as [mutate()], [select()] or [filter()] will use DuckDB. #' -#' `duckdb_tibble()` works like [tibble()], returning an "unfunneled" duckplyr data frame by default. -#' See the "Funneling" section below. +#' `duckdb_tibble()` works like [tibble()], returning a "lavish" duckplyr data frame by default. +#' See the "Prudence" section below. -#' @section Funneling: +#' @section Prudence: #' Data frames backed by duckplyr, with class `"duckplyr_df"`, #' behave as regular data frames in almost all respects. #' In particular, direct column access like `df$x`, @@ -18,7 +18,7 @@ #' Conceptually, duckplyr frames are "eager": from a user's perspective, #' they behave like regular data frames. #' Under the hood, two key differences provide improved performance and usability: -#' lazy materialization and funneling. +#' lazy materialization and prudence. #' #' For a duckplyr frame that is the result of a dplyr operation, #' accessing column data or retrieving the number of rows will trigger a computation @@ -32,9 +32,9 @@ #' Being both "eager" and "lazy" at the same time introduces a challenge: #' it is too easy to accidentally trigger computation, #' which may be prohibitive if an intermediate result is too large. -#' This is where funneling comes in. +#' This is where prudence comes in. #' -#' - For unfunneled duckplyr frames, the underlying DuckDB computation is carried out +#' - For lavish duckplyr frames, the underlying DuckDB computation is carried out #' upon the first request. #' Once the results are computed, they are cached and subsequent requests are fast.
#' This is a good choice for small to medium-sized data, @@ -42,7 +42,7 @@ #' at any stage. #' This is the default for `duckdb_tibble()` and `as_duckdb_tibble()`. #' -#' - For funneled duckplyr frames, accessing a column or requesting the number of rows +#' - For frugal duckplyr frames, accessing a column or requesting the number of rows #' triggers an error, either unconditionally, or if the result exceeds a certain size. #' This is a good choice for large data sets where the cost of materializing the data #' may be prohibitive due to size or computation time, @@ -50,35 +50,35 @@ #' The default for the ingestion functions like [read_parquet_duckdb()] #' is to limit the result size to one million cells (values in the resulting data frame). #' -#' Funneled duckplyr frames behave like [`dtplyr`'s lazy frames](https://dtplyr.tidyverse.org/reference/lazy_dt.html), +#' Frugal duckplyr frames behave like [`dtplyr`'s lazy frames](https://dtplyr.tidyverse.org/reference/lazy_dt.html), #' or dbplyr's lazy frames: #' the computation only starts when you **explicitly** request it with a "collect" #' function. -#' In dtplyr and dbplyr, there are no unfunneled frames: collection always needs to be +#' In dtplyr and dbplyr, there are no lavish frames: collection always needs to be #' explicit. #' -#' A funneled duckplyr frame can be converted to an unfunneled one with `as_duckdb_tibble(funnel = "open")`. +#' A frugal duckplyr frame can be converted to a lavish one with `as_duckdb_tibble(collect = "any_size")`. #' The [collect.duckplyr_df()] method triggers computation and converts to a plain tibble. #' Other useful methods include [compute_file()] for storing results in a file, #' and [compute.duckplyr_df()] for storing results in temporary storage on disk.
#' -#' Beyond safety regarding memory usage, funneled frames also allow you +#' Beyond safety regarding memory usage, frugal frames also allow you #' to check that all operations are supported by DuckDB: -#' for a funneled frame with `funnel = "closed"`, fallbacks to dplyr are not possible. +#' for a frugal frame with `collect = "always_manual"`, fallbacks to dplyr are not possible. #' As a reminder, computing via DuckDB is currently not always possible, #' see `vignette("limits")` for the supported operations. #' In such cases, the original dplyr implementation is used, see [fallback] for details. #' As the original dplyr implementation accesses columns directly, #' the data must be materialized before a fallback can be executed. -#' This means that automatic fallback is only possible for "unfunneled" duckplyr frames, -#' while for "funneled" duckplyr frames, one of the aforementioned collection methods must be used first. +#' This means that automatic fallback is only possible for "lavish" duckplyr frames, +#' while for "frugal" duckplyr frames, one of the aforementioned collection methods must be used first. #' #' #' @param ... For `duckdb_tibble()`, passed on to [tibble()]. #' For `as_duckdb_tibble()`, passed on to methods. -#' @param .funnel,funnel Either a logical: -#' - Set to `TRUE` to return a funneled data frame. -#' - Set to `FALSE` to return an unfunneled data frame. +#' @param .collect,collect Either a logical: +#' - Set to `TRUE` to return a frugal data frame. +#' - Set to `FALSE` to return a lavish data frame. #' #' Or a named vector with at least one of #' - `cells` (numeric) @@ -90,11 +90,11 @@ #' If `cells` is specified but not `rows`, `rows` is `Inf`. #' If `rows` is specified but not `cells`, `cells` is `Inf`. #' -#' The default is to inherit the funneling of the input. -#' see the "Funneling" section. +#' The default is to inherit the prudence of the input. +#' See the "Prudence" section.
#' #' @return For `duckdb_tibble()` and `as_duckdb_tibble()`, an object with the following classes: -#' - `"funneled_duckplyr_df"` if `.funnel` is `TRUE` +#' - `"frugal_duckplyr_df"` if `.collect` is `TRUE` #' - `"duckplyr_df"` #' - Classes of a [tibble] #' @@ -108,14 +108,14 @@ #' #' x$a #' -#' y <- duckdb_tibble(a = 1, .funnel = "closed") +#' y <- duckdb_tibble(a = 1, .collect = "always_manual") #' y #' try(length(y$a)) #' length(collect(y)$a) #' @export -duckdb_tibble <- function(..., .funnel = "open") { +duckdb_tibble <- function(..., .collect = "any_size") { out <- tibble::tibble(...) - as_duckdb_tibble(out, funnel = .funnel) + as_duckdb_tibble(out, collect = .collect) } #' as_duckdb_tibble @@ -126,62 +126,62 @@ duckdb_tibble <- function(..., .funnel = "open") { #' @param x The object to convert or to test. #' @rdname duckdb_tibble #' @export -as_duckdb_tibble <- function(x, ..., funnel = "open") { - # Handle the funnel arg in the generic, only the other args will be dispatched +as_duckdb_tibble <- function(x, ..., collect = "any_size") { + # Handle the collect arg in the generic, only the other args will be dispatched as_duckdb_tibble <- function(x, ...) { UseMethod("as_duckdb_tibble") } - funnel_parsed <- funnel_parse(funnel) + collect_parsed <- collect_parse(collect) out <- as_duckdb_tibble(x, ...) 
- if (funnel_parsed$funnel == "closed") { - as_funneled_duckplyr_df( + if (collect_parsed$collect == "always_manual") { + as_frugal_duckplyr_df( out, - funnel_parsed$allow_materialization, - funnel_parsed$n_rows, - funnel_parsed$n_cells + collect_parsed$allow_materialization, + collect_parsed$n_rows, + collect_parsed$n_cells ) } else { - as_unfunneled_duckplyr_df(out) + as_lavish_duckplyr_df(out) } } -funnel_parse <- function(funnel, call = caller_env()) { +collect_parse <- function(collect, call = caller_env()) { n_rows <- Inf n_cells <- Inf - if (is.numeric(funnel)) { - if (is.null(names(funnel))) { - cli::cli_abort("{.arg funnel} must have names if it is a named vector.", call = call) + if (is.numeric(collect)) { + if (is.null(names(collect))) { + cli::cli_abort("{.arg collect} must have names if it is a named vector.", call = call) } - extra_names <- setdiff(names(funnel), c("rows", "cells")) + extra_names <- setdiff(names(collect), c("rows", "cells")) if (length(extra_names) > 0) { - cli::cli_abort("Unknown name in {.arg funnel}: {extra_names[[1]]}", call = call) + cli::cli_abort("Unknown name in {.arg collect}: {extra_names[[1]]}", call = call) } - if ("rows" %in% names(funnel)) { - n_rows <- funnel[["rows"]] + if ("rows" %in% names(collect)) { + n_rows <- collect[["rows"]] if (is.na(n_rows) || n_rows < 0) { - cli::cli_abort("The {.val rows} component of {.arg funnel} must be a non-negative integer", call = call) + cli::cli_abort("The {.val rows} component of {.arg collect} must be a non-negative integer", call = call) } } - if ("cells" %in% names(funnel)) { - n_cells <- funnel[["cells"]] + if ("cells" %in% names(collect)) { + n_cells <- collect[["cells"]] if (is.na(n_cells) || n_cells < 0) { - cli::cli_abort("The {.val cells} component of {.arg funnel} must be a non-negative integer", call = call) + cli::cli_abort("The {.val cells} component of {.arg collect} must be a non-negative integer", call = call) } } allow_materialization <- is.finite(n_rows) || 
is.finite(n_cells) - funnel <- "closed" - } else if (!is.character(funnel)) { - cli::cli_abort("{.arg funnel} must be an unnamed character vector or a named numeric vector", call = call) + collect <- "always_manual" + } else if (!is.character(collect)) { + cli::cli_abort("{.arg collect} must be an unnamed character vector or a named numeric vector", call = call) } else { - allow_materialization <- !identical(funnel, "closed") + allow_materialization <- !identical(collect, "always_manual") } list( - funnel = funnel, + collect = collect, allow_materialization = allow_materialization, n_rows = n_rows, n_cells = n_cells @@ -195,7 +195,7 @@ as_duckdb_tibble.tbl_duckdb_connection <- function(x, ...) { con <- dbplyr::remote_con(x) sql <- dbplyr::remote_query(x) - read_sql_duckdb(sql, funnel = "closed", con = con) + read_sql_duckdb(sql, collect = "always_manual", con = con) } #' @export @@ -270,9 +270,9 @@ is_duckdb_tibble <- function(x) { } -#' @param funnel Only adds the class, does not recreate the relation object! +#' @param collect Only adds the class, does not recreate the relation object! 
#' @noRd -new_duckdb_tibble <- function(x, class = NULL, funnel = "open", error_call = caller_env()) { +new_duckdb_tibble <- function(x, class = NULL, collect = "any_size", error_call = caller_env()) { if (is.null(class)) { class <- c("tbl_df", "tbl", "data.frame") } @@ -284,7 +284,7 @@ new_duckdb_tibble <- function(x, class = NULL, funnel = "open", error_call = cal } class(x) <- unique(c( - if (!identical(funnel, "open")) "funneled_duckplyr_df", + if (!identical(collect, "any_size")) "frugal_duckplyr_df", "duckplyr_df", class )) diff --git a/R/filter.R b/R/filter.R index 15418d939..f7afa7948 100644 --- a/R/filter.R +++ b/R/filter.R @@ -36,7 +36,7 @@ filter.duckplyr_df <- function(.data, ..., .by = NULL, .preserve = FALSE) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) filter <- dplyr$filter.data.frame out <- filter(.data, ..., .by = {{ .by }}, .preserve = .preserve) diff --git a/R/full_join.R b/R/full_join.R index 31bb03154..40578e11a 100644 --- a/R/full_join.R +++ b/R/full_join.R @@ -23,7 +23,7 @@ full_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x" ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) full_join <- dplyr$full_join.data.frame out <- full_join(x, y, by, copy = FALSE, suffix, ..., keep = keep, na_matches = na_matches, multiple = multiple, relationship = relationship) diff --git a/R/funnel.R b/R/funnel.R index c782844a3..ea8ebd94a 100644 --- a/R/funnel.R +++ b/R/funnel.R @@ -1,4 +1,4 @@ -as_funneled_duckplyr_df <- function(x, allow_materialization, n_rows, n_cells) { +as_frugal_duckplyr_df <- function(x, allow_materialization, n_rows, n_cells) { rel <- duckdb_rel_from_df(x) out <- rel_to_df( @@ -9,23 +9,23 @@ as_funneled_duckplyr_df <- function(x, allow_materialization, n_rows, n_cells) { ) out <- dplyr_reconstruct(out, x) - add_funneled_duckplyr_df_class(out, n_rows, n_cells) + add_frugal_duckplyr_df_class(out, n_rows, n_cells) } 
-add_funneled_duckplyr_df_class <- function(x, n_rows, n_cells) { - class(x) <- unique(c("funneled_duckplyr_df", class(x))) +add_frugal_duckplyr_df_class <- function(x, n_rows, n_cells) { + class(x) <- unique(c("frugal_duckplyr_df", class(x))) - funnel <- c( + collect <- c( rows = if (is.finite(n_rows)) n_rows, cells = if (is.finite(n_cells)) n_cells ) - attr(x, "funnel") <- funnel + attr(x, "collect") <- collect x } -as_unfunneled_duckplyr_df <- function(x) { - if (!inherits(x, "funneled_duckplyr_df")) { +as_lavish_duckplyr_df <- function(x) { + if (!inherits(x, "frugal_duckplyr_df")) { return(x) } @@ -34,46 +34,46 @@ as_unfunneled_duckplyr_df <- function(x) { out <- rel_to_df(rel, allow_materialization = TRUE) out <- dplyr_reconstruct(out, x) - remove_funneled_duckplyr_df_class(out) + remove_frugal_duckplyr_df_class(out) } -is_funneled_duckplyr_df <- function(x) { - inherits(x, "funneled_duckplyr_df") +is_frugal_duckplyr_df <- function(x) { + inherits(x, "frugal_duckplyr_df") } -get_funnel_duckplyr_df <- function(x) { - if (!is_funneled_duckplyr_df(x)) { - return("open") +get_collect_duckplyr_df <- function(x) { + if (!is_frugal_duckplyr_df(x)) { + return("any_size") } - funnel <- attr(x, "funnel") - if (is.null(funnel)) { - return("closed") + collect <- attr(x, "collect") + if (is.null(collect)) { + return("always_manual") } - funnel + collect } -remove_funneled_duckplyr_df_class <- function(x) { - class(x) <- setdiff(class(x), "funneled_duckplyr_df") - attr(x, "funnel") <- NULL +remove_frugal_duckplyr_df_class <- function(x) { + class(x) <- setdiff(class(x), "frugal_duckplyr_df") + attr(x, "collect") <- NULL x } duckplyr_reconstruct <- function(rel, template) { - funnel <- get_funnel_duckplyr_df(template) - funnel_parsed <- funnel_parse(funnel) + collect <- get_collect_duckplyr_df(template) + collect_parsed <- collect_parse(collect) out <- rel_to_df( rel, - allow_materialization = funnel_parsed$allow_materialization, - n_rows = funnel_parsed$n_rows, - n_cells = 
funnel_parsed$n_cells + allow_materialization = collect_parsed$allow_materialization, + n_rows = collect_parsed$n_rows, + n_cells = collect_parsed$n_cells ) dplyr_reconstruct(out, template) } #' @export -collect.funneled_duckplyr_df <- function(x, ...) { +collect.frugal_duckplyr_df <- function(x, ...) { # Do nothing if already materialized if (is.null(duckdb$rel_from_altrep_df(x, allow_materialized = FALSE))) { out <- x @@ -83,7 +83,7 @@ collect.funneled_duckplyr_df <- function(x, ...) { out <- dplyr_reconstruct(out, x) } - out <- remove_funneled_duckplyr_df_class(out) + out <- remove_frugal_duckplyr_df_class(out) collect(out) } @@ -95,7 +95,7 @@ as.data.frame.duckplyr_df <- function(x, row.names = NULL, optional = FALSE, ... } #' @export -as.data.frame.funneled_duckplyr_df <- function(x, row.names = NULL, optional = FALSE, ...) { +as.data.frame.frugal_duckplyr_df <- function(x, row.names = NULL, optional = FALSE, ...) { out <- collect(x) as.data.frame(out, row.names = row.names, optional = optional, ...) } diff --git a/R/group_by.R b/R/group_by.R index c695621ba..804169b05 100644 --- a/R/group_by.R +++ b/R/group_by.R @@ -11,7 +11,7 @@ group_by.duckplyr_df <- function(.data, ..., .add = FALSE, .drop = group_by_drop ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) group_by <- dplyr$group_by.data.frame out <- group_by(.data, ..., .add = .add, .drop = .drop) diff --git a/R/group_indices.R b/R/group_indices.R index 8c0d5a7e3..413b94537 100644 --- a/R/group_indices.R +++ b/R/group_indices.R @@ -11,7 +11,7 @@ group_indices.duckplyr_df <- function(.data, ...) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) group_indices <- dplyr$group_indices.data.frame out <- group_indices(.data, ...) diff --git a/R/group_keys.R b/R/group_keys.R index d3e66f924..0e6a1301f 100644 --- a/R/group_keys.R +++ b/R/group_keys.R @@ -11,7 +11,7 @@ group_keys.duckplyr_df <- function(.tbl, ...) 
{ ) # dplyr forward - check_funneled(.tbl, duckplyr_error) + check_prudence(.tbl, duckplyr_error) group_keys <- dplyr$group_keys.data.frame out <- group_keys(.tbl, ...) diff --git a/R/group_map.R b/R/group_map.R index e4dd9e4c1..e8cb4840a 100644 --- a/R/group_map.R +++ b/R/group_map.R @@ -16,7 +16,7 @@ group_map.duckplyr_df <- function(.data, .f, ..., .keep = FALSE, keep = deprecat ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) group_map <- dplyr$group_map.data.frame out <- group_map(.data, .f, ..., .keep = .keep) diff --git a/R/group_modify.R b/R/group_modify.R index f9605e795..1169b465a 100644 --- a/R/group_modify.R +++ b/R/group_modify.R @@ -16,7 +16,7 @@ group_modify.duckplyr_df <- function(.data, .f, ..., .keep = FALSE, keep = depre ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) group_modify <- dplyr$group_modify.data.frame out <- group_modify(.data, .f, ..., .keep = .keep) diff --git a/R/group_nest.R b/R/group_nest.R index 2bd16c083..43b930167 100644 --- a/R/group_nest.R +++ b/R/group_nest.R @@ -11,7 +11,7 @@ group_nest.duckplyr_df <- function(.tbl, ..., .key = "data", keep = FALSE) { ) # dplyr forward - check_funneled(.tbl, duckplyr_error) + check_prudence(.tbl, duckplyr_error) group_nest <- dplyr$group_nest.data.frame out <- group_nest(.tbl, ..., .key = .key, keep = keep) diff --git a/R/group_size.R b/R/group_size.R index a09d228ad..01088472b 100644 --- a/R/group_size.R +++ b/R/group_size.R @@ -11,7 +11,7 @@ group_size.duckplyr_df <- function(x) { ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) group_size <- dplyr$group_size.data.frame out <- group_size(x) diff --git a/R/group_split.R b/R/group_split.R index 94a15eb07..90fd211f5 100644 --- a/R/group_split.R +++ b/R/group_split.R @@ -16,7 +16,7 @@ group_split.duckplyr_df <- function(.tbl, ..., .keep = TRUE, keep = deprecated() ) # dplyr forward - check_funneled(.tbl, 
duckplyr_error) + check_prudence(.tbl, duckplyr_error) group_split <- dplyr$group_split.data.frame out <- group_split(.tbl, ..., .keep = .keep) diff --git a/R/group_trim.R b/R/group_trim.R index 16071e58e..7459e771d 100644 --- a/R/group_trim.R +++ b/R/group_trim.R @@ -11,7 +11,7 @@ group_trim.duckplyr_df <- function(.tbl, .drop = group_by_drop_default(.tbl)) { ) # dplyr forward - check_funneled(.tbl, duckplyr_error) + check_prudence(.tbl, duckplyr_error) group_trim <- dplyr$group_trim.data.frame out <- group_trim(.tbl, .drop) diff --git a/R/groups.R b/R/groups.R index 1e4812e0b..63ca12561 100644 --- a/R/groups.R +++ b/R/groups.R @@ -11,7 +11,7 @@ groups.duckplyr_df <- function(x) { ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) groups <- dplyr$groups.data.frame out <- groups(x) diff --git a/R/inner_join.R b/R/inner_join.R index 24fbbb71c..29bc68699 100644 --- a/R/inner_join.R +++ b/R/inner_join.R @@ -25,7 +25,7 @@ inner_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) inner_join <- dplyr$inner_join.data.frame out <- inner_join(x, y, by, copy = FALSE, suffix, ..., keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship) diff --git a/R/intersect.R b/R/intersect.R index b6f7d053e..a546ac181 100644 --- a/R/intersect.R +++ b/R/intersect.R @@ -44,7 +44,7 @@ intersect.duckplyr_df <- function(x, y, ...) { ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) intersect <- dplyr$intersect.data.frame out <- intersect(x, y, ...) 
diff --git a/R/io2.R b/R/io2.R index 08d54c149..8dcddcf04 100644 --- a/R/io2.R +++ b/R/io2.R @@ -13,10 +13,10 @@ NULL #' #' @rdname read_file_duckdb #' @export -read_parquet_duckdb <- function(path, ..., funnel = c(cells = 1e6), options = list()) { +read_parquet_duckdb <- function(path, ..., collect = c(cells = 1e6), options = list()) { check_dots_empty() - read_file_duckdb(path, "read_parquet", funnel = funnel, options = options) + read_file_duckdb(path, "read_parquet", collect = collect, options = options) } #' @description @@ -41,8 +41,8 @@ read_parquet_duckdb <- function(path, ..., funnel = c(cells = 1e6), options = li #' # Materialize explicitly #' collect(df)$a #' -#' # Automatic materialization with funnel = "open" -#' df <- read_csv_duckdb(path, funnel = "open") +#' # Automatic materialization with collect = "any_size" +#' df <- read_csv_duckdb(path, collect = "any_size") #' df$a #' #' # Specify column types @@ -50,10 +50,10 @@ read_parquet_duckdb <- function(path, ..., funnel = c(cells = 1e6), options = li #' path, #' options = list(delim = ",", types = list(c("DOUBLE", "VARCHAR"))) #' ) -read_csv_duckdb <- function(path, ..., funnel = c(cells = 1e6), options = list()) { +read_csv_duckdb <- function(path, ..., collect = c(cells = 1e6), options = list()) { check_dots_empty() - read_file_duckdb(path, "read_csv_auto", funnel = funnel, options = options) + read_file_duckdb(path, "read_csv_auto", collect = collect, options = options) } #' @description @@ -71,10 +71,10 @@ read_csv_duckdb <- function(path, ..., funnel = c(cells = 1e6), options = list() #' db_exec("INSTALL json") #' db_exec("LOAD json") #' read_json_duckdb(path) -read_json_duckdb <- function(path, ..., funnel = c(cells = 1e6), options = list()) { +read_json_duckdb <- function(path, ..., collect = c(cells = 1e6), options = list()) { check_dots_empty() - read_file_duckdb(path, "read_json", funnel = funnel, options = options) + read_file_duckdb(path, "read_json", collect = collect, options = options) 
} #' @description @@ -85,8 +85,8 @@ read_json_duckdb <- function(path, ..., funnel = c(cells = 1e6), options = list( #' pass a wildcard or a character vector to the `path` argument, #' #' @details -#' By default, a funneled duckplyr frame, with a limit of one million cells, is created. -#' See the "Funneling" section in [duckdb_tibble()] for details. +#' By default, a frugal duckplyr frame, with a limit of one million cells, is created. +#' See the "Prudence" section in [duckdb_tibble()] for details. #' #' @inheritParams rlang::args_dots_empty #' @@ -94,9 +94,9 @@ read_json_duckdb <- function(path, ..., funnel = c(cells = 1e6), options = list( #' @param table_function The name of a table-valued #' DuckDB function such as `"read_parquet"`, #' `"read_csv"`, `"read_csv_auto"` or `"read_json"`. -#' @param funnel Logical, whether to create a funneled duckplyr frame. -#' By default, a funneled duckplyr frame is created. -#' See the "Funneling" section in [duckdb_tibble()] for details. +#' @param collect Logical, whether to create a frugal duckplyr frame. +#' By default, a frugal duckplyr frame is created. +#' See the "Prudence" section in [duckdb_tibble()] for details. #' @param options Arguments to the DuckDB function #' indicated by `table_function`. 
#' @@ -108,7 +108,7 @@ read_file_duckdb <- function( path, table_function, ..., - funnel = c(cells = 1e6), + collect = c(cells = 1e6), options = list() ) { check_dots_empty() @@ -121,10 +121,10 @@ read_file_duckdb <- function( path <- list(path) } - duckfun(table_function, c(list(path), options), funnel = funnel) + duckfun(table_function, c(list(path), options), collect = collect) } -duckfun <- function(table_function, args, ..., funnel) { +duckfun <- function(table_function, args, ..., collect) { if (!is.list(args)) { cli::cli_abort("{.arg args} must be a list.") } @@ -148,12 +148,12 @@ duckfun <- function(table_function, args, ..., funnel) { meta_rel_register_file(rel, table_function, path, options) - # Start with funnel, to avoid unwanted materialization + # Start with collect, to avoid unwanted materialization df <- duckdb$rel_to_altrep(rel, allow_materialization = FALSE) - out <- new_duckdb_tibble(df, funnel = "closed") + out <- new_duckdb_tibble(df, collect = "always_manual") - if (!identical(funnel, "closed")) { - out <- as_duckdb_tibble(out, funnel = funnel) + if (!identical(collect, "always_manual")) { + out <- as_duckdb_tibble(out, collect = collect) } out diff --git a/R/left_join.R b/R/left_join.R index 2d0b0d31d..f171a8054 100644 --- a/R/left_join.R +++ b/R/left_join.R @@ -26,7 +26,7 @@ left_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x" ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) left_join <- dplyr$left_join.data.frame out <- left_join(x, y, by, copy = FALSE, suffix, ..., keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship) diff --git a/R/mutate.R b/R/mutate.R index 256d3966c..1d11d92a8 100644 --- a/R/mutate.R +++ b/R/mutate.R @@ -105,7 +105,7 @@ mutate.duckplyr_df <- function(.data, ..., .by = NULL, .keep = c("all", "used", ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) 
mutate <- dplyr$mutate.data.frame out <- mutate(.data, ..., .by = {{ .by }}, .keep = .keep, .before = {{ .before }}, .after = {{ .after }}) diff --git a/R/n_groups.R b/R/n_groups.R index ad5811a33..0cfb92d9e 100644 --- a/R/n_groups.R +++ b/R/n_groups.R @@ -11,7 +11,7 @@ n_groups.duckplyr_df <- function(x) { ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) n_groups <- dplyr$n_groups.data.frame out <- n_groups(x) diff --git a/R/nest_by.R b/R/nest_by.R index 2a1fb9cb0..0efb1c00f 100644 --- a/R/nest_by.R +++ b/R/nest_by.R @@ -10,7 +10,7 @@ nest_by.duckplyr_df <- function(.data, ..., .key = "data", .keep = FALSE) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) nest_by <- dplyr$nest_by.data.frame out <- nest_by(.data, ..., .key = .key, .keep = .keep) diff --git a/R/nest_join.R b/R/nest_join.R index e8c5712e5..f1f28d19c 100644 --- a/R/nest_join.R +++ b/R/nest_join.R @@ -20,7 +20,7 @@ nest_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, keep = NULL, na ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) x_df <- x class(x_df) <- setdiff(class(x_df), "duckplyr_df") diff --git a/R/print.R b/R/print.R index bf22a9327..fd796827b 100644 --- a/R/print.R +++ b/R/print.R @@ -4,7 +4,7 @@ tbl_sum.duckplyr_df <- function(x) { c("A duckplyr data frame" = cli::pluralize("{length(x)} variable{?s}")) } -# dim.funneled_duckplyr_df is not called, special dispatch +# dim.frugal_duckplyr_df is not called, special dispatch #' @importFrom pillar tbl_nrow #' @export diff --git a/R/pull.R b/R/pull.R index a37e7ba76..69d1129a8 100644 --- a/R/pull.R +++ b/R/pull.R @@ -31,7 +31,7 @@ pull.duckplyr_df <- function(.data, var = -1, name = NULL, ...) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) pull <- dplyr$pull.data.frame out <- pull(.data, {{ var }}, {{ name }}, ...) 
diff --git a/R/reframe.R b/R/reframe.R index 2ae434899..d1f1881cb 100644 --- a/R/reframe.R +++ b/R/reframe.R @@ -10,7 +10,7 @@ reframe.duckplyr_df <- function(.data, ..., .by = NULL) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) reframe <- dplyr$reframe.data.frame out <- reframe(.data, ..., .by = {{ .by }}) diff --git a/R/relational.R b/R/relational.R index 675374e66..9835d0f50 100644 --- a/R/relational.R +++ b/R/relational.R @@ -123,16 +123,16 @@ new_failing_mask <- function(names_data) { #' @param duckplyr_error Return value from rel_try() #' @noRd -check_funneled <- function(x, duckplyr_error, call = caller_env()) { +check_prudence <- function(x, duckplyr_error, call = caller_env()) { msg <- tryCatch(nrow(x), error = conditionMessage) if (is.character(msg)) { duckplyr_error_msg <- if (is.character(duckplyr_error)) duckplyr_error duckplyr_error_parent <- if (is_condition(duckplyr_error)) duckplyr_error cli::cli_abort(parent = duckplyr_error_parent, call = call, c( - "This operation cannot be carried out by DuckDB, and the input is a funneled duckplyr frame.", + "This operation cannot be carried out by DuckDB, and the input is a frugal duckplyr frame.", "*" = duckplyr_error_msg, - "i" = 'Use {.code compute(funnel = "open")} to materialize to temporary storage and continue with {.pkg duckplyr}.', - "i" = 'See {.run vignette("funnel")} for other options.' + "i" = 'Use {.code compute(collect = "any_size")} to materialize to temporary storage and continue with {.pkg duckplyr}.', + "i" = 'See {.run vignette("collect")} for other options.' 
)) } } diff --git a/R/relocate.R b/R/relocate.R index c39d714fd..e89e6be87 100644 --- a/R/relocate.R +++ b/R/relocate.R @@ -29,7 +29,7 @@ relocate.duckplyr_df <- function(.data, ..., .before = NULL, .after = NULL) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) relocate <- dplyr$relocate.data.frame out <- relocate(.data, ..., .before = {{ .before }}, .after = {{ .after }}) diff --git a/R/rename.R b/R/rename.R index 0e18f8577..98aa9eaea 100644 --- a/R/rename.R +++ b/R/rename.R @@ -27,7 +27,7 @@ rename.duckplyr_df <- function(.data, ...) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) rename <- dplyr$rename.data.frame out <- rename(.data, ...) diff --git a/R/rename_with.R b/R/rename_with.R index d3d4d8c22..76e339021 100644 --- a/R/rename_with.R +++ b/R/rename_with.R @@ -10,7 +10,7 @@ rename_with.duckplyr_df <- function(.data, .fn, .cols = everything(), ...) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) rename_with <- dplyr$rename_with.data.frame out <- rename_with(.data, .fn, {{ .cols }}, ...) 
diff --git a/R/right_join.R b/R/right_join.R index ae8073839..38ed32b76 100644 --- a/R/right_join.R +++ b/R/right_join.R @@ -25,7 +25,7 @@ right_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) right_join <- dplyr$right_join.data.frame out <- right_join(x, y, by, copy = FALSE, suffix, ..., keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship) diff --git a/R/rows_append.R b/R/rows_append.R index a65efc453..177c08bf3 100644 --- a/R/rows_append.R +++ b/R/rows_append.R @@ -10,7 +10,7 @@ rows_append.duckplyr_df <- function(x, y, ..., copy = FALSE, in_place = FALSE) { ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) rows_append <- dplyr$rows_append.data.frame out <- rows_append(x, y, ..., copy = copy, in_place = in_place) diff --git a/R/rows_delete.R b/R/rows_delete.R index db11bf2a3..d6321d495 100644 --- a/R/rows_delete.R +++ b/R/rows_delete.R @@ -10,7 +10,7 @@ rows_delete.duckplyr_df <- function(x, y, by = NULL, ..., unmatched = c("error", ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) rows_delete <- dplyr$rows_delete.data.frame out <- rows_delete(x, y, by, ..., unmatched = unmatched, copy = copy, in_place = in_place) diff --git a/R/rows_insert.R b/R/rows_insert.R index eee8d8696..de2ee482f 100644 --- a/R/rows_insert.R +++ b/R/rows_insert.R @@ -10,7 +10,7 @@ rows_insert.duckplyr_df <- function(x, y, by = NULL, ..., conflict = c("error", ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) rows_insert <- dplyr$rows_insert.data.frame out <- rows_insert(x, y, by, ..., conflict = conflict, copy = copy, in_place = in_place) diff --git a/R/rows_patch.R b/R/rows_patch.R index db34a7adc..08a8d221d 100644 --- a/R/rows_patch.R +++ b/R/rows_patch.R @@ -10,7 +10,7 @@ rows_patch.duckplyr_df <- 
function(x, y, by = NULL, ..., unmatched = c("error", ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) rows_patch <- dplyr$rows_patch.data.frame out <- rows_patch(x, y, by, ..., unmatched = unmatched, copy = copy, in_place = in_place) diff --git a/R/rows_update.R b/R/rows_update.R index 78be3b07b..234705a9a 100644 --- a/R/rows_update.R +++ b/R/rows_update.R @@ -10,7 +10,7 @@ rows_update.duckplyr_df <- function(x, y, by = NULL, ..., unmatched = c("error", ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) rows_update <- dplyr$rows_update.data.frame out <- rows_update(x, y, by, ..., unmatched = unmatched, copy = copy, in_place = in_place) diff --git a/R/rows_upsert.R b/R/rows_upsert.R index 8dded4c18..55b6cc860 100644 --- a/R/rows_upsert.R +++ b/R/rows_upsert.R @@ -10,7 +10,7 @@ rows_upsert.duckplyr_df <- function(x, y, by = NULL, ..., copy = FALSE, in_place ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) rows_upsert <- dplyr$rows_upsert.data.frame out <- rows_upsert(x, y, by, ..., copy = copy, in_place = in_place) diff --git a/R/rowwise.R b/R/rowwise.R index 1a8a73908..0fa20a240 100644 --- a/R/rowwise.R +++ b/R/rowwise.R @@ -11,7 +11,7 @@ rowwise.duckplyr_df <- function(data, ...) { ) # dplyr forward - check_funneled(data, duckplyr_error) + check_prudence(data, duckplyr_error) rowwise <- dplyr$rowwise.data.frame out <- rowwise(data, ...) diff --git a/R/select.R b/R/select.R index 25c9db738..ed0b4dede 100644 --- a/R/select.R +++ b/R/select.R @@ -33,7 +33,7 @@ select.duckplyr_df <- function(.data, ...) { # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) select <- dplyr$select.data.frame out <- select(.data, ...) 
diff --git a/R/semi_join.R b/R/semi_join.R index 9fde67091..af5ae5cba 100644 --- a/R/semi_join.R +++ b/R/semi_join.R @@ -19,7 +19,7 @@ semi_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, ..., na_matches ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) semi_join <- dplyr$semi_join.data.frame out <- semi_join(x, y, by, copy = FALSE, ..., na_matches = na_matches) diff --git a/R/setdiff.R b/R/setdiff.R index 73c39c40e..a3eba1842 100644 --- a/R/setdiff.R +++ b/R/setdiff.R @@ -44,7 +44,7 @@ setdiff.duckplyr_df <- function(x, y, ...) { ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) setdiff <- dplyr$setdiff.data.frame out <- setdiff(x, y, ...) diff --git a/R/setequal.R b/R/setequal.R index 257ed6aea..1ec78f919 100644 --- a/R/setequal.R +++ b/R/setequal.R @@ -10,7 +10,7 @@ setequal.duckplyr_df <- function(x, y, ...) { ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) setequal <- dplyr$setequal.data.frame out <- setequal(x, y, ...) 
diff --git a/R/slice.R b/R/slice.R index 2af9506da..a45aff0bf 100644 --- a/R/slice.R +++ b/R/slice.R @@ -10,7 +10,7 @@ slice.duckplyr_df <- function(.data, ..., .by = NULL, .preserve = FALSE) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) slice <- dplyr$slice.data.frame out <- slice(.data, ..., .by = {{ .by }}, .preserve = .preserve) diff --git a/R/slice_head.R b/R/slice_head.R index 7c2cca5c7..e0df328fb 100644 --- a/R/slice_head.R +++ b/R/slice_head.R @@ -10,7 +10,7 @@ slice_head.duckplyr_df <- function(.data, ..., n, prop, by = NULL) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) slice_head <- dplyr$slice_head.data.frame out <- slice_head(.data, ..., n = n, prop = prop, by = {{ by }}) diff --git a/R/slice_sample.R b/R/slice_sample.R index a5362e894..4c3e5c7d4 100644 --- a/R/slice_sample.R +++ b/R/slice_sample.R @@ -10,7 +10,7 @@ slice_sample.duckplyr_df <- function(.data, ..., n, prop, by = NULL, weight_by = ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) slice_sample <- dplyr$slice_sample.data.frame out <- slice_sample(.data, ..., n = n, prop = prop, by = {{ by }}, weight_by = {{ weight_by }}, replace = replace) diff --git a/R/slice_tail.R b/R/slice_tail.R index 418b037e1..134ee6290 100644 --- a/R/slice_tail.R +++ b/R/slice_tail.R @@ -10,7 +10,7 @@ slice_tail.duckplyr_df <- function(.data, ..., n, prop, by = NULL) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) slice_tail <- dplyr$slice_tail.data.frame out <- slice_tail(.data, ..., n = n, prop = prop, by = {{ by }}) diff --git a/R/sql.R b/R/sql.R index a85de000c..f31c2923b 100644 --- a/R/sql.R +++ b/R/sql.R @@ -18,7 +18,7 @@ #' @export #' @examples #' read_sql_duckdb("FROM duckdb_settings()") -read_sql_duckdb <- function(sql, ..., funnel = c(cells = 1e6), con = NULL) { +read_sql_duckdb <- function(sql, ..., 
collect = c(cells = 1e6), con = NULL) { if (!is_string(sql)) { cli::cli_abort("{.arg sql} must be a string.") } @@ -33,10 +33,10 @@ read_sql_duckdb <- function(sql, ..., funnel = c(cells = 1e6), con = NULL) { meta_rel_register(rel, expr(duckdb$rel_from_sql(con, !!sql))) df <- duckdb$rel_to_altrep(rel, allow_materialization = FALSE) - out <- new_duckdb_tibble(df, funnel = "closed") + out <- new_duckdb_tibble(df, collect = "always_manual") - if (!identical(funnel, "closed")) { - out <- as_duckdb_tibble(out, funnel = funnel) + if (!identical(collect, "always_manual")) { + out <- as_duckdb_tibble(out, collect = collect) } out diff --git a/R/summarise.R b/R/summarise.R index 245c02202..c4dc8bb45 100644 --- a/R/summarise.R +++ b/R/summarise.R @@ -60,7 +60,7 @@ summarise.duckplyr_df <- function(.data, ..., .by = NULL, .groups = NULL) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) summarise <- dplyr$summarise.data.frame out <- summarise(.data, ..., .by = {{ .by }}, .groups = .groups) diff --git a/R/symdiff.R b/R/symdiff.R index 3f537cd1d..60cab3604 100644 --- a/R/symdiff.R +++ b/R/symdiff.R @@ -47,7 +47,7 @@ symdiff.duckplyr_df <- function(x, y, ...) { ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) symdiff <- dplyr$symdiff.data.frame out <- symdiff(x, y, ...) diff --git a/R/transmute.R b/R/transmute.R index 3ab9b6931..659636fb8 100644 --- a/R/transmute.R +++ b/R/transmute.R @@ -25,7 +25,7 @@ transmute.duckplyr_df <- function(.data, ...) { ) # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) transmute <- dplyr$transmute.data.frame out <- transmute(.data, ...) diff --git a/R/ungroup.R b/R/ungroup.R index b2f156365..b7535904f 100644 --- a/R/ungroup.R +++ b/R/ungroup.R @@ -10,7 +10,7 @@ ungroup.duckplyr_df <- function(x, ...) 
{ ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) ungroup <- dplyr$ungroup.data.frame out <- ungroup(x, ...) diff --git a/R/union.R b/R/union.R index 7835fce12..0f6026085 100644 --- a/R/union.R +++ b/R/union.R @@ -11,7 +11,7 @@ union.duckplyr_df <- function(x, y, ...) { duckplyr_error <- NULL # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) union <- dplyr$union.data.frame out <- union(x, y, ...) diff --git a/R/union_all.R b/R/union_all.R index e511a5b61..0801b188b 100644 --- a/R/union_all.R +++ b/R/union_all.R @@ -47,7 +47,7 @@ union_all.duckplyr_df <- function(x, y, ...) { ) # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) union_all <- dplyr$union_all.data.frame out <- union_all(x, y, ...) diff --git a/README.Rmd b/README.Rmd index d90df75fb..56b68f935 100644 --- a/README.Rmd +++ b/README.Rmd @@ -223,7 +223,7 @@ Of course, working with Parquet, CSV, or JSON files downloaded locally is possib - `vignette("large")`: Tools for working with large data -- `vignette("funnel")`: How duckplyr can help protect memory when working with large data +- `vignette("collect")`: How duckplyr can help protect memory when working with large data - `vignette("limits")`: Translation of dplyr employed by duckplyr, and current limitations diff --git a/README.md b/README.md index 6c78ec49f..504479fd1 100644 --- a/README.md +++ b/README.md @@ -341,7 +341,7 @@ locally is possible as well. 
- [`vignette("large")`](https://duckplyr.tidyverse.org/dev/articles/large.html): Tools for working with large data -- [`vignette("funnel")`](https://duckplyr.tidyverse.org/dev/articles/funnel.html): +- [`vignette("collect")`](https://duckplyr.tidyverse.org/dev/articles/collect.html): How duckplyr can help protect memory when working with large data - [`vignette("limits")`](https://duckplyr.tidyverse.org/dev/articles/limits.html): diff --git a/_pkgdown.yml b/_pkgdown.yml index 12f2acb64..d5c8571d9 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -13,7 +13,7 @@ articles: navbar: ~ contents: - large - - funnel + - collect - limits - developers - telemetry diff --git a/index.md b/index.md index 3e4713934..c0b76540b 100644 --- a/index.md +++ b/index.md @@ -339,7 +339,7 @@ Of course, working with Parquet, CSV, or JSON files downloaded locally is possib - `vignette("large")`: Tools for working with large data -- `vignette("funnel")`: How duckplyr can help protect memory when working with large data +- `vignette("collect")`: How duckplyr can help protect memory when working with large data - `vignette("limits")`: Translation of dplyr employed by duckplyr, and current limitations diff --git a/man/compute.duckplyr_df.Rd b/man/compute.duckplyr_df.Rd index 2930fdfde..4f2be8414 100644 --- a/man/compute.duckplyr_df.Rd +++ b/man/compute.duckplyr_df.Rd @@ -7,7 +7,7 @@ \method{compute}{duckplyr_df}( x, ..., - funnel = NULL, + collect = NULL, name = NULL, schema_name = NULL, temporary = TRUE @@ -20,10 +20,10 @@ details.} \item{...}{Arguments passed on to methods} -\item{funnel}{Either a logical: +\item{collect}{Either a logical: \itemize{ -\item Set to \code{TRUE} to return a funneled data frame. -\item Set to \code{FALSE} to return an unfunneled data frame. +\item Set to \code{TRUE} to return a frugal data frame. +\item Set to \code{FALSE} to return an lavish data frame. 
} Or a named vector with at least one of @@ -38,8 +38,8 @@ measured in cells (values) and rows in the resulting data frame. If \code{cells} is specified but not \code{rows}, \code{rows} is \code{Inf}. If \code{rows} is specified but not \code{cells}, \code{cells} is \code{Inf}. -The default is to inherit the funneling of the input. -see the "Funneling" section.} +The default is to inherit the prudence of the input. +see the "Prudence" section.} \item{name}{The name of the table to store the result in.} @@ -49,12 +49,12 @@ see the "Funneling" section.} } \description{ This is a method for the \code{\link[dplyr:compute]{dplyr::compute()}} generic. -For a (funneled) duckplyr frame, +For a (frugal) duckplyr frame, \code{compute()} executes a query but stores it in a (temporary) table, or in a Parquet or CSV file. The result is a duckplyr frame that can be used with subsequent dplyr verbs. } -\section{Funneling}{ +\section{Prudence}{ Data frames backed by duckplyr, with class \code{"duckplyr_df"}, behave as regular data frames in almost all respects. @@ -63,7 +63,7 @@ or retrieving the number of rows with \code{\link[=nrow]{nrow()}}, works identic Conceptually, duckplyr frames are "eager": from a user's perspective, they behave like regular data frames. Under the hood, two key differences provide improved performance and usability: -lazy materialization and funneling. +lazy materialization and prudence. For a duckplyr frame that is the result of a dplyr operation, accessing column data or retrieving the number of rows will trigger a computation @@ -77,16 +77,16 @@ but different from \pkg{dplyr} where each intermediate step is computed. Being both "eager" and "lazy" at the same time introduces a challenge: it is too easy to accidentally trigger computation, which may be prohibitive if an intermediate result is too large. -This is where funneling comes in. +This is where prudence comes in. 
\itemize{ -\item For unfunneled duckplyr frames, the underlying DuckDB computation is carried out +\item For lavish duckplyr frames, the underlying DuckDB computation is carried out upon the first request. Once the results are computed, they are cached and subsequent requests are fast. This is a good choice for small to medium-sized data, where DuckDB can provide a nice speedup but materializing the data is affordable at any stage. This is the default for \code{duckdb_tibble()} and \code{as_duckdb_tibble()}. -\item For funneled duckplyr frames, accessing a column or requesting the number of rows +\item For frugal duckplyr frames, accessing a column or requesting the number of rows triggers an error, either unconditionally, or if the result exceeds a certain size. This is a good choice for large data sets where the cost of materializing the data may be prohibitive due to size or computation time, @@ -95,28 +95,28 @@ The default for the ingestion functions like \code{\link[=read_parquet_duckdb]{r is to limit the result size to one million cells (values in the resulting data frame). } -Funneled duckplyr frames behave like \href{https://dtplyr.tidyverse.org/reference/lazy_dt.html}{\code{dtplyr}'s lazy frames}, +Frugal duckplyr frames behave like \href{https://dtplyr.tidyverse.org/reference/lazy_dt.html}{\code{dtplyr}'s lazy frames}, or dbplyr's lazy frames: the computation only starts when you \strong{explicitly} request it with a "collect" function. -In dtplyr and dbplyr, there are no unfunneled frames: collection always needs to be +In dtplyr and dbplyr, there are no lavish frames: collection always needs to be explicit. -A funneled duckplyr frame can be converted to an unfunneled one with \code{as_duckdb_tibble(funnel = "open")}. +A frugal duckplyr frame can be converted to a lavish one with \code{as_duckdb_tibble(collect = "any_size")}. The \code{\link[=collect.duckplyr_df]{collect.duckplyr_df()}} method triggers computation and converts to a plain tibble.
Other useful methods include \code{\link[=compute_file]{compute_file()}} for storing results in a file, and \code{\link[=compute.duckplyr_df]{compute.duckplyr_df()}} for storing results in temporary storage on disk. -Beyond safety regarding memory usage, funneled frames also allow you +Beyond safety regarding memory usage, frugal frames also allow you to check that all operations are supported by DuckDB: -for a funneled frame with \code{funnel = "closed"}, fallbacks to dplyr are not possible. +for a frugal frame with \code{collect = "always_manual"}, fallbacks to dplyr are not possible. As a reminder, computing via DuckDB is currently not always possible, see \code{vignette("limits")} for the supported operations. In such cases, the original dplyr implementation is used, see \link{fallback} for details. As the original dplyr implementation accesses columns directly, the data must be materialized before a fallback can be executed. -This means that automatic fallback is only possible for "unfunneled" duckplyr frames, -while for "funneled" duckplyr frames, one of the aforementioned collection methods must be used first. +This means that automatic fallback is only possible for "lavish" duckplyr frames, +while for "frugal" duckplyr frames, one of the aforementioned collection methods must be used first. } \examples{ diff --git a/man/compute_file.Rd b/man/compute_file.Rd index 74af8f627..77294880f 100644 --- a/man/compute_file.Rd +++ b/man/compute_file.Rd @@ -6,9 +6,9 @@ \alias{compute_csv} \title{Compute results to a file} \usage{ -compute_parquet(x, path, ..., funnel = NULL, options = NULL) +compute_parquet(x, path, ..., collect = NULL, options = NULL) -compute_csv(x, path, ..., funnel = NULL, options = NULL) +compute_csv(x, path, ..., collect = NULL, options = NULL) } \arguments{ \item{x}{A data frame, data frame extension (e.g. 
a tibble), or a lazy @@ -19,10 +19,10 @@ details.} \item{...}{These dots are for future extensions and must be empty.} -\item{funnel}{Either a logical: +\item{collect}{Either a logical: \itemize{ -\item Set to \code{TRUE} to return a funneled data frame. -\item Set to \code{FALSE} to return an unfunneled data frame. +\item Set to \code{TRUE} to return a frugal data frame. +\item Set to \code{FALSE} to return a lavish data frame. } Or a named vector with at least one of @@ -37,8 +37,8 @@ measured in cells (values) and rows in the resulting data frame. If \code{cells} is specified but not \code{rows}, \code{rows} is \code{Inf}. If \code{rows} is specified but not \code{cells}, \code{cells} is \code{Inf}. -The default is to inherit the funneling of the input. -see the "Funneling" section.} +The default is to inherit the prudence of the input. +see the "Prudence" section.} \item{options}{A list of additional options to pass to create the storage format, see \url{https://duckdb.org/docs/data/parquet/overview#writing-to-parquet-files} @@ -46,7 +46,7 @@ or \url{https://duckdb.org/docs/data/csv/overview#writing-using-the-copy-stateme for details.} } \description{ -These functions apply to (funneled) duckplyr frames. +These functions apply to (frugal) duckplyr frames. They executes a query and stores the results in a flat file. The result is a duckplyr frame that can be used with subsequent dplyr verbs. @@ -54,7 +54,7 @@ The result is a duckplyr frame that can be used with subsequent dplyr verbs. \code{compute_csv()} creates a CSV file. } -\section{Funneling}{ +\section{Prudence}{ Data frames backed by duckplyr, with class \code{"duckplyr_df"}, behave as regular data frames in almost all respects. @@ -63,7 +63,7 @@ or retrieving the number of rows with \code{\link[=nrow]{nrow()}}, works identic Conceptually, duckplyr frames are "eager": from a user's perspective, they behave like regular data frames.
Under the hood, two key differences provide improved performance and usability: -lazy materialization and funneling. +lazy materialization and prudence. For a duckplyr frame that is the result of a dplyr operation, accessing column data or retrieving the number of rows will trigger a computation @@ -77,16 +77,16 @@ but different from \pkg{dplyr} where each intermediate step is computed. Being both "eager" and "lazy" at the same time introduces a challenge: it is too easy to accidentally trigger computation, which may be prohibitive if an intermediate result is too large. -This is where funneling comes in. +This is where prudence comes in. \itemize{ -\item For unfunneled duckplyr frames, the underlying DuckDB computation is carried out +\item For lavish duckplyr frames, the underlying DuckDB computation is carried out upon the first request. Once the results are computed, they are cached and subsequent requests are fast. This is a good choice for small to medium-sized data, where DuckDB can provide a nice speedup but materializing the data is affordable at any stage. This is the default for \code{duckdb_tibble()} and \code{as_duckdb_tibble()}. -\item For funneled duckplyr frames, accessing a column or requesting the number of rows +\item For frugal duckplyr frames, accessing a column or requesting the number of rows triggers an error, either unconditionally, or if the result exceeds a certain size. This is a good choice for large data sets where the cost of materializing the data may be prohibitive due to size or computation time, @@ -95,28 +95,28 @@ The default for the ingestion functions like \code{\link[=read_parquet_duckdb]{r is to limit the result size to one million cells (values in the resulting data frame). 
} -Funneled duckplyr frames behave like \href{https://dtplyr.tidyverse.org/reference/lazy_dt.html}{\code{dtplyr}'s lazy frames}, +Frugal duckplyr frames behave like \href{https://dtplyr.tidyverse.org/reference/lazy_dt.html}{\code{dtplyr}'s lazy frames}, or dbplyr's lazy frames: the computation only starts when you \strong{explicitly} request it with a "collect" function. -In dtplyr and dbplyr, there are no unfunneled frames: collection always needs to be +In dtplyr and dbplyr, there are no lavish frames: collection always needs to be explicit. -A funneled duckplyr frame can be converted to an unfunneled one with \code{as_duckdb_tibble(funnel = "open")}. +A frugal duckplyr frame can be converted to a lavish one with \code{as_duckdb_tibble(collect = "any_size")}. The \code{\link[=collect.duckplyr_df]{collect.duckplyr_df()}} method triggers computation and converts to a plain tibble. Other useful methods include \code{\link[=compute_file]{compute_file()}} for storing results in a file, and \code{\link[=compute.duckplyr_df]{compute.duckplyr_df()}} for storing results in temporary storage on disk. -Beyond safety regarding memory usage, funneled frames also allow you +Beyond safety regarding memory usage, frugal frames also allow you to check that all operations are supported by DuckDB: -for a funneled frame with \code{funnel = "closed"}, fallbacks to dplyr are not possible. +for a frugal frame with \code{collect = "always_manual"}, fallbacks to dplyr are not possible. As a reminder, computing via DuckDB is currently not always possible, see \code{vignette("limits")} for the supported operations. In such cases, the original dplyr implementation is used, see \link{fallback} for details. As the original dplyr implementation accesses columns directly, the data must be materialized before a fallback can be executed.
-This means that automatic fallback is only possible for "unfunneled" duckplyr frames, -while for "funneled" duckplyr frames, one of the aforementioned collection methods must be used first. +This means that automatic fallback is only possible for "lavish" duckplyr frames, +while for "frugal" duckplyr frames, one of the aforementioned collection methods must be used first. } \examples{ diff --git a/man/duckdb_tibble.Rd b/man/duckdb_tibble.Rd index 423eb8afc..1852cfffd 100644 --- a/man/duckdb_tibble.Rd +++ b/man/duckdb_tibble.Rd @@ -6,9 +6,9 @@ \alias{is_duckdb_tibble} \title{duckplyr data frames} \usage{ -duckdb_tibble(..., .funnel = "open") +duckdb_tibble(..., .collect = "any_size") -as_duckdb_tibble(x, ..., funnel = "open") +as_duckdb_tibble(x, ..., collect = "any_size") is_duckdb_tibble(x) } @@ -16,10 +16,10 @@ is_duckdb_tibble(x) \item{...}{For \code{duckdb_tibble()}, passed on to \code{\link[tibble:tibble]{tibble::tibble()}}. For \code{as_duckdb_tibble()}, passed on to methods.} -\item{.funnel, funnel}{Either a logical: +\item{.collect, collect}{Either a logical: \itemize{ -\item Set to \code{TRUE} to return a funneled data frame. -\item Set to \code{FALSE} to return an unfunneled data frame. +\item Set to \code{TRUE} to return a frugal data frame. +\item Set to \code{FALSE} to return a lavish data frame. } Or a named vector with at least one of @@ -34,15 +34,15 @@ measured in cells (values) and rows in the resulting data frame. If \code{cells} is specified but not \code{rows}, \code{rows} is \code{Inf}. If \code{rows} is specified but not \code{cells}, \code{cells} is \code{Inf}. -The default is to inherit the funneling of the input. -see the "Funneling" section.} +The default is to inherit the prudence of the input.
+see the "Prudence" section.} \item{x}{The object to convert or to test.} } \value{ For \code{duckdb_tibble()} and \code{as_duckdb_tibble()}, an object with the following classes: \itemize{ -\item \code{"funneled_duckplyr_df"} if \code{.funnel} is \code{TRUE} +\item \code{"frugal_duckplyr_df"} if \code{.collect} is \code{TRUE} \item \code{"duckplyr_df"} \item Classes of a \link[tibble:tibble]{tibble::tibble} } @@ -56,15 +56,15 @@ This ensures that dplyr methods are dispatched correctly. For such objects, dplyr verbs such as \code{\link[dplyr:mutate]{dplyr::mutate()}}, \code{\link[dplyr:select]{dplyr::select()}} or \code{\link[dplyr:filter]{dplyr::filter()}} will use DuckDB. -\code{duckdb_tibble()} works like \code{\link[tibble:tibble]{tibble::tibble()}}, returning an "unfunneled" duckplyr data frame by default. -See the "Funneling" section below. +\code{duckdb_tibble()} works like \code{\link[tibble:tibble]{tibble::tibble()}}, returning a "lavish" duckplyr data frame by default. +See the "Prudence" section below. \code{as_duckdb_tibble()} converts a data frame or a dplyr lazy table to a duckplyr data frame. This is a generic function that can be overridden for custom classes. \code{is_duckdb_tibble()} returns \code{TRUE} if \code{x} is a duckplyr data frame. } -\section{Funneling}{ +\section{Prudence}{ Data frames backed by duckplyr, with class \code{"duckplyr_df"}, behave as regular data frames in almost all respects. @@ -73,7 +73,7 @@ or retrieving the number of rows with \code{\link[=nrow]{nrow()}}, works identic Conceptually, duckplyr frames are "eager": from a user's perspective, they behave like regular data frames. Under the hood, two key differences provide improved performance and usability: -lazy materialization and funneling. +lazy materialization and prudence.
For a duckplyr frame that is the result of a dplyr operation, accessing column data or retrieving the number of rows will trigger a computation @@ -87,16 +87,16 @@ but different from \pkg{dplyr} where each intermediate step is computed. Being both "eager" and "lazy" at the same time introduces a challenge: it is too easy to accidentally trigger computation, which may be prohibitive if an intermediate result is too large. -This is where funneling comes in. +This is where prudence comes in. \itemize{ -\item For unfunneled duckplyr frames, the underlying DuckDB computation is carried out +\item For lavish duckplyr frames, the underlying DuckDB computation is carried out upon the first request. Once the results are computed, they are cached and subsequent requests are fast. This is a good choice for small to medium-sized data, where DuckDB can provide a nice speedup but materializing the data is affordable at any stage. This is the default for \code{duckdb_tibble()} and \code{as_duckdb_tibble()}. -\item For funneled duckplyr frames, accessing a column or requesting the number of rows +\item For frugal duckplyr frames, accessing a column or requesting the number of rows triggers an error, either unconditionally, or if the result exceeds a certain size. This is a good choice for large data sets where the cost of materializing the data may be prohibitive due to size or computation time, @@ -105,28 +105,28 @@ The default for the ingestion functions like \code{\link[=read_parquet_duckdb]{r is to limit the result size to one million cells (values in the resulting data frame). } -Funneled duckplyr frames behave like \href{https://dtplyr.tidyverse.org/reference/lazy_dt.html}{\code{dtplyr}'s lazy frames}, +Frugal duckplyr frames behave like \href{https://dtplyr.tidyverse.org/reference/lazy_dt.html}{\code{dtplyr}'s lazy frames}, or dbplyr's lazy frames: the computation only starts when you \strong{explicitly} request it with a "collect" function. 
-In dtplyr and dbplyr, there are no unfunneled frames: collection always needs to be +In dtplyr and dbplyr, there are no lavish frames: collection always needs to be explicit. -A funneled duckplyr frame can be converted to an unfunneled one with \code{as_duckdb_tibble(funnel = "open")}. +A frugal duckplyr frame can be converted to a lavish one with \code{as_duckdb_tibble(collect = "any_size")}. The \code{\link[=collect.duckplyr_df]{collect.duckplyr_df()}} method triggers computation and converts to a plain tibble. Other useful methods include \code{\link[=compute_file]{compute_file()}} for storing results in a file, and \code{\link[=compute.duckplyr_df]{compute.duckplyr_df()}} for storing results in temporary storage on disk. -Beyond safety regarding memory usage, funneled frames also allow you +Beyond safety regarding memory usage, frugal frames also allow you to check that all operations are supported by DuckDB: -for a funneled frame with \code{funnel = "closed"}, fallbacks to dplyr are not possible. +for a frugal frame with \code{collect = "always_manual"}, fallbacks to dplyr are not possible. As a reminder, computing via DuckDB is currently not always possible, see \code{vignette("limits")} for the supported operations. In such cases, the original dplyr implementation is used, see \link{fallback} for details. As the original dplyr implementation accesses columns directly, the data must be materialized before a fallback can be executed. -This means that automatic fallback is only possible for "unfunneled" duckplyr frames, -while for "funneled" duckplyr frames, one of the aforementioned collection methods must be used first. +This means that automatic fallback is only possible for "lavish" duckplyr frames, +while for "frugal" duckplyr frames, one of the aforementioned collection methods must be used first. 
} \examples{ @@ -139,7 +139,7 @@ x \%>\% x$a -y <- duckdb_tibble(a = 1, .funnel = "closed") +y <- duckdb_tibble(a = 1, .collect = "always_manual") y try(length(y$a)) length(collect(y)$a) diff --git a/man/read_file_duckdb.Rd b/man/read_file_duckdb.Rd index 528de409d..79aeef5c4 100644 --- a/man/read_file_duckdb.Rd +++ b/man/read_file_duckdb.Rd @@ -7,17 +7,17 @@ \alias{read_json_duckdb} \title{Read Parquet, CSV, and other files using DuckDB} \usage{ -read_parquet_duckdb(path, ..., funnel = c(cells = 1e+06), options = list()) +read_parquet_duckdb(path, ..., collect = c(cells = 1e+06), options = list()) -read_csv_duckdb(path, ..., funnel = c(cells = 1e+06), options = list()) +read_csv_duckdb(path, ..., collect = c(cells = 1e+06), options = list()) -read_json_duckdb(path, ..., funnel = c(cells = 1e+06), options = list()) +read_json_duckdb(path, ..., collect = c(cells = 1e+06), options = list()) read_file_duckdb( path, table_function, ..., - funnel = c(cells = 1e+06), + collect = c(cells = 1e+06), options = list() ) } @@ -26,9 +26,9 @@ read_file_duckdb( \item{...}{These dots are for future extensions and must be empty.} -\item{funnel}{Logical, whether to create a funneled duckplyr frame. -By default, a funneled duckplyr frame is created. -See the "Funneling" section in \code{\link[=duckdb_tibble]{duckdb_tibble()}} for details.} +\item{collect}{Logical, whether to create a frugal duckplyr frame. +By default, a frugal duckplyr frame is created. +See the "Prudence" section in \code{\link[=duckdb_tibble]{duckdb_tibble()}} for details.} \item{options}{Arguments to the DuckDB function indicated by \code{table_function}.} @@ -58,8 +58,8 @@ To read multiple files with the same schema, pass a wildcard or a character vector to the \code{path} argument, } \details{ -By default, a funneled duckplyr frame, with a limit of one million cells, is created. -See the "Funneling" section in \code{\link[=duckdb_tibble]{duckdb_tibble()}} for details. 
+By default, a frugal duckplyr frame, with a limit of one million cells, is created. +See the "Prudence" section in \code{\link[=duckdb_tibble]{duckdb_tibble()}} for details. } \examples{ # Create simple CSV file @@ -78,8 +78,8 @@ try(print(df$a)) # Materialize explicitly collect(df)$a -# Automatic materialization with funnel = "open" -df <- read_csv_duckdb(path, funnel = "open") +# Automatic materialization with collect = "any_size" +df <- read_csv_duckdb(path, collect = "any_size") df$a # Specify column types diff --git a/man/read_sql_duckdb.Rd b/man/read_sql_duckdb.Rd index c79029adb..4e60a8454 100644 --- a/man/read_sql_duckdb.Rd +++ b/man/read_sql_duckdb.Rd @@ -4,16 +4,16 @@ \alias{read_sql_duckdb} \title{Return SQL query as duckdb_tibble} \usage{ -read_sql_duckdb(sql, ..., funnel = c(cells = 1e+06), con = NULL) +read_sql_duckdb(sql, ..., collect = c(cells = 1e+06), con = NULL) } \arguments{ \item{sql}{The SQL to run.} \item{...}{These dots are for future extensions and must be empty.} -\item{funnel}{Logical, whether to create a funneled duckplyr frame. -By default, a funneled duckplyr frame is created. -See the "Funneling" section in \code{\link[=duckdb_tibble]{duckdb_tibble()}} for details.} +\item{collect}{Logical, whether to create a frugal duckplyr frame. +By default, a frugal duckplyr frame is created. 
+See the "Prudence" section in \code{\link[=duckdb_tibble]{duckdb_tibble()}} for details.} \item{con}{The connection, defaults to the default connection.} } diff --git a/patch/anti_join.patch b/patch/anti_join.patch index d5fa0ddda..f120b5e39 100644 --- a/patch/anti_join.patch +++ b/patch/anti_join.patch @@ -24,7 +24,7 @@ diff --git b/R/anti_join.R a/R/anti_join.R } ) @@ -13,7 +22,7 @@ anti_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, ..., na_matches - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) anti_join <- dplyr$anti_join.data.frame - out <- anti_join(x, y, by, copy, ..., na_matches = na_matches) diff --git a/patch/auto_copy.patch b/patch/auto_copy.patch index 177e09e88..1021c321c 100644 --- a/patch/auto_copy.patch +++ b/patch/auto_copy.patch @@ -14,7 +14,7 @@ diff --git b/R/auto_copy.R a/R/auto_copy.R - ) - - # dplyr forward -- check_funneled(x, duckplyr_error) +- check_prudence(x, duckplyr_error) - - auto_copy <- dplyr$auto_copy.data.frame - out <- auto_copy(x, y, copy, ...) diff --git a/patch/collect.patch b/patch/collect.patch index 5f9d52388..1e4304276 100644 --- a/patch/collect.patch +++ b/patch/collect.patch @@ -15,7 +15,7 @@ diff --git b/R/collect.R a/R/collect.R - ) - - # dplyr forward -- check_funneled(x, duckplyr_error) +- check_prudence(x, duckplyr_error) - - collect <- dplyr$collect.data.frame - out <- collect(x, ...) 
diff --git a/patch/compute.patch b/patch/compute.patch index 32d979ce7..d8b83fe7f 100644 --- a/patch/compute.patch +++ b/patch/compute.patch @@ -9,13 +9,13 @@ diff --git b/R/compute.R a/R/compute.R +compute.duckplyr_df <- function( + x, + ..., -+ funnel = NULL, ++ collect = NULL, + name = NULL, + schema_name = NULL, + temporary = TRUE +) { -+ if (is.null(funnel)) { -+ funnel <- get_funnel_duckplyr_df(x) ++ if (is.null(collect)) { ++ collect <- get_collect_duckplyr_df(x) + } + if (is.null(schema_name)) { + schema_name <- "" @@ -41,8 +41,8 @@ diff --git b/R/compute.R a/R/compute.R + + out <- duckplyr_reconstruct(out_rel, x) + -+ if (get_funnel_duckplyr_df(out) != funnel) { -+ out <- as_duckdb_tibble(out, funnel = funnel) ++ if (get_collect_duckplyr_df(out) != collect) { ++ out <- as_duckdb_tibble(out, collect = collect) + } + return(out) diff --git a/patch/count.patch b/patch/count.patch index 67c16ca58..6538eda47 100644 --- a/patch/count.patch +++ b/patch/count.patch @@ -66,5 +66,5 @@ diff --git b/R/count.R a/R/count.R + # out <- count(x_df, !!!quos, wt = {{ wt }}, sort = sort, name = name, .drop = .drop) + # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) diff --git a/patch/full_join.patch b/patch/full_join.patch index 97d56c0d9..996a19eae 100644 --- a/patch/full_join.patch +++ b/patch/full_join.patch @@ -28,7 +28,7 @@ diff --git b/R/full_join.R a/R/full_join.R } ) @@ -13,7 +26,7 @@ full_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x" - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) full_join <- dplyr$full_join.data.frame - out <- full_join(x, y, by, copy, suffix, ..., keep = keep, na_matches = na_matches, multiple = multiple, relationship = relationship) diff --git a/patch/group_map.patch b/patch/group_map.patch index f3036dfd6..7dff995c9 100644 --- a/patch/group_map.patch +++ b/patch/group_map.patch @@ -14,7 +14,7 @@ diff --git b/R/group_map.R a/R/group_map.R duckplyr_error 
<- rel_try(NULL, # Always fall back to dplyr @@ -14,7 +19,7 @@ group_map.duckplyr_df <- function(.data, .f, ..., .keep = FALSE, keep = deprecat - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) group_map <- dplyr$group_map.data.frame - out <- group_map(.data, .f, ..., .keep = .keep, keep = keep) diff --git a/patch/group_modify.patch b/patch/group_modify.patch index 1273505ca..302937f60 100644 --- a/patch/group_modify.patch +++ b/patch/group_modify.patch @@ -14,7 +14,7 @@ diff --git b/R/group_modify.R a/R/group_modify.R duckplyr_error <- rel_try(NULL, # Always fall back to dplyr @@ -14,7 +19,7 @@ group_modify.duckplyr_df <- function(.data, .f, ..., .keep = FALSE, keep = depre - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) group_modify <- dplyr$group_modify.data.frame - out <- group_modify(.data, .f, ..., .keep = .keep, keep = keep) diff --git a/patch/group_split.patch b/patch/group_split.patch index 1e233d62c..0bd85dd28 100644 --- a/patch/group_split.patch +++ b/patch/group_split.patch @@ -14,7 +14,7 @@ diff --git b/R/group_split.R a/R/group_split.R duckplyr_error <- rel_try(NULL, # Always fall back to dplyr @@ -14,7 +19,7 @@ group_split.duckplyr_df <- function(.tbl, ..., .keep = TRUE, keep = deprecated() - check_funneled(.tbl, duckplyr_error) + check_prudence(.tbl, duckplyr_error) group_split <- dplyr$group_split.data.frame - out <- group_split(.tbl, ..., .keep = .keep, keep = keep) diff --git a/patch/group_vars.patch b/patch/group_vars.patch index 10d3e87c0..584c46816 100644 --- a/patch/group_vars.patch +++ b/patch/group_vars.patch @@ -14,7 +14,7 @@ diff --git b/R/group_vars.R a/R/group_vars.R - ) - - # dplyr forward -- check_funneled(x, duckplyr_error) +- check_prudence(x, duckplyr_error) - - group_vars <- dplyr$group_vars.data.frame - out <- group_vars(x) diff --git a/patch/inner_join.patch b/patch/inner_join.patch index 6903fca68..3f415dfcf 100644 --- a/patch/inner_join.patch +++ 
b/patch/inner_join.patch @@ -30,7 +30,7 @@ diff --git b/R/inner_join.R a/R/inner_join.R } ) @@ -13,7 +28,7 @@ inner_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) inner_join <- dplyr$inner_join.data.frame - out <- inner_join(x, y, by, copy, suffix, ..., keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship) diff --git a/patch/left_join.patch b/patch/left_join.patch index 19b3c8ec6..813118074 100644 --- a/patch/left_join.patch +++ b/patch/left_join.patch @@ -31,7 +31,7 @@ diff --git b/R/left_join.R a/R/left_join.R } ) @@ -13,7 +29,7 @@ left_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x" - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) left_join <- dplyr$left_join.data.frame - out <- left_join(x, y, by, copy, suffix, ..., keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship) diff --git a/patch/nest_join.patch b/patch/nest_join.patch index ace6ba366..b0f2ad3bd 100644 --- a/patch/nest_join.patch +++ b/patch/nest_join.patch @@ -20,7 +20,7 @@ diff --git b/R/nest_join.R a/R/nest_join.R "No relational implementation for {.code nest_join()}" = TRUE, @@ -12,8 +22,13 @@ nest_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, keep = NULL, na # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) + x_df <- x + class(x_df) <- setdiff(class(x_df), "duckplyr_df") diff --git a/patch/right_join.patch b/patch/right_join.patch index bae0417d2..e8330ce3b 100644 --- a/patch/right_join.patch +++ b/patch/right_join.patch @@ -30,7 +30,7 @@ diff --git b/R/right_join.R a/R/right_join.R } ) @@ -13,7 +28,7 @@ right_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) right_join <- 
dplyr$right_join.data.frame - out <- right_join(x, y, by, copy, suffix, ..., keep = keep, na_matches = na_matches, multiple = multiple, unmatched = unmatched, relationship = relationship) diff --git a/patch/select.patch b/patch/select.patch index ebba9810c..acc49596b 100644 --- a/patch/select.patch +++ b/patch/select.patch @@ -40,5 +40,5 @@ diff --git b/R/select.R a/R/select.R + # dplyr forward - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) diff --git a/patch/semi_join.patch b/patch/semi_join.patch index 8deabed91..2295f49a2 100644 --- a/patch/semi_join.patch +++ b/patch/semi_join.patch @@ -24,7 +24,7 @@ diff --git b/R/semi_join.R a/R/semi_join.R } ) @@ -13,7 +22,7 @@ semi_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, ..., na_matches - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) semi_join <- dplyr$semi_join.data.frame - out <- semi_join(x, y, by, copy, ..., na_matches = na_matches) diff --git a/patch/slice_sample.patch b/patch/slice_sample.patch index 7e4196de9..c0fcbf8bc 100644 --- a/patch/slice_sample.patch +++ b/patch/slice_sample.patch @@ -2,7 +2,7 @@ diff --git b/R/slice_sample.R a/R/slice_sample.R --- b/R/slice_sample.R +++ a/R/slice_sample.R @@ -13,7 +13,7 @@ slice_sample.duckplyr_df <- function(.data, ..., n, prop, by = NULL, weight_by = - check_funneled(.data, duckplyr_error) + check_prudence(.data, duckplyr_error) slice_sample <- dplyr$slice_sample.data.frame - out <- slice_sample(.data, ..., n = n, prop = prop, by = by, weight_by = weight_by, replace = replace) diff --git a/patch/union.patch b/patch/union.patch index d65075c05..b868dd63e 100644 --- a/patch/union.patch +++ b/patch/union.patch @@ -21,4 +21,4 @@ diff --git b/R/union.R a/R/union.R + duckplyr_error <- NULL # dplyr forward - check_funneled(x, duckplyr_error) + check_prudence(x, duckplyr_error) diff --git a/tests/testthat/test-compute.R b/tests/testthat/test-compute.R index 5456daa2b..54a035bf4 100644 --- 
a/tests/testthat/test-compute.R +++ b/tests/testthat/test-compute.R @@ -8,29 +8,29 @@ test_that("compute()", { }) expect_identical(out, as_duckdb_tibble(df)) - expect_false(is_funneled_duckplyr_df(out)) + expect_false(is_frugal_duckplyr_df(out)) }) -test_that("funneling with failure", { +test_that("prudence with failure", { set.seed(20250124) - df <- duckdb_tibble(x = 1:10, .funnel = c(rows = 5)) + df <- duckdb_tibble(x = 1:10, .collect = c(rows = 5)) out <- compute(df) expect_identical(collect(out), collect(df)) - expect_identical(get_funnel_duckplyr_df(out), c(rows = 5)) + expect_identical(get_collect_duckplyr_df(out), c(rows = 5)) expect_error(nrow(out)) }) -test_that("funneling with success", { +test_that("prudence with success", { set.seed(20250126) - df <- duckdb_tibble(x = 1:10, .funnel = c(rows = 20)) + df <- duckdb_tibble(x = 1:10, .collect = c(rows = 20)) out <- compute(df) expect_identical(collect(out), collect(df)) - expect_identical(get_funnel_duckplyr_df(out), c(rows = 20)) + expect_identical(get_collect_duckplyr_df(out), c(rows = 20)) expect_error(nrow(out), NA) }) diff --git a/tests/testthat/test-compute_file.R b/tests/testthat/test-compute_file.R index 81a27b164..62427ef64 100644 --- a/tests/testthat/test-compute_file.R +++ b/tests/testthat/test-compute_file.R @@ -4,7 +4,7 @@ test_that("compute_parquet()", { out <- compute_parquet(df, path = "test.parquet") expect_identical(out, as_duckdb_tibble(df)) - expect_false(is_funneled_duckplyr_df(out)) + expect_false(is_frugal_duckplyr_df(out)) }) test_that("compute_parquet() with options", { @@ -14,7 +14,7 @@ test_that("compute_parquet() with options", { out <- compute_parquet(df, path = "test", options = list(partition_by = "a")) expect_identical(out, as_duckdb_tibble(df)) - expect_false(is_funneled_duckplyr_df(out)) + expect_false(is_frugal_duckplyr_df(out)) }) test_that("compute_csv()", { @@ -23,14 +23,14 @@ test_that("compute_csv()", { out <- compute_csv(df, path = "test.csv") expect_identical(out, 
as_duckdb_tibble(df)) - expect_false(is_funneled_duckplyr_df(out)) + expect_false(is_frugal_duckplyr_df(out)) }) -test_that("compute_csv() funnel", { +test_that("compute_csv() collect", { df <- data.frame(x = c(1, 2)) withr::defer(unlink("test.csv")) - out <- compute_csv(df, path = "test.csv", funnel = "closed") + out <- compute_csv(df, path = "test.csv", collect = "always_manual") - expect_true(is_funneled_duckplyr_df(out)) + expect_true(is_frugal_duckplyr_df(out)) expect_identical(collect(out), as_tibble(df)) }) diff --git a/tests/testthat/test-ducktbl.R b/tests/testthat/test-ducktbl.R index 39ba2ae32..7d25532b6 100644 --- a/tests/testthat/test-ducktbl.R +++ b/tests/testthat/test-ducktbl.R @@ -7,23 +7,23 @@ test_that("Can construct", { expect_identical(duckdb_tibble(a = 1)$a, 1) }) -test_that('.funnel = "closed" forbids materialization', { - tbl <- duckdb_tibble(a = 1, .funnel = "closed") +test_that('.collect = "always_manual" forbids materialization', { + tbl <- duckdb_tibble(a = 1, .collect = "always_manual") expect_error(length(tbl$a)) }) -test_that('.funnel = c(rows = ) forbids materialization', { - tbl <- duckdb_tibble(a = 1:10, .funnel = c(rows = 5)) +test_that('.collect = c(rows = ) forbids materialization', { + tbl <- duckdb_tibble(a = 1:10, .collect = c(rows = 5)) expect_error(length(tbl$a)) }) -test_that('.funnel = c(cells = ) forbids materialization', { - tbl <- duckdb_tibble(a = 1:10, b = 1, .funnel = c(cells = 10)) +test_that('.collect = c(cells = ) forbids materialization', { + tbl <- duckdb_tibble(a = 1:10, b = 1, .collect = c(cells = 10)) expect_error(length(tbl$a)) }) -test_that('.funnel = "closed" forbids materialization for as_duckdb_tibble', { - tbl <- as_duckdb_tibble(data.frame(a = 1), funnel = "closed") +test_that('.collect = "always_manual" forbids materialization for as_duckdb_tibble', { + tbl <- as_duckdb_tibble(data.frame(a = 1), collect = "always_manual") expect_error(length(tbl$a)) }) @@ -64,14 +64,14 @@ test_that("as_duckdb_tibble() 
and dbplyr tables", { dplyr::copy_to(dest = con) duck <- db_tbl %>% - as_duckdb_tibble(funnel = "closed") %>% + as_duckdb_tibble(collect = "always_manual") %>% mutate(b = 2) expect_error(length(duck$b)) db <- db_tbl %>% mutate(b = 2) %>% - as_duckdb_tibble(funnel = "closed") + as_duckdb_tibble(collect = "always_manual") expect_error(length(db$b)) diff --git a/tests/testthat/test-funnel.R b/tests/testthat/test-funnel.R index 541d651b3..7e28ab165 100644 --- a/tests/testthat/test-funnel.R +++ b/tests/testthat/test-funnel.R @@ -1,12 +1,12 @@ -test_that("funneled duckplyr frames will collect", { - tbl <- duckdb_tibble(a = 1, .funnel = "closed") +test_that("frugal duckplyr frames will collect", { + tbl <- duckdb_tibble(a = 1, .collect = "always_manual") expect_identical( collect(tbl), tibble(a = 1) ) }) -test_that("unfunneled duckplyr frames are converted to data frames", { +test_that("lavish duckplyr frames are converted to data frames", { tbl <- duckdb_tibble(a = 1) expect_identical( as.data.frame(tbl), @@ -14,15 +14,15 @@ test_that("unfunneled duckplyr frames are converted to data frames", { ) }) -test_that("funneled duckplyr frames are converted to data frames", { - tbl <- duckdb_tibble(a = 1, .funnel = "closed") +test_that("frugal duckplyr frames are converted to data frames", { + tbl <- duckdb_tibble(a = 1, .collect = "always_manual") expect_identical( as.data.frame(tbl), data.frame(a = 1) ) }) -test_that("unfunneled duckplyr frames are converted to tibbles", { +test_that("lavish duckplyr frames are converted to tibbles", { tbl <- duckdb_tibble(a = 1) expect_identical( as_tibble(tbl), @@ -30,28 +30,28 @@ test_that("unfunneled duckplyr frames are converted to tibbles", { ) }) -test_that("funneled duckplyr frames are converted to tibbles", { - tbl <- duckdb_tibble(a = 1, .funnel = "closed") +test_that("frugal duckplyr frames are converted to tibbles", { + tbl <- duckdb_tibble(a = 1, .collect = "always_manual") expect_identical( as_tibble(tbl), tibble(a = 1) ) }) 
-test_that("funneling after operation with failure", { - df <- duckdb_tibble(x = 1:10, .funnel = c(rows = 5)) +test_that("prudence after operation with failure", { + df <- duckdb_tibble(x = 1:10, .collect = c(rows = 5)) out <- df %>% count(x) - expect_identical(get_funnel_duckplyr_df(out), c(rows = 5)) + expect_identical(get_collect_duckplyr_df(out), c(rows = 5)) expect_error(nrow(out)) }) -test_that("funneling after operation with success", { - df <- duckdb_tibble(x = 1:10, .funnel = c(rows = 5)) +test_that("prudence after operation with success", { + df <- duckdb_tibble(x = 1:10, .collect = c(rows = 5)) out <- df %>% count() - expect_identical(get_funnel_duckplyr_df(out), c(rows = 5)) + expect_identical(get_collect_duckplyr_df(out), c(rows = 5)) expect_error(nrow(out), NA) }) diff --git a/tests/testthat/test-sql.R b/tests/testthat/test-sql.R index c63d76781..8f5c3ec39 100644 --- a/tests/testthat/test-sql.R +++ b/tests/testthat/test-sql.R @@ -2,7 +2,7 @@ test_that("read_sql_duckdb() works", { con <- withr::local_db_connection(DBI::dbConnect(duckdb::duckdb())) expect_identical( - read_sql_duckdb("SELECT 1 AS a", con = con, funnel = "open"), + read_sql_duckdb("SELECT 1 AS a", con = con, collect = "any_size"), duckdb_tibble(a = 1L) ) }) diff --git a/tools/02-duckplyr_df-methods.R b/tools/02-duckplyr_df-methods.R index 3bf774bee..e2d971f66 100644 --- a/tools/02-duckplyr_df-methods.R +++ b/tools/02-duckplyr_df-methods.R @@ -105,7 +105,7 @@ func_decl_chr <- function( "{\n", rel_try_chr, " # dplyr forward\n", - " check_funneled(", arg_1, ", duckplyr_error)\n" + " check_prudence(", arg_1, ", duckplyr_error)\n" ) new_code_chr <- sub("[{]", new_code_chr_sub, new_code_chr) diff --git a/vignettes/developers.Rmd b/vignettes/developers.Rmd index 55e022c12..7b922c3fc 100644 --- a/vignettes/developers.Rmd +++ b/vignettes/developers.Rmd @@ -117,15 +117,15 @@ The recipe below shows how to achieve this with the usethis package. Learn more about usethis at . 
-## Funneling +## Prudence -The default mode for `as_duckdb_tibble()` and `duckdb_tibble()` is unfunneled. +The default mode for `as_duckdb_tibble()` and `duckdb_tibble()` is lavish. This means that the dplyr operations are carried out by DuckDB when possible, and also available as data frames upon first request. -Use `as_duckdb_tibble(funnel = "closed")` or `duckdb_tibble(.funnel = "closed")` to avoid materializing intermediate data and to ensure that all operations are carried out by DuckDB or fail. -Funneling can also limit the number of rows or cells that are materialized: +Use `as_duckdb_tibble(collect = "always_manual")` or `duckdb_tibble(.collect = "always_manual")` to avoid materializing intermediate data and to ensure that all operations are carried out by DuckDB or fail. +Prudence can also limit the number of rows or cells that are materialized: ```{r} -data <- duckplyr::duckdb_tibble(x = 1:5, .funnel = c(rows = 3)) +data <- duckplyr::duckdb_tibble(x = 1:5, .collect = c(rows = 3)) data ``` @@ -133,4 +133,4 @@ data nrow(data) ``` -Learn more about funneling in `vignette("funnel")`, and about the translation employed by duckplyr in `vignette("limits")`. +Learn more about prudence in `vignette("collect")`, and about the translation employed by duckplyr in `vignette("limits")`. diff --git a/vignettes/funnel.Rmd b/vignettes/funnel.Rmd index 79b6b1439..bdec611ed 100644 --- a/vignettes/funnel.Rmd +++ b/vignettes/funnel.Rmd @@ -1,8 +1,8 @@ --- -title: "Memory protection: Funneling" +title: "Memory protection: Prudence" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{10 Memory protection: Funneling} + %\VignetteIndexEntry{10 Memory protection: Prudence} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -30,7 +30,7 @@ knitr::opts_chunk$set( Sys.setenv(DUCKPLYR_FALLBACK_COLLECT = 0) ``` -This vignette discusses eager and lazy computation, and funneling. +This vignette discusses eager and lazy computation, and prudence. 
```{r attach} library(conflicted) @@ -55,7 +55,7 @@ nrow(df) ``` Under the hood, two key differences provide improved performance and usability: -lazy materialization and funneling. +lazy materialization and prudence. ## Eager and lazy computation @@ -131,50 +131,50 @@ system.time( ``` -## Funneling +## Prudence Being both "eager" and "lazy" at the same time introduces a challenge: it is too easy to accidentally trigger computation, which may be prohibitive if an intermediate result is too large. -This is where funneling comes in. +This is where prudence comes in. ### Concept -For unfunneled duckplyr frames, as in the two previous examples the underlying DuckDB computation is carried out upon the first request. +For lavish duckplyr frames, as in the two previous examples the underlying DuckDB computation is carried out upon the first request. Once the results are computed, they are cached and subsequent requests are fast. This is a good choice for small to medium-sized data, where DuckDB can provide a nice speedup but materializing the data is affordable at any stage. This is the default for `duckdb_tibble()` and `as_duckdb_tibble()`. -For funneled duckplyr frames, accessing a column or requesting the number of rows triggers an error. +For frugal duckplyr frames, accessing a column or requesting the number of rows triggers an error. This is a good choice for large data sets where the cost of materializing the data may be prohibitive due to size or computation time, and the user wants to control when the computation is carried out. ### Example -The example below demonstrates the use of funneled duckplyr frames. +The example below demonstrates the use of frugal duckplyr frames. ```{r} -flights_funneled <- +flights_frugal <- flights |> - duckplyr::as_duckdb_tibble(funnel = "closed") + duckplyr::as_duckdb_tibble(collect = "always_manual") ``` -In this example, `flights_funneled` is a funneled duckplyr frame. 
+In this example, `flights_frugal` is a frugal duckplyr frame. The data can be displayed, and column names and types can be accessed. ```{r} -flights_funneled -names(flights_funneled)[1:10] -class(flights_funneled) -class(flights_funneled[[1]]) +flights_frugal +names(flights_frugal)[1:10] +class(flights_frugal) +class(flights_frugal[[1]]) ``` On the other hand, accessing a column or requesting the number of rows triggers an error: ```{r error = TRUE} -nrow(flights_funneled) -flights_funneled[[1]] +nrow(flights_frugal) +flights_frugal[[1]] ``` @@ -182,20 +182,20 @@ flights_funneled[[1]] For operations not supported by duckplyr, the original dplyr implementation is used as a fallback. As the original dplyr implementation accesses columns directly, the data must be materialized before a fallback can be executed. -Therefore, funneled frames allow you to check that all operations are supported by DuckDB: for a funneled frame, fallbacks to dplyr are not possible. +Therefore, frugal frames allow you to check that all operations are supported by DuckDB: for a frugal frame, fallbacks to dplyr are not possible. ```{r error = TRUE} -flights_funneled |> +flights_frugal |> group_by(origin) |> summarize(n = n()) |> ungroup() ``` -The same pipeline with an unfunneled frame works, but the computation is carried out by dplyr: +The same pipeline with a lavish frame works, but the computation is carried out by dplyr: ```{r} -flights_funneled |> - duckplyr::as_duckdb_tibble(funnel = "open") |> +flights_frugal |> + duckplyr::as_duckdb_tibble(collect = "any_size") |> group_by(origin) |> summarize(n = n()) |> ungroup() @@ -205,18 +205,18 @@ By using operations supported by duckplyr and avoiding fallbacks as much as poss See `?fallback` for details on fallbacks, and `vignette("limits")` for the operations supported by duckplyr. -### Unfunneling +### Unprudence -A funneled duckplyr frame can be converted to an unfunneled one with `as_duckdb_tibble(funnel = "open")`. 
+A frugal duckplyr frame can be converted to a lavish one with `as_duckdb_tibble(collect = "any_size")`. The `collect.duckplyr_df()` method triggers computation and converts to a plain tibble. The difference between the two is the class of the returned object: ```{r} -flights_funneled |> - duckplyr::as_duckdb_tibble(funnel = "open") |> +flights_frugal |> + duckplyr::as_duckdb_tibble(collect = "any_size") |> class() -flights_funneled |> +flights_frugal |> collect() |> class() ``` @@ -224,11 +224,11 @@ flights_funneled |> The same behavior is achieved with `as_tibble()` and `as.data.frame()`: ```{r} -flights_funneled |> +flights_frugal |> as_tibble() |> class() -flights_funneled |> +flights_frugal |> as.data.frame() |> class() ``` @@ -237,21 +237,21 @@ See `vignette("large")` for techniques for working with large data sets. ### Comparison -Funneled duckplyr frames behave like lazy tables in dbplyr and lazy frames in dtplyr: the computation only starts when you *explicitly* request it with `collect.duckplyr_df()` or through other means. -However, funneled duckplyr frames can be unfunneled at any time, and vice versa. -In dtplyr and dbplyr, there are no unfunneled frames: collection always needs to be explicit. +Frugal duckplyr frames behave like lazy tables in dbplyr and lazy frames in dtplyr: the computation only starts when you *explicitly* request it with `collect.duckplyr_df()` or through other means. +However, frugal duckplyr frames can be made lavish at any time, and vice versa. +In dtplyr and dbplyr, there are no lavish frames: collection always needs to be explicit. -## Partial funneling +## Partial prudence -Partial funneling is a compromise between funneling and unfunneling. +Partial prudence is a compromise between prudence and unprudence. Materialization is allowed for data up to a certain size, measured in cells (values) and rows in the resulting data frame. 
```{r} nrow(flights) flights_partial <- flights |> - duckplyr::as_duckdb_tibble(funnel = c(rows = 100000)) + duckplyr::as_duckdb_tibble(collect = c(rows = 100000)) ``` In this example, the data is materialized only if the result has fewer than 100,000 rows. @@ -271,6 +271,6 @@ flights_partial |> nrow() ``` -Partial funneling is a good choice for data sets where the cost of materializing the data is prohibitive only for large results. +Partial prudence is a good choice for data sets where the cost of materializing the data is prohibitive only for large results. The default for the ingestion functions like `read_parquet_duckdb()` is to limit the result size to one million cells (values in the resulting data frame). See `vignette("large")` for more details on working with large data sets. diff --git a/vignettes/large.Rmd b/vignettes/large.Rmd index c6757a76a..eeb7d23af 100644 --- a/vignettes/large.Rmd +++ b/vignettes/large.Rmd @@ -67,7 +67,7 @@ Full compatibility means fewer surprises and less cognitive load for the user. With DuckDB as the backend, duckplyr can also handle large data sets that do not fit into RAM, keeping full dplyr compatibility. The tools for bringing data into and out of R memory are modeled after the dplyr and dbplyr packages, and are described in the following sections. -See `vignette("funnel")` on eager and lazy data, `vignette("limits")` for limitations in the translation employed by duckplyr, and `?fallback` for more information on fallback. +See `vignette("collect")` on eager and lazy data, `vignette("limits")` for limitations in the translation employed by duckplyr, and `?fallback` for more information on fallback. ## To duckplyr @@ -177,7 +177,7 @@ flights_parquet In all cases, the data is read lazily: only the metadata is read initially, and the data is read as required. This means that data can be read from files that are larger than the available RAM. 
The Parquet format is particularly efficient for this purpose, as it stores data in a columnar format and allows reading only the columns that are required. -See `vignette("funnel")` for more details on the concept of lazy data. +See `vignette("collect")` for more details on the concept of lazy data. ## From DuckDB @@ -291,4 +291,4 @@ flights_parquet |> group_by(Month) ``` -See `vignette("funnel")` for the concepts and mechanisms at play. +See `vignette("collect")` for the concepts and mechanisms at play.