From 64f0d73035d359f7814598883291ff46798310f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Wed, 29 Jan 2025 21:01:05 +0100 Subject: [PATCH] open/closed --- R/ducktbl.R | 22 +++++++++++----------- R/funnel.R | 4 ++-- R/io2.R | 8 ++++---- R/relational.R | 2 +- R/sql.R | 4 ++-- man/compute.duckplyr_df.Rd | 4 ++-- man/compute_file.Rd | 4 ++-- man/duckdb_tibble.Rd | 10 +++++----- man/read_file_duckdb.Rd | 4 ++-- tests/testthat/test-compute_file.R | 2 +- tests/testthat/test-ducktbl.R | 12 ++++++------ tests/testthat/test-funnel.R | 6 +++--- tests/testthat/test-sql.R | 2 +- vignettes/developers.Rmd | 2 +- vignettes/funnel.Rmd | 8 ++++---- 15 files changed, 47 insertions(+), 47 deletions(-) diff --git a/R/ducktbl.R b/R/ducktbl.R index 94221440f..4d36523a8 100644 --- a/R/ducktbl.R +++ b/R/ducktbl.R @@ -57,14 +57,14 @@ #' In dtplyr and dbplyr, there are no lavish frames: collection always needs to be #' explicit. #' -#' A frugal duckplyr frame can be converted to an lavish one with `as_duckdb_tibble(collect = "open")`. +#' A frugal duckplyr frame can be converted to an lavish one with `as_duckdb_tibble(collect = "any_size")`. #' The [collect.duckplyr_df()] method triggers computation and converts to a plain tibble. #' Other useful methods include [compute_file()] for storing results in a file, #' and [compute.duckplyr_df()] for storing results in temporary storage on disk. #' #' Beyond safety regarding memory usage, frugal frames also allow you #' to check that all operations are supported by DuckDB: -#' for a frugal frame with `collect = "closed"`, fallbacks to dplyr are not possible. +#' for a frugal frame with `collect = "always_manual"`, fallbacks to dplyr are not possible. #' As a reminder, computing via DuckDB is currently not always possible, #' see `vignette("limits")` for the supported operations. #' In such cases, the original dplyr implementation is used, see [fallback] for details. 
@@ -108,12 +108,12 @@ #' #' x$a #' -#' y <- duckdb_tibble(a = 1, .collect = "closed") +#' y <- duckdb_tibble(a = 1, .collect = "always_manual") #' y #' try(length(y$a)) #' length(collect(y)$a) #' @export -duckdb_tibble <- function(..., .collect = "open") { +duckdb_tibble <- function(..., .collect = "any_size") { out <- tibble::tibble(...) as_duckdb_tibble(out, collect = .collect) } @@ -126,7 +126,7 @@ duckdb_tibble <- function(..., .collect = "open") { #' @param x The object to convert or to test. #' @rdname duckdb_tibble #' @export -as_duckdb_tibble <- function(x, ..., collect = "open") { +as_duckdb_tibble <- function(x, ..., collect = "any_size") { # Handle the collect arg in the generic, only the other args will be dispatched as_duckdb_tibble <- function(x, ...) { UseMethod("as_duckdb_tibble") @@ -135,7 +135,7 @@ as_duckdb_tibble <- function(x, ..., collect = "open") { collect_parsed <- collect_parse(collect) out <- as_duckdb_tibble(x, ...) - if (collect_parsed$collect == "closed") { + if (collect_parsed$collect == "always_manual") { as_frugal_duckplyr_df( out, collect_parsed$allow_materialization, @@ -173,11 +173,11 @@ collect_parse <- function(collect, call = caller_env()) { } } allow_materialization <- is.finite(n_rows) || is.finite(n_cells) - collect <- "closed" + collect <- "always_manual" } else if (!is.character(collect)) { cli::cli_abort("{.arg collect} must be an unnamed character vector or a named numeric vector", call = call) } else { - allow_materialization <- !identical(collect, "closed") + allow_materialization <- !identical(collect, "always_manual") } list( @@ -195,7 +195,7 @@ as_duckdb_tibble.tbl_duckdb_connection <- function(x, ...) 
{ con <- dbplyr::remote_con(x) sql <- dbplyr::remote_query(x) - read_sql_duckdb(sql, collect = "closed", con = con) + read_sql_duckdb(sql, collect = "always_manual", con = con) } #' @export @@ -272,7 +272,7 @@ is_duckdb_tibble <- function(x) { #' @param collect Only adds the class, does not recreate the relation object! #' @noRd -new_duckdb_tibble <- function(x, class = NULL, collect = "open", error_call = caller_env()) { +new_duckdb_tibble <- function(x, class = NULL, collect = "any_size", error_call = caller_env()) { if (is.null(class)) { class <- c("tbl_df", "tbl", "data.frame") } @@ -284,7 +284,7 @@ new_duckdb_tibble <- function(x, class = NULL, collect = "open", error_call = ca } class(x) <- unique(c( - if (!identical(collect, "open")) "frugal_duckplyr_df", + if (!identical(collect, "any_size")) "frugal_duckplyr_df", "duckplyr_df", class )) diff --git a/R/funnel.R b/R/funnel.R index 50e7f20b5..ea8ebd94a 100644 --- a/R/funnel.R +++ b/R/funnel.R @@ -43,12 +43,12 @@ is_frugal_duckplyr_df <- function(x) { get_collect_duckplyr_df <- function(x) { if (!is_frugal_duckplyr_df(x)) { - return("open") + return("any_size") } collect <- attr(x, "collect") if (is.null(collect)) { - return("closed") + return("always_manual") } collect diff --git a/R/io2.R b/R/io2.R index 4b82297ba..8dcddcf04 100644 --- a/R/io2.R +++ b/R/io2.R @@ -41,8 +41,8 @@ read_parquet_duckdb <- function(path, ..., collect = c(cells = 1e6), options = l #' # Materialize explicitly #' collect(df)$a #' -#' # Automatic materialization with collect = "open" -#' df <- read_csv_duckdb(path, collect = "open") +#' # Automatic materialization with collect = "any_size" +#' df <- read_csv_duckdb(path, collect = "any_size") #' df$a #' #' # Specify column types @@ -150,9 +150,9 @@ duckfun <- function(table_function, args, ..., collect) { # Start with collect, to avoid unwanted materialization df <- duckdb$rel_to_altrep(rel, allow_materialization = FALSE) - out <- new_duckdb_tibble(df, collect = "closed") + out <- 
new_duckdb_tibble(df, collect = "always_manual") - if (!identical(collect, "closed")) { + if (!identical(collect, "always_manual")) { out <- as_duckdb_tibble(out, collect = collect) } diff --git a/R/relational.R b/R/relational.R index c6307c800..9835d0f50 100644 --- a/R/relational.R +++ b/R/relational.R @@ -131,7 +131,7 @@ check_prudence <- function(x, duckplyr_error, call = caller_env()) { cli::cli_abort(parent = duckplyr_error_parent, call = call, c( "This operation cannot be carried out by DuckDB, and the input is a frugal duckplyr frame.", "*" = duckplyr_error_msg, - "i" = 'Use {.code compute(collect = "open")} to materialize to temporary storage and continue with {.pkg duckplyr}.', + "i" = 'Use {.code compute(collect = "any_size")} to materialize to temporary storage and continue with {.pkg duckplyr}.', "i" = 'See {.run vignette("collect")} for other options.' )) } diff --git a/R/sql.R b/R/sql.R index 7b21efab7..f31c2923b 100644 --- a/R/sql.R +++ b/R/sql.R @@ -33,9 +33,9 @@ read_sql_duckdb <- function(sql, ..., collect = c(cells = 1e6), con = NULL) { meta_rel_register(rel, expr(duckdb$rel_from_sql(con, !!sql))) df <- duckdb$rel_to_altrep(rel, allow_materialization = FALSE) - out <- new_duckdb_tibble(df, collect = "closed") + out <- new_duckdb_tibble(df, collect = "always_manual") - if (!identical(collect, "closed")) { + if (!identical(collect, "always_manual")) { out <- as_duckdb_tibble(out, collect = collect) } diff --git a/man/compute.duckplyr_df.Rd b/man/compute.duckplyr_df.Rd index 418f4a2f8..4f2be8414 100644 --- a/man/compute.duckplyr_df.Rd +++ b/man/compute.duckplyr_df.Rd @@ -102,14 +102,14 @@ function. In dtplyr and dbplyr, there are no lavish frames: collection always needs to be explicit. -A frugal duckplyr frame can be converted to an lavish one with \code{as_duckdb_tibble(collect = "open")}. +A frugal duckplyr frame can be converted to an lavish one with \code{as_duckdb_tibble(collect = "any_size")}. 
The \code{\link[=collect.duckplyr_df]{collect.duckplyr_df()}} method triggers computation and converts to a plain tibble. Other useful methods include \code{\link[=compute_file]{compute_file()}} for storing results in a file, and \code{\link[=compute.duckplyr_df]{compute.duckplyr_df()}} for storing results in temporary storage on disk. Beyond safety regarding memory usage, frugal frames also allow you to check that all operations are supported by DuckDB: -for a frugal frame with \code{collect = "closed"}, fallbacks to dplyr are not possible. +for a frugal frame with \code{collect = "always_manual"}, fallbacks to dplyr are not possible. As a reminder, computing via DuckDB is currently not always possible, see \code{vignette("limits")} for the supported operations. In such cases, the original dplyr implementation is used, see \link{fallback} for details. diff --git a/man/compute_file.Rd b/man/compute_file.Rd index 1fe2b1911..77294880f 100644 --- a/man/compute_file.Rd +++ b/man/compute_file.Rd @@ -102,14 +102,14 @@ function. In dtplyr and dbplyr, there are no lavish frames: collection always needs to be explicit. -A frugal duckplyr frame can be converted to an lavish one with \code{as_duckdb_tibble(collect = "open")}. +A frugal duckplyr frame can be converted to an lavish one with \code{as_duckdb_tibble(collect = "any_size")}. The \code{\link[=collect.duckplyr_df]{collect.duckplyr_df()}} method triggers computation and converts to a plain tibble. Other useful methods include \code{\link[=compute_file]{compute_file()}} for storing results in a file, and \code{\link[=compute.duckplyr_df]{compute.duckplyr_df()}} for storing results in temporary storage on disk. Beyond safety regarding memory usage, frugal frames also allow you to check that all operations are supported by DuckDB: -for a frugal frame with \code{collect = "closed"}, fallbacks to dplyr are not possible. +for a frugal frame with \code{collect = "always_manual"}, fallbacks to dplyr are not possible. 
As a reminder, computing via DuckDB is currently not always possible, see \code{vignette("limits")} for the supported operations. In such cases, the original dplyr implementation is used, see \link{fallback} for details. diff --git a/man/duckdb_tibble.Rd b/man/duckdb_tibble.Rd index 6c233aa4d..1852cfffd 100644 --- a/man/duckdb_tibble.Rd +++ b/man/duckdb_tibble.Rd @@ -6,9 +6,9 @@ \alias{is_duckdb_tibble} \title{duckplyr data frames} \usage{ -duckdb_tibble(..., .collect = "open") +duckdb_tibble(..., .collect = "any_size") -as_duckdb_tibble(x, ..., collect = "open") +as_duckdb_tibble(x, ..., collect = "any_size") is_duckdb_tibble(x) } @@ -112,14 +112,14 @@ function. In dtplyr and dbplyr, there are no lavish frames: collection always needs to be explicit. -A frugal duckplyr frame can be converted to an lavish one with \code{as_duckdb_tibble(collect = "open")}. +A frugal duckplyr frame can be converted to an lavish one with \code{as_duckdb_tibble(collect = "any_size")}. The \code{\link[=collect.duckplyr_df]{collect.duckplyr_df()}} method triggers computation and converts to a plain tibble. Other useful methods include \code{\link[=compute_file]{compute_file()}} for storing results in a file, and \code{\link[=compute.duckplyr_df]{compute.duckplyr_df()}} for storing results in temporary storage on disk. Beyond safety regarding memory usage, frugal frames also allow you to check that all operations are supported by DuckDB: -for a frugal frame with \code{collect = "closed"}, fallbacks to dplyr are not possible. +for a frugal frame with \code{collect = "always_manual"}, fallbacks to dplyr are not possible. As a reminder, computing via DuckDB is currently not always possible, see \code{vignette("limits")} for the supported operations. In such cases, the original dplyr implementation is used, see \link{fallback} for details. 
@@ -139,7 +139,7 @@ x \%>\% x$a -y <- duckdb_tibble(a = 1, .collect = "closed") +y <- duckdb_tibble(a = 1, .collect = "always_manual") y try(length(y$a)) length(collect(y)$a) diff --git a/man/read_file_duckdb.Rd b/man/read_file_duckdb.Rd index 372e6dabb..79aeef5c4 100644 --- a/man/read_file_duckdb.Rd +++ b/man/read_file_duckdb.Rd @@ -78,8 +78,8 @@ try(print(df$a)) # Materialize explicitly collect(df)$a -# Automatic materialization with collect = "open" -df <- read_csv_duckdb(path, collect = "open") +# Automatic materialization with collect = "any_size" +df <- read_csv_duckdb(path, collect = "any_size") df$a # Specify column types diff --git a/tests/testthat/test-compute_file.R b/tests/testthat/test-compute_file.R index 69b3f12f4..62427ef64 100644 --- a/tests/testthat/test-compute_file.R +++ b/tests/testthat/test-compute_file.R @@ -29,7 +29,7 @@ test_that("compute_csv()", { test_that("compute_csv() collect", { df <- data.frame(x = c(1, 2)) withr::defer(unlink("test.csv")) - out <- compute_csv(df, path = "test.csv", collect = "closed") + out <- compute_csv(df, path = "test.csv", collect = "always_manual") expect_true(is_frugal_duckplyr_df(out)) expect_identical(collect(out), as_tibble(df)) diff --git a/tests/testthat/test-ducktbl.R b/tests/testthat/test-ducktbl.R index 7365c1235..7d25532b6 100644 --- a/tests/testthat/test-ducktbl.R +++ b/tests/testthat/test-ducktbl.R @@ -7,8 +7,8 @@ test_that("Can construct", { expect_identical(duckdb_tibble(a = 1)$a, 1) }) -test_that('.collect = "closed" forbids materialization', { - tbl <- duckdb_tibble(a = 1, .collect = "closed") +test_that('.collect = "always_manual" forbids materialization', { + tbl <- duckdb_tibble(a = 1, .collect = "always_manual") expect_error(length(tbl$a)) }) @@ -22,8 +22,8 @@ test_that('.collect = c(cells = ) forbids materialization', { expect_error(length(tbl$a)) }) -test_that('.collect = "closed" forbids materialization for as_duckdb_tibble', { - tbl <- as_duckdb_tibble(data.frame(a = 1), collect = 
"closed") +test_that('.collect = "always_manual" forbids materialization for as_duckdb_tibble', { + tbl <- as_duckdb_tibble(data.frame(a = 1), collect = "always_manual") expect_error(length(tbl$a)) }) @@ -64,14 +64,14 @@ test_that("as_duckdb_tibble() and dbplyr tables", { dplyr::copy_to(dest = con) duck <- db_tbl %>% - as_duckdb_tibble(collect = "closed") %>% + as_duckdb_tibble(collect = "always_manual") %>% mutate(b = 2) expect_error(length(duck$b)) db <- db_tbl %>% mutate(b = 2) %>% - as_duckdb_tibble(collect = "closed") + as_duckdb_tibble(collect = "always_manual") expect_error(length(db$b)) diff --git a/tests/testthat/test-funnel.R b/tests/testthat/test-funnel.R index 9f6df2657..7e28ab165 100644 --- a/tests/testthat/test-funnel.R +++ b/tests/testthat/test-funnel.R @@ -1,5 +1,5 @@ test_that("frugal duckplyr frames will collect", { - tbl <- duckdb_tibble(a = 1, .collect = "closed") + tbl <- duckdb_tibble(a = 1, .collect = "always_manual") expect_identical( collect(tbl), tibble(a = 1) @@ -15,7 +15,7 @@ test_that("lavish duckplyr frames are converted to data frames", { }) test_that("frugal duckplyr frames are converted to data frames", { - tbl <- duckdb_tibble(a = 1, .collect = "closed") + tbl <- duckdb_tibble(a = 1, .collect = "always_manual") expect_identical( as.data.frame(tbl), data.frame(a = 1) @@ -31,7 +31,7 @@ test_that("lavish duckplyr frames are converted to tibbles", { }) test_that("frugal duckplyr frames are converted to tibbles", { - tbl <- duckdb_tibble(a = 1, .collect = "closed") + tbl <- duckdb_tibble(a = 1, .collect = "always_manual") expect_identical( as_tibble(tbl), tibble(a = 1) diff --git a/tests/testthat/test-sql.R b/tests/testthat/test-sql.R index 9532e2197..8f5c3ec39 100644 --- a/tests/testthat/test-sql.R +++ b/tests/testthat/test-sql.R @@ -2,7 +2,7 @@ test_that("read_sql_duckdb() works", { con <- withr::local_db_connection(DBI::dbConnect(duckdb::duckdb())) expect_identical( - read_sql_duckdb("SELECT 1 AS a", con = con, collect = "open"), + 
read_sql_duckdb("SELECT 1 AS a", con = con, collect = "any_size"), duckdb_tibble(a = 1L) ) }) diff --git a/vignettes/developers.Rmd b/vignettes/developers.Rmd index f36b48181..7b922c3fc 100644 --- a/vignettes/developers.Rmd +++ b/vignettes/developers.Rmd @@ -121,7 +121,7 @@ Learn more about usethis at . The default mode for `as_duckdb_tibble()` and `duckdb_tibble()` is lavish. This means that the dplyr operations are carried out by DuckDB when possible, and also available as data frames upon first request. -Use `as_duckdb_tibble(collect = "closed")` or `duckdb_tibble(.collect = "closed")` to avoid materializing intermediate data and to ensure that all operations are carried out by DuckDB or fail. +Use `as_duckdb_tibble(collect = "always_manual")` or `duckdb_tibble(.collect = "always_manual")` to avoid materializing intermediate data and to ensure that all operations are carried out by DuckDB or fail. Prudence can also limit the number of rows or cells that are materialized: ```{r} diff --git a/vignettes/funnel.Rmd b/vignettes/funnel.Rmd index c706b32a3..bdec611ed 100644 --- a/vignettes/funnel.Rmd +++ b/vignettes/funnel.Rmd @@ -157,7 +157,7 @@ The example below demonstrates the use of frugal duckplyr frames. ```{r} flights_frugal <- flights |> - duckplyr::as_duckdb_tibble(collect = "closed") + duckplyr::as_duckdb_tibble(collect = "always_manual") ``` In this example, `flights_frugal` is a frugal duckplyr frame. @@ -195,7 +195,7 @@ The same pipeline with an lavish frame works, but the computation is carried out ```{r} flights_frugal |> - duckplyr::as_duckdb_tibble(collect = "open") |> + duckplyr::as_duckdb_tibble(collect = "any_size") |> group_by(origin) |> summarize(n = n()) |> ungroup() @@ -207,13 +207,13 @@ See `?fallback` for details on fallbacks, and `vignette("limits")` for the opera ### Unprudence -A frugal duckplyr frame can be converted to an lavish one with `as_duckdb_tibble(collect = "open")`. 
+A frugal duckplyr frame can be converted to a lavish one with `as_duckdb_tibble(collect = "any_size")`. The `collect.duckplyr_df()` method triggers computation and converts to a plain tibble. The difference between the two is the class of the returned object: ```{r} flights_frugal |> - duckplyr::as_duckdb_tibble(collect = "open") |> + duckplyr::as_duckdb_tibble(collect = "any_size") |> class() flights_frugal |>