From 27992b16b898f070e08f6ea5d62eda9a271574be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 10 Apr 2025 07:42:46 +0200 Subject: [PATCH 1/3] feat: Reuse intermediate materialization results --- R/relational-duckdb.R | 8 ++++++- tests/testthat/_snaps/compute.md | 3 ++- tests/testthat/_snaps/relational-duckdb.md | 11 +++++----- tests/testthat/test-compute.R | 7 +++++- tests/testthat/test-relational-duckdb.R | 25 ++++++++++++++++++++++ 5 files changed, 46 insertions(+), 8 deletions(-) diff --git a/R/relational-duckdb.R b/R/relational-duckdb.R index 063d33dd5..845cbe604 100644 --- a/R/relational-duckdb.R +++ b/R/relational-duckdb.R @@ -99,7 +99,13 @@ duckdb_rel_from_df <- function(df, call = caller_env()) { # FIXME: make generic stopifnot(is.data.frame(df)) - rel <- duckdb$rel_from_altrep_df(df, strict = FALSE, allow_materialized = FALSE) + rel <- duckdb$rel_from_altrep_df( + df, + strict = FALSE, + allow_materialized = FALSE, + wrap = TRUE + ) + if (!is.null(rel)) { # Once we're here, we know it's an ALTREP data frame # We don't get here if it's already materialized diff --git a/tests/testthat/_snaps/compute.md b/tests/testthat/_snaps/compute.md index 87d426ba1..0686838ed 100644 --- a/tests/testthat/_snaps/compute.md +++ b/tests/testthat/_snaps/compute.md @@ -7,7 +7,8 @@ --------------------- --- Relation Tree --- --------------------- - Scan Table [duckplyr_4hYuvhNS26] + AltrepDataFrame [0xdeadbeef] + Scan Table [duckplyr_4hYuvhNS26] --------------------- -- Result Columns -- diff --git a/tests/testthat/_snaps/relational-duckdb.md b/tests/testthat/_snaps/relational-duckdb.md index 62bb68a10..c3c41b559 100644 --- a/tests/testthat/_snaps/relational-duckdb.md +++ b/tests/testthat/_snaps/relational-duckdb.md @@ -42,11 +42,12 @@ --------------------- --- Relation Tree --- --------------------- - Projection [a as a] - Order [___row_number ASC] - Filter [(a = 1.0)] - Projection [a as a, row_number() OVER () as ___row_number] - r_dataframe_scan(0xdeadbeef) + AltrepDataFrame [0xdeadbeef] + Projection [a as a] + Order [___row_number ASC] + Filter [(a = 1.0)] + Projection [a as a, row_number() OVER () as ___row_number] + r_dataframe_scan(0xdeadbeef) --------------------- -- Result Columns -- diff --git a/tests/testthat/test-compute.R b/tests/testthat/test-compute.R index e475d928d..82da399a5 100644 --- a/tests/testthat/test-compute.R +++ b/tests/testthat/test-compute.R @@ -1,9 +1,14 @@ test_that("compute()", { set.seed(20241230) + transform <- function(x) { + x <- gsub("0x[0-9a-f]+", "0xdeadbeef", x) + x + } + df <- duckdb_tibble(x = c(1, 2)) out <- compute(df) - expect_snapshot({ + expect_snapshot(transform = transform, { duckdb_rel_from_df(out) }) diff --git a/tests/testthat/test-relational-duckdb.R b/tests/testthat/test-relational-duckdb.R index 4df078a3a..3864ecf1a 100644 --- a/tests/testthat/test-relational-duckdb.R +++ b/tests/testthat/test-relational-duckdb.R @@ -138,3 +138,28 @@ test_that("duckdb_rel_from_df() uses materialized results", { expect_equal(n_calls, 1) }) + +test_that("duckdb_rel_from_df() uses materialized intermediate results", { + skip_if(identical(Sys.getenv("R_COVR"), "true")) + + withr::local_envvar(DUCKPLYR_OUTPUT_ORDER = FALSE) + + df1 <- duckdb_tibble(a = 1) + df2 <- df1 |> arrange(a) + df3 <- df2 |> mutate(b = 2) + + rel2 <- duckdb:::rel_from_altrep_df(df2, wrap = TRUE) + expect_length(strsplit(duckdb:::rel_tostring(rel2, "tree"), "\n")[[1]], 4) + + rel3 <- duckdb:::rel_from_altrep_df(df3, wrap = TRUE) + expect_length(strsplit(duckdb:::rel_tostring(rel3, "tree"), "\n")[[1]], 6) + + # Side effect: trigger intermediate materialization + nrow(df2) + + # The depth of the rel2 tree is shorter thanks to `wrap = TRUE` + expect_length(strsplit(duckdb:::rel_tostring(rel2, "tree"), "\n")[[1]], 2) + + # The depth of the rel3 tree is shorter now too + expect_length(strsplit(duckdb:::rel_tostring(rel3, "tree"), "\n")[[1]], 4) +}) From eaa39b2f38a13cd453f62eb1cad4dce42ad9d160 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kirill=20M=C3=BCller?= Date: Thu, 24 Apr 2025 06:04:02 +0200 Subject: [PATCH 2/3] TESTS --- reldf.R | 29 +++++++++++++++++++++++++++++ reldf2.R | 20 ++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 reldf.R create mode 100644 reldf2.R diff --git a/reldf.R b/reldf.R new file mode 100644 index 000000000..c58a0bf7f --- /dev/null +++ b/reldf.R @@ -0,0 +1,29 @@ +pkgload::load_all() + +df1 <- + duckdb_tibble(a = 1, b = 2, c = 3) |> + select(a, b) |> + slice_head(n = 1) + +df2 <- + df1 |> + select(b) + +df2 |> + duckdb$rel_from_altrep_df() + +df2 |> + explain() + +df1$a + +df2 |> + duckdb$rel_from_altrep_df(wrap = TRUE) + +df2 |> + explain() + +df2$b + +df2 |> + duckdb$rel_from_altrep_df(wrap = TRUE) diff --git a/reldf2.R b/reldf2.R new file mode 100644 index 000000000..3cc35d81e --- /dev/null +++ b/reldf2.R @@ -0,0 +1,20 @@ +pkgload::load_all() + +df <- + duckdb_tibble(a = 1, b = 2, c = 3) |> + select(a, b) + +names(df) <- c("c", "d") + +df2 <- + df |> + slice_head(n = 1) |> + select(c) + +df2 |> + duckdb$rel_from_altrep_df() + +df$c + +df2 |> + duckdb$rel_from_altrep_df() From 19a2c5834410cd2b2488927c30a3bc90fad5c7f8 Mon Sep 17 00:00:00 2001 From: krlmlr Date: Thu, 24 Apr 2025 04:07:58 +0000 Subject: [PATCH 3/3] chore: Auto-update from GitHub Actions Run: https://github.com/tidyverse/duckplyr/actions/runs/14633164425 --- man/duckplyr-package.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/duckplyr-package.Rd b/man/duckplyr-package.Rd index 76c0b2938..69de84109 100644 --- a/man/duckplyr-package.Rd +++ b/man/duckplyr-package.Rd @@ -29,7 +29,7 @@ Authors: Other contributors: \itemize{ - \item Posit Software, PBC (03wc8by49) [copyright holder, funder] + \item Posit Software, PBC (\href{https://ror.org/03wc8by49}{ROR}) [copyright holder, funder] } }