Skip to content

feat: Reuse intermediate materialization results #686

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion R/relational-duckdb.R
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,13 @@ duckdb_rel_from_df <- function(df, call = caller_env()) {
# FIXME: make generic
stopifnot(is.data.frame(df))

rel <- duckdb$rel_from_altrep_df(df, strict = FALSE, allow_materialized = FALSE)
rel <- duckdb$rel_from_altrep_df(
df,
strict = FALSE,
allow_materialized = FALSE,
wrap = TRUE
)

if (!is.null(rel)) {
# Once we're here, we know it's an ALTREP data frame
# We don't get here if it's already materialized
Expand Down
2 changes: 1 addition & 1 deletion man/duckplyr-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions reldf.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Scratch script: inspect the relation behind derived duckplyr frames
# before and after columns of those frames are accessed.
pkgload::load_all()

parent <-
  duckdb_tibble(a = 1, b = 2, c = 3) |>
  select(a, b) |>
  slice_head(n = 1)

child <- select(parent, b)

# Relation and plan for `child` before any column has been accessed
duckdb$rel_from_altrep_df(child)

explain(child)

# Access a column of the parent frame; presumably this materializes `parent`
# (TODO confirm). Then inspect `child` again, this time with `wrap = TRUE`.
parent$a

duckdb$rel_from_altrep_df(child, wrap = TRUE)

explain(child)

# Access a column of `child` itself and inspect its relation once more
child$b

duckdb$rel_from_altrep_df(child, wrap = TRUE)
20 changes: 20 additions & 0 deletions reldf2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Scratch script: same inspection as above, but the parent frame has its
# columns renamed via `names<-` before the derived frame is built.
pkgload::load_all()

renamed <-
  duckdb_tibble(a = 1, b = 2, c = 3) |>
  select(a, b)

names(renamed) <- c("c", "d")

first_c <-
  renamed |>
  slice_head(n = 1) |>
  select(c)

# Relation for the derived frame before any column has been accessed
duckdb$rel_from_altrep_df(first_c)

# Access a column of the renamed parent; presumably this materializes it
# (TODO confirm). Then inspect the derived frame's relation again.
renamed$c

duckdb$rel_from_altrep_df(first_c)
3 changes: 2 additions & 1 deletion tests/testthat/_snaps/compute.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
---------------------
--- Relation Tree ---
---------------------
Scan Table [duckplyr_4hYuvhNS26]
AltrepDataFrame [0xdeadbeef]
Scan Table [duckplyr_4hYuvhNS26]

---------------------
-- Result Columns --
Expand Down
11 changes: 6 additions & 5 deletions tests/testthat/_snaps/relational-duckdb.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,12 @@
---------------------
--- Relation Tree ---
---------------------
Projection [a as a]
Order [___row_number ASC]
Filter [(a = 1.0)]
Projection [a as a, row_number() OVER () as ___row_number]
r_dataframe_scan(0xdeadbeef)
AltrepDataFrame [0xdeadbeef]
Projection [a as a]
Order [___row_number ASC]
Filter [(a = 1.0)]
Projection [a as a, row_number() OVER () as ___row_number]
r_dataframe_scan(0xdeadbeef)

---------------------
-- Result Columns --
Expand Down
7 changes: 6 additions & 1 deletion tests/testthat/test-compute.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
test_that("compute()", {
set.seed(20241230)

# Replace every lowercase hexadecimal literal of the form 0x... with the
# fixed placeholder "0xdeadbeef"; vectorized over the character input `x`.
transform <- function(x) {
  gsub("0x[0-9a-f]+", "0xdeadbeef", x)
}

df <- duckdb_tibble(x = c(1, 2))
out <- compute(df)
expect_snapshot({
expect_snapshot(transform = transform, {
duckdb_rel_from_df(out)
})

Expand Down
25 changes: 25 additions & 0 deletions tests/testthat/test-relational-duckdb.R
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,28 @@ test_that("duckdb_rel_from_df() uses materialized results", {

expect_equal(n_calls, 1)
})

test_that("duckdb_rel_from_df() uses materialized intermediate results", {
  skip_if(identical(Sys.getenv("R_COVR"), "true"))

  withr::local_envvar(DUCKPLYR_OUTPUT_ORDER = FALSE)

  # Lines of the printed relation tree; their count is the proxy for depth
  tree_lines <- function(rel) {
    printed <- duckdb:::rel_tostring(rel, "tree")
    strsplit(printed, "\n")[[1]]
  }

  base <- duckdb_tibble(a = 1)
  sorted <- arrange(base, a)
  extended <- mutate(sorted, b = 2)

  # Before anything is materialized, each relation prints its full tree
  rel_sorted <- duckdb:::rel_from_altrep_df(sorted, wrap = TRUE)
  expect_length(tree_lines(rel_sorted), 4)

  rel_extended <- duckdb:::rel_from_altrep_df(extended, wrap = TRUE)
  expect_length(tree_lines(rel_extended), 6)

  # Side effect: trigger intermediate materialization
  nrow(sorted)

  # The relations were created before materialization, yet both now print
  # shorter trees: the intermediate frame thanks to `wrap = TRUE` ...
  expect_length(tree_lines(rel_sorted), 2)

  # ... and the frame derived from it as well
  expect_length(tree_lines(rel_extended), 4)
})