Skip to content

Commit 88bd29f

Browse files
committed
feat: Client-side warning on join explosion
1 parent bbfc5aa commit 88bd29f

File tree

11 files changed

+82
-15
lines changed

11 files changed

+82
-15
lines changed

R/dplyr.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,11 @@ rows_check_y_unmatched <- dplyr$rows_check_y_unmatched
8080
rows_df_in_place <- dplyr$rows_df_in_place
8181
rowwise_df <- dplyr$rowwise_df
8282
slice_rows <- dplyr$slice_rows
83+
stop_join <- dplyr$stop_join
8384
summarise_build <- dplyr$summarise_build
8485
summarise_cols <- dplyr$summarise_cols
8586
summarise_deprecate_variable_size <- dplyr$summarise_deprecate_variable_size
8687
the <- dplyr$the
8788
tick_if_needed <- dplyr$tick_if_needed
89+
warn_join <- dplyr$warn_join
8890
warn_join_cross_by <- dplyr$warn_join_cross_by

R/full_join.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ full_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x"
1717
"No implicit cross joins for full_join()" = is_cross_by(by),
1818
"`multiple` not supported" = !identical(multiple, "all"),
1919
{
20-
out <- rel_join_impl(x, y, by, "full", na_matches, suffix, keep, error_call)
20+
out <- rel_join_impl(x, y, by, "full", na_matches, suffix, keep, relationship, error_call)
2121
return(out)
2222
}
2323
)

R/inner_join.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ inner_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x
1919
"`multiple` not supported" = !identical(multiple, "all"),
2020
"`unmatched` not supported" = !identical(unmatched, "drop"),
2121
{
22-
out <- rel_join_impl(x, y, by, "inner", na_matches, suffix, keep, error_call)
22+
out <- rel_join_impl(x, y, by, "inner", na_matches, suffix, keep, relationship, error_call)
2323
return(out)
2424
}
2525
)

R/join.R

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ rel_join_impl <- function(
66
na_matches,
77
suffix = c(".x", ".y"),
88
keep = NULL,
9+
relationship = NULL,
910
error_call = caller_env()
1011
) {
1112
mutating <- !(join %in% c("semi", "anti"))
@@ -25,6 +26,10 @@ rel_join_impl <- function(
2526
by <- as_join_by(by, error_call = error_call)
2627
}
2728

29+
if (mutating) {
30+
check_relationship(relationship, x, y, by, error_call = error_call)
31+
}
32+
2833
x_by <- by$x
2934
y_by <- by$y
3035
x_rel <- duckdb_rel_from_df(x)
@@ -137,3 +142,62 @@ rel_join_impl <- function(
137142

138143
return(out)
139144
}
145+
146+
check_relationship <- function(relationship, x, y, by, error_call) {
147+
if (is_null(relationship)) {
148+
# FIXME: Determine behavior based on option
149+
if (!is_key(x, by$x) && !is_key(y, by$y)) {
150+
warn_join(
151+
message = c(
152+
"Detected an unexpected many-to-many relationship between `x` and `y`.",
153+
i = paste0(
154+
"If a many-to-many relationship is expected, ",
155+
"set `relationship = \"many-to-many\"` to silence this warning."
156+
)
157+
),
158+
class = "dplyr_warning_join_relationship_many_to_many",
159+
call = error_call
160+
)
161+
}
162+
return()
163+
}
164+
165+
if (relationship %in% c("one-to-many", "one-to-one")) {
166+
if (!is_key(x, by$x)) {
167+
stop_join(
168+
message = c(
169+
glue("Each row in `{x_name}` must match at most 1 row in `{y_name}`."),
170+
),
171+
class = paste0("dplyr_error_join_relationship_", gsub("-", "_", relationship)),
172+
call = error_call
173+
)
174+
}
175+
}
176+
177+
if (relationship %in% c("many-to-one", "one-to-one")) {
178+
if (!is_key(y, by$y)) {
179+
stop_join(
180+
message = c(
181+
glue("Each row in `{y_name}` must match at most 1 row in `{x_name}`."),
182+
),
183+
class = paste0("dplyr_error_join_relationship_", gsub("-", "_", relationship)),
184+
call = error_call
185+
)
186+
}
187+
}
188+
}
189+
190+
is_key <- function(x, cols) {
191+
local_options(duckdb.materialize_message = FALSE)
192+
193+
rows <-
194+
x %>%
195+
# FIXME: Why does this materialize
196+
# as_duckplyr_tibble() %>%
197+
summarize(.by = c(!!!syms(cols)), `___n` = n()) %>%
198+
filter(`___n` > 1L) %>%
199+
head(1L) %>%
200+
nrow()
201+
202+
rows == 0
203+
}

R/left_join.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ left_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x"
2020
"`multiple` not supported" = !identical(multiple, "all"),
2121
"`unmatched` not supported" = !identical(unmatched, "drop"),
2222
{
23-
out <- rel_join_impl(x, y, by, "left", na_matches, suffix, keep, error_call)
23+
out <- rel_join_impl(x, y, by, "left", na_matches, suffix, keep, relationship, error_call)
2424
return(out)
2525
}
2626
)

R/right_join.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ right_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x
1919
"`multiple` not supported" = !identical(multiple, "all"),
2020
"`unmatched` not supported" = !identical(unmatched, "drop"),
2121
{
22-
out <- rel_join_impl(x, y, by, "right", na_matches, suffix, keep, error_call)
22+
out <- rel_join_impl(x, y, by, "right", na_matches, suffix, keep, relationship, error_call)
2323
return(out)
2424
}
2525
)

tests/testthat/_snaps/join-rows.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,12 +86,10 @@
8686
---
8787

8888
Code
89-
left_join(df, df, by = join_by(x))
89+
duckplyr_left_join(df, df, by = join_by(x))
9090
Condition
91-
Warning in `left_join()`:
91+
Warning in `duckplyr_left_join()`:
9292
Detected an unexpected many-to-many relationship between `x` and `y`.
93-
i Row 1 of `x` matches multiple rows in `y`.
94-
i Row 1 of `y` matches multiple rows in `x`.
9593
i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
9694
Output
9795
x

tests/testthat/_snaps/join.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,15 @@
5050
Error:
5151
! `na_matches` must be one of "na" or "never", not "foo".
5252

53+
# mutating joins trigger many-to-many warning
54+
55+
Code
56+
out <- duckplyr_left_join(df, df, join_by(x))
57+
Condition
58+
Warning in `duckplyr_left_join()`:
59+
Detected an unexpected many-to-many relationship between `x` and `y`.
60+
i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
61+
5362
# mutating joins compute common columns
5463

5564
Code

tests/testthat/test-join-rows.R

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ test_that("join_rows() gives meaningful many-to-one errors", {
190190
})
191191

192192
test_that("join_rows() gives meaningful many-to-many warnings", {
193-
skip("TODO duckdb")
194193
expect_snapshot({
195194
join_rows(c(1, 1), c(1, 1))
196195
})

tests/testthat/test-join.R

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,6 @@ test_that("join_filter() validates arguments", {
353353
})
354354

355355
test_that("mutating joins trigger many-to-many warning", {
356-
skip("TODO duckdb")
357356
df <- tibble(x = c(1, 1))
358357
expect_snapshot(out <- duckplyr_left_join(df, df, join_by(x)))
359358
})

tools/00-funs.R

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -235,14 +235,10 @@ duckplyr_tests <- head(n = -1, list(
235235
NULL
236236
),
237237
"test-join-rows.R" = c(
238-
"join_rows() gives meaningful many-to-many warnings",
239238
NULL
240239
),
241240
"test-join.R" = c(
242-
"mutating joins trigger multiple match warning",
243-
"mutating joins don't trigger multiple match warning when called indirectly",
244-
245-
"mutating joins trigger many-to-many warning",
241+
# FIXME: How to detect an indirect call?
246242
"mutating joins don't trigger many-to-many warning when called indirectly",
247243
NULL
248244
),

0 commit comments

Comments
 (0)