Skip to content

Commit 5be75f1

Browse files
committed
feat: Client-side warning on join explosion
1 parent e394d80 commit 5be75f1

File tree

11 files changed

+113
-11
lines changed

11 files changed

+113
-11
lines changed

R/dplyr.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,11 @@ rows_check_y_unmatched <- dplyr$rows_check_y_unmatched
8080
rows_df_in_place <- dplyr$rows_df_in_place
8181
rowwise_df <- dplyr$rowwise_df
8282
slice_rows <- dplyr$slice_rows
83+
stop_join <- dplyr$stop_join
8384
summarise_build <- dplyr$summarise_build
8485
summarise_cols <- dplyr$summarise_cols
8586
summarise_deprecate_variable_size <- dplyr$summarise_deprecate_variable_size
8687
the <- dplyr$the
8788
tick_if_needed <- dplyr$tick_if_needed
89+
warn_join <- dplyr$warn_join
8890
warn_join_cross_by <- dplyr$warn_join_cross_by

R/full_join.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ full_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x"
1717
"No implicit cross joins for {.code full_join()}" = is_cross_by(by),
1818
"{.arg multiple} not supported" = !identical(multiple, "all"),
1919
{
20-
out <- rel_join_impl(x, y, by, "full", na_matches, suffix, keep, error_call)
20+
out <- rel_join_impl(x, y, by, "full", na_matches, suffix, keep, relationship, error_call)
2121
return(out)
2222
}
2323
)

R/inner_join.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ inner_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x
1919
"{.arg multiple} not supported" = !identical(multiple, "all"),
2020
"{.arg unmatched} not supported" = !identical(unmatched, "drop"),
2121
{
22-
out <- rel_join_impl(x, y, by, "inner", na_matches, suffix, keep, error_call)
22+
out <- rel_join_impl(x, y, by, "inner", na_matches, suffix, keep, relationship, error_call)
2323
return(out)
2424
}
2525
)

R/join.R

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ rel_join_impl <- function(
66
na_matches,
77
suffix = c(".x", ".y"),
88
keep = NULL,
9+
relationship = NULL,
910
error_call = caller_env()
1011
) {
1112
mutating <- !(join %in% c("semi", "anti"))
@@ -25,6 +26,10 @@ rel_join_impl <- function(
2526
by <- as_join_by(by, error_call = error_call)
2627
}
2728

29+
if (mutating) {
30+
check_relationship(relationship, x, y, by, error_call = error_call)
31+
}
32+
2833
x_by <- by$x
2934
y_by <- by$y
3035
x_rel <- duckdb_rel_from_df(x)
@@ -136,3 +141,62 @@ rel_join_impl <- function(
136141

137142
return(out)
138143
}
144+
145+
check_relationship <- function(relationship, x, y, by, error_call) {
146+
if (is_null(relationship)) {
147+
# FIXME: Determine behavior based on option
148+
if (!is_key(x, by$x) && !is_key(y, by$y)) {
149+
warn_join(
150+
message = c(
151+
"Detected an unexpected many-to-many relationship between `x` and `y`.",
152+
i = paste0(
153+
"If a many-to-many relationship is expected, ",
154+
"set `relationship = \"many-to-many\"` to silence this warning."
155+
)
156+
),
157+
class = "dplyr_warning_join_relationship_many_to_many",
158+
call = error_call
159+
)
160+
}
161+
return()
162+
}
163+
164+
if (relationship %in% c("one-to-many", "one-to-one")) {
165+
if (!is_key(x, by$x)) {
166+
stop_join(
167+
message = c(
168+
glue("Each row in `{x_name}` must match at most 1 row in `{y_name}`."),
169+
),
170+
class = paste0("dplyr_error_join_relationship_", gsub("-", "_", relationship)),
171+
call = error_call
172+
)
173+
}
174+
}
175+
176+
if (relationship %in% c("many-to-one", "one-to-one")) {
177+
if (!is_key(y, by$y)) {
178+
stop_join(
179+
message = c(
180+
glue("Each row in `{y_name}` must match at most 1 row in `{x_name}`."),
181+
),
182+
class = paste0("dplyr_error_join_relationship_", gsub("-", "_", relationship)),
183+
call = error_call
184+
)
185+
}
186+
}
187+
}
188+
189+
is_key <- function(x, cols) {
190+
local_options(duckdb.materialize_message = FALSE)
191+
192+
rows <-
193+
x %>%
194+
# FIXME: Why does this materialize
195+
# as_duckplyr_tibble() %>%
196+
summarize(.by = c(!!!syms(cols)), `___n` = n()) %>%
197+
filter(`___n` > 1L) %>%
198+
head(1L) %>%
199+
nrow()
200+
201+
rows == 0
202+
}

R/left_join.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ left_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x"
2020
"{.arg multiple} not supported" = !identical(multiple, "all"),
2121
"{.arg unmatched} not supported" = !identical(unmatched, "drop"),
2222
{
23-
out <- rel_join_impl(x, y, by, "left", na_matches, suffix, keep, error_call)
23+
out <- rel_join_impl(x, y, by, "left", na_matches, suffix, keep, relationship, error_call)
2424
return(out)
2525
}
2626
)

R/right_join.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ right_join.duckplyr_df <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x
1919
"{.arg multiple} not supported" = !identical(multiple, "all"),
2020
"{.arg unmatched} not supported" = !identical(unmatched, "drop"),
2121
{
22-
out <- rel_join_impl(x, y, by, "right", na_matches, suffix, keep, error_call)
22+
out <- rel_join_impl(x, y, by, "right", na_matches, suffix, keep, relationship, error_call)
2323
return(out)
2424
}
2525
)

tests/testthat/_snaps/dplyr-join-rows.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,39 @@
6565
! Each row in `x` must match at most 1 row in `y`.
6666
i Row 1 of `x` matches multiple rows in `y`.
6767

68+
# join_rows() gives meaningful many-to-many warnings
69+
70+
Code
71+
join_rows(c(1, 1), c(1, 1))
72+
Condition
73+
Warning:
74+
Detected an unexpected many-to-many relationship between `x` and `y`.
75+
i Row 1 of `x` matches multiple rows in `y`.
76+
i Row 1 of `y` matches multiple rows in `x`.
77+
i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
78+
Output
79+
$x
80+
[1] 1 1 2 2
81+
82+
$y
83+
[1] 1 2 1 2
84+
85+
86+
---
87+
88+
Code
89+
duckplyr_left_join(df, df, by = join_by(x))
90+
Condition
91+
Warning in `duckplyr_left_join()`:
92+
Detected an unexpected many-to-many relationship between `x` and `y`.
93+
i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
94+
Output
95+
x
96+
1 1
97+
2 1
98+
3 1
99+
4 1
100+
68101
# join_rows() gives meaningful error message on unmatched rows
69102

70103
Code

tests/testthat/_snaps/dplyr-join.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,15 @@
5050
Error:
5151
! `na_matches` must be one of "na" or "never", not "foo".
5252

53+
# mutating joins trigger many-to-many warning
54+
55+
Code
56+
out <- duckplyr_left_join(df, df, join_by(x))
57+
Condition
58+
Warning in `duckplyr_left_join()`:
59+
Detected an unexpected many-to-many relationship between `x` and `y`.
60+
i If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.
61+
5362
# mutating joins compute common columns
5463

5564
Code

tests/testthat/test-dplyr-join-rows.R

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,6 @@ test_that("join_rows() gives meaningful many-to-one errors", {
190190
})
191191

192192
test_that("join_rows() gives meaningful many-to-many warnings", {
193-
skip("TODO duckdb")
194193
expect_snapshot({
195194
join_rows(c(1, 1), c(1, 1))
196195
})

tests/testthat/test-dplyr-join.R

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,6 @@ test_that("join_filter() validates arguments", {
353353
})
354354

355355
test_that("mutating joins trigger many-to-many warning", {
356-
skip("TODO duckdb")
357356
df <- tibble(x = c(1, 1))
358357
expect_snapshot(out <- duckplyr_left_join(df, df, join_by(x)))
359358
})

tools/00-funs.R

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -237,14 +237,10 @@ duckplyr_tests <- head(n = -1, list(
237237
NULL
238238
),
239239
"test-join-rows.R" = c(
240-
"join_rows() gives meaningful many-to-many warnings",
241240
NULL
242241
),
243242
"test-join.R" = c(
244-
"mutating joins trigger multiple match warning",
245-
"mutating joins don't trigger multiple match warning when called indirectly",
246-
247-
"mutating joins trigger many-to-many warning",
243+
# FIXME: How to detect an indirect call?
248244
"mutating joins don't trigger many-to-many warning when called indirectly",
249245
NULL
250246
),

0 commit comments

Comments
 (0)