diff --git a/NEWS.md b/NEWS.md index bd03e2b9d..461d690a5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,11 @@ # tidyr (development version) +* `unnest()` is now much faster (@mgirlich, @DavisVaughan, #1127). + +* `unnest()` no longer allows unnesting a list-col containing a mix of vector + and data frame elements. Previously, this only worked by accident, and is + considered an off-label usage of `unnest()` that has now become an error. + * `unchop()` is now much faster, which propagates through to various functions, such as `unnest()`, `unnest_longer()`, `unnest_wider()`, and `separate_rows()` (@mgirlich, @DavisVaughan, #1127). diff --git a/R/nest.R b/R/nest.R index 286e70be8..ad402576a 100644 --- a/R/nest.R +++ b/R/nest.R @@ -329,20 +329,9 @@ unnest.data.frame <- function( .id = "DEPRECATED", .sep = "DEPRECATED", .preserve = "DEPRECATED") { - cols <- tidyselect::eval_select(enquo(cols), data) - - if (nrow(data) == 0) { - for (col in names(cols)) { - data[[col]] <- as_empty_df(data[[col]], col = col) - } - } else { - for (col in names(cols)) { - data[[col]] <- map(data[[col]], as_df, col = col) - } - } - data <- unchop(data, any_of(cols), keep_empty = keep_empty, ptype = ptype) + cols <- cols[map_lgl(unclass(data)[cols], is.data.frame)] unpack(data, any_of(cols), names_sep = names_sep, names_repair = names_repair) } @@ -370,30 +359,3 @@ unnest.rowwise_df <- function( out } - -# helpers ----------------------------------------------------------------- - -# n cols, n rows -as_df <- function(x, col) { - if (is.null(x)) { - x - } else if (is.data.frame(x)) { - x - } else if (vec_is(x)) { - # Preserves vec_size() invariant - new_data_frame(list(x), names = col) - } else { - stop("Input must be list of vectors", call. = FALSE) - } -} - -as_empty_df <- function(x, col) { - if (is_list_of(x)) { - x - } else if (is.list(x)) { - list_of(.ptype = tibble(!!col := unspecified())) - } else { - stop("Input must be list of vectors", call. = FALSE) - } -} - diff --git a/tests/testthat/_snaps/nest.md b/tests/testthat/_snaps/nest.md index 3a59d6d9b..98a5a181f 100644 --- a/tests/testthat/_snaps/nest.md +++ b/tests/testthat/_snaps/nest.md @@ -1,3 +1,10 @@ +# bad inputs generate errors + + Code + unnest(df, y) + Error + Input must be a vector, not a function. + # multiple columns must be same length Code @@ -12,3 +19,10 @@ Error In row 1, can't recycle input of size 2 to size 3. +# unnesting column of mixed vector / data frame input is an error + + Code + unnest(df, x) + Error + Can't combine `..1` and `..2` . + diff --git a/tests/testthat/test-nest.R b/tests/testthat/test-nest.R index 7b366fee4..40036c696 100644 --- a/tests/testthat/test-nest.R +++ b/tests/testthat/test-nest.R @@ -119,7 +119,7 @@ test_that("empty rows still affect output type", { test_that("bad inputs generate errors", { df <- tibble(x = 1, y = list(mean)) - expect_error(unnest(df, y), "must be list of vectors") + expect_snapshot(error = TRUE, unnest(df, y)) }) test_that("unesting combines augmented vectors", { @@ -204,6 +204,15 @@ test_that("can use non-syntactic names", { expect_named(out, "foo bar") }) +test_that("unpacks df-cols (#1112)", { + df <- tibble(x = 1, y = tibble(a = 1, b = 2)) + expect_identical(unnest(df, y), tibble(x = 1, a = 1, b = 2)) +}) + +test_that("unnesting column of mixed vector / data frame input is an error", { + df <- tibble(x = list(1, tibble(a = 1))) + expect_snapshot(error = TRUE, unnest(df, x)) +}) # other methods ----------------------------------------------------------------- @@ -233,15 +242,27 @@ test_that("can unnest empty data frame", { expect_equal(out, tibble(x = integer(), y = unspecified())) }) +test_that("unnesting bare lists of NULLs is equivalent to unnesting empty lists", { + df <- tibble(x = 1L, y = list(NULL)) + out <- unnest(df, y) + expect_identical(out, tibble(x = integer(), y = unspecified())) +}) + test_that("unnest() preserves ptype", { tbl <- tibble(x = integer(), y = list_of(ptype = tibble(a = integer()))) res <- unnest(tbl, y) expect_equal(res, tibble(x = integer(), a = integer())) }) -test_that("errors on bad inputs", { +test_that("unnesting typed lists of NULLs retains ptype", { + df <- tibble(x = 1L, y = list_of(NULL, .ptype = tibble(a = integer()))) + out <- unnest(df, y) + expect_identical(out, tibble(x = integer(), a = integer())) +}) + +test_that("skips over vector columns", { df <- tibble(x = integer(), y = list()) - expect_error(unnest(df, x), "list of vectors") + expect_identical(unnest(df, x), df) }) test_that("unnest keeps list cols", {