diff --git a/R/compare.R b/R/compare.R index 7dbaa1bc2..d716cab75 100644 --- a/R/compare.R +++ b/R/compare.R @@ -184,7 +184,7 @@ order_proxy <- function(proxy, direction = "asc", na_value = "largest") { if (vec_size(proxy) == 0L) { return(integer(0L)) } - args <- map(unname(proxy), function(.x) { + args <- map(unstructure(proxy), function(.x) { if (is.data.frame(.x)) { .x <- order(vec_order(.x, direction = direction, na_value = na_value)) } diff --git a/R/compat-purrr.R b/R/compat-purrr.R index f8f362cb8..34c39369c 100644 --- a/R/compat-purrr.R +++ b/R/compat-purrr.R @@ -1,33 +1,28 @@ -# nocov start - compat-purrr (last updated: rlang 0.2.0) - -# This file serves as a reference for compatibility functions for -# purrr. They are not drop-in replacements but allow a similar style -# of programming. This is useful in cases where purrr is too heavy a -# package to depend on. Please find the most recent version in rlang's -# repository. +# nocov start --- compat-purrr-vctrs --- 2020-08-18 map <- function(.x, .f, ...) { - lapply(.x, .f, ...) -} -map_mold <- function(.x, .f, .mold, ...) { - out <- vapply(.x, .f, .mold, ..., USE.NAMES = FALSE) - names(out) <- names(.x) - out + if (is.data.frame(.x)) { + .x <- unclass(.x) + } + vctrs::vec_map(.x, .f, ...) } map_lgl <- function(.x, .f, ...) { - map_mold(.x, .f, logical(1), ...) + map(.x, .f, ..., .ptype = lgl()) } map_int <- function(.x, .f, ...) { - map_mold(.x, .f, integer(1), ...) + map(.x, .f, ..., .ptype = int()) } map_dbl <- function(.x, .f, ...) { - map_mold(.x, .f, double(1), ...) + map(.x, .f, ..., .ptype = dbl()) } map_chr <- function(.x, .f, ...) { - map_mold(.x, .f, character(1), ...) + map(.x, .f, ..., .ptype = chr()) } map_cpl <- function(.x, .f, ...) { - map_mold(.x, .f, complex(1), ...) + map(.x, .f, ..., .ptype = cpl()) +} +map_raw <- function(.x, .f, ...) { + map(.x, .f, ..., .ptype = raw()) } walk <- function(.x, .f, ...) { @@ -53,6 +48,9 @@ pluck_chr <- function(.x, .f) { pluck_cpl <- function(.x, .f) { map_cpl(.x, `[[`, .f) } +pluck_raw <- function(.x, .f) { + map_raw(.x, `[[`, .f) +} map2 <- function(.x, .y, .f, ...) { Map(.f, .x, .y, ...) @@ -93,10 +91,29 @@ pmap <- function(.l, .f, ...) { } probe <- function(.x, .p, ...) { + as_predicate <- function(.fn, ..., .allow_na = FALSE) { + .fn <- as_function(.fn, ...) + + function(...) { + out <- .fn(...) + + if (!is_bool(out)) { + if (is_na(out) && .allow_na) { + # Always return a logical NA + return(NA) + } + abort("Predicate functions must return a single `TRUE` or `FALSE`.") + } + + out + } + } + if (is_logical(.p)) { stopifnot(length(.p) == length(.x)) .p } else { + .p <- as_predicate(.p, ...) map_lgl(.x, .p, ...) } } @@ -108,10 +125,58 @@ discard <- function(.x, .p, ...) { sel <- probe(.x, .p, ...) .x[is.na(sel) | !sel] } -map_if <- function(.x, .p, .f, ...) { - matches <- probe(.x, .p) - .x[matches] <- map(.x[matches], .f, ...) - .x +map_if <- function(.x, .p, .f, ..., .else = NULL) { + sel <- probe(.x, .p) + + out <- rep_along(.x, list(NULL)) + out[sel] <- map(.x[sel], .f, ...) + + if (is_null(.else)) { + out[!sel] <- .x[!sel] + } else { + out[!sel] <- map(.x[!sel], .else, ...) + } + + set_names(out, names(.x)) +} + +map_at <- function(.x, .at, .f, ...) { + at_selection <- function(nm, .at){ + if (is_quosures(.at)){ + if (!is_installed("tidyselect")) { + abort("Using tidyselect in `map_at()` requires tidyselect.") + } + .at <- tidyselect::vars_select(.vars = nm, !!!.at) + } + .at + } + inv_which <- function(x, sel) { + if (is.character(sel)) { + names <- names(x) + if (is.null(names)) { + stop("character indexing requires a named object", call. = FALSE) + } + names %in% sel + } else if (is.numeric(sel)) { + if (any(sel < 0)) { + !seq_along(x) %in% abs(sel) + } else { + seq_along(x) %in% sel + } + + } else { + stop("unrecognised index type", call. = FALSE) + } + } + + where <- at_selection(names(.x), .at) + sel <- inv_which(.x, where) + + out <- rep_along(.x, list(NULL)) + out[sel] <- map(.x[sel], .f, ...) + out[!sel] <- .x[!sel] + + set_names(out, names(.x)) } transpose <- function(.l) { diff --git a/R/map.R b/R/map.R new file mode 100644 index 000000000..73ba1fa64 --- /dev/null +++ b/R/map.R @@ -0,0 +1,6 @@ + +vec_map <- function(.x, .fn, ..., .ptype = list()) { + .elt <- NULL # Defined in the mapping loop + .fn <- as_function(.fn) + .External(vctrs_map, .x, environment(), .ptype) +} diff --git a/R/partial-factor.R b/R/partial-factor.R index 8439ed5a9..c3e95dd44 100644 --- a/R/partial-factor.R +++ b/R/partial-factor.R @@ -50,7 +50,7 @@ new_partial_factor <- function(partial = factor(), learned = factor()) { vec_ptype_full.vctrs_partial_factor <- function(x, ...) { empty <- "" - levels <- map(x, levels) + levels <- map(unclass(x), levels) hashes <- map_chr(levels, hash_label) needs_indent <- hashes != empty diff --git a/src/init.c b/src/init.c index f19421d6d..89d329e60 100644 --- a/src/init.c +++ b/src/init.c @@ -124,6 +124,7 @@ extern SEXP vctrs_cast_dispatch_native(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_fast_c(SEXP, SEXP); extern SEXP vctrs_data_frame(SEXP, SEXP, SEXP); extern SEXP vctrs_df_list(SEXP, SEXP, SEXP); +extern SEXP vctrs_map(SEXP, SEXP, SEXP); // Maturing @@ -292,6 +293,7 @@ static const R_ExternalMethodDef ExtEntries[] = { {"vctrs_c", (DL_FUNC) &vctrs_c, 3}, {"vctrs_new_data_frame", (DL_FUNC) &vctrs_new_data_frame, -1}, {"vctrs_chop2", (DL_FUNC) &vctrs_chop2, 1}, + {"vctrs_map", (DL_FUNC) &vctrs_map, 3}, {NULL, NULL, 0} }; @@ -325,6 +327,7 @@ void vctrs_init_bind(SEXP ns); void vctrs_init_cast(SEXP ns); void vctrs_init_data(SEXP ns); void vctrs_init_dictionary(SEXP ns); +void vctrs_init_map(SEXP ns); void vctrs_init_names(SEXP ns); void vctrs_init_proxy_restore(SEXP ns); void vctrs_init_slice(SEXP ns); @@ -346,6 +349,7 @@ SEXP vctrs_init_library(SEXP ns) { vctrs_init_cast(ns); vctrs_init_data(ns); vctrs_init_dictionary(ns); + vctrs_init_map(ns); vctrs_init_names(ns); vctrs_init_proxy_restore(ns); vctrs_init_slice(ns); diff --git a/src/map.c b/src/map.c new file mode 100644 index 000000000..4b46d65b0 --- /dev/null +++ b/src/map.c @@ -0,0 +1,124 @@ +#include "vctrs.h" +#include "slice.h" +#include "utils.h" + +// Defined at load time +SEXP vec_map_call = NULL; + +static SEXP atomic_map(SEXP x, SEXP env, SEXP ptype); +static SEXP list_map(SEXP x, SEXP env, SEXP ptype); + + +SEXP vctrs_map(SEXP args) { + args = CDR(args); + SEXP x = CAR(args); args = CDR(args); + SEXP env = CAR(args); args = CDR(args); + SEXP ptype = CAR(args); + + if (ptype == R_NilValue) { + r_abort("`.ptype` can't be NULL."); + } + + SEXP orig = x; + bool list_input = vec_is_list(orig); + bool list_output = vec_is_list(ptype); + + // Instead of using `[[` or equivalent to access the elements of + // atomic inputs, we chop them into a list + if (!list_input) { + x = vec_chop2(x); + } + PROTECT(x); + + SEXP out; + if (list_output) { + out = list_map(x, env, ptype); + } else { + out = atomic_map(x, env, ptype); + } + PROTECT(out); + + SEXP names = PROTECT(vec_names(orig)); + if (names != R_NilValue) { + vec_set_names(out, names); + } + + UNPROTECT(3); + return out; +} + +static +SEXP list_map(SEXP x, SEXP env, SEXP ptype) { + // When mapping to a list, we update the input list with the results + // inplace. We first zap the attributes of this list because it's + // cast to the target prototype later on. + SEXP out = PROTECT(r_clone_referenced(x)); + SET_ATTRIB(out, R_NilValue); + SET_OBJECT(out, 0); + + r_ssize n = r_length(x); + const SEXP* p_x = VECTOR_PTR_RO(x); + + for (r_ssize i = 0; i < n; ++i) { + r_env_poke(env, syms_dot_elt, p_x[i]); + SET_VECTOR_ELT(out, i, r_eval_force(vec_map_call, env)); + } + + // Genericity is accomplished by casting the complete list of + // outputs to the target prototype. Should use a ptype identity + // check before casting. Probably `vec_cast()` should make that + // check. + if (OBJECT(ptype)) { + out = vec_cast(out, ptype, NULL, NULL); + } + + UNPROTECT(1); + return out; +} + +static +SEXP atomic_map(SEXP x, SEXP env, SEXP ptype) { + r_ssize n = r_length(x); + + // Genericity is handled in a typical fashion when mapping to an + // atomic vector. We initialise the target prototype and coerce each + // element to that target before assigning. + SEXP out = PROTECT(vec_init(ptype, n)); + + SEXP out_proxy = vec_proxy(out); + PROTECT_INDEX out_proxy_pi; + PROTECT_WITH_INDEX(out_proxy, &out_proxy_pi); + + SEXP loc = PROTECT(compact_seq(0, 0, true)); + int* p_loc = INTEGER(loc); + + const SEXP* p_x = VECTOR_PTR_RO(x); + + for (r_ssize i = 0; i < n; ++i) { + r_env_poke(env, syms_dot_elt, p_x[i]); + + SEXP elt_out = PROTECT(r_eval_force(vec_map_call, env)); + if (vec_size(elt_out) != 1) { + r_abort("Mapped function must return a size 1 vector."); + } + + elt_out = PROTECT(vec_cast(elt_out, ptype, NULL, NULL)); + + init_compact_seq(p_loc, i, 1, true); + out_proxy = vec_proxy_assign(out_proxy, loc, elt_out); + + UNPROTECT(2); + REPROTECT(out_proxy, out_proxy_pi); + } + + out = vec_restore(out_proxy, ptype, R_NilValue, VCTRS_OWNED_true); + + UNPROTECT(3); + return out; +} + + +void vctrs_init_map(SEXP ns) { + vec_map_call = r_parse(".fn(.elt, ...)"); + R_PreserveObject(vec_map_call); +} diff --git a/src/utils.c b/src/utils.c index 73389e512..929f8c2f3 100644 --- a/src/utils.c +++ b/src/utils.c @@ -911,6 +911,7 @@ const void* r_vec_deref_const(SEXP x) { case CPLXSXP: return COMPLEX_RO(x); case STRSXP: return STRING_PTR_RO(x); case RAWSXP: return RAW_RO(x); + case VECSXP: return VECTOR_PTR_RO(x); default: stop_unimplemented_type("r_vec_deref_const", TYPEOF(x)); } } @@ -1751,6 +1752,7 @@ SEXP syms_vctrs_common_class_fallback = NULL; SEXP syms_fallback_class = NULL; SEXP syms_abort = NULL; SEXP syms_message = NULL; +SEXP syms_dot_elt = NULL; SEXP fns_bracket = NULL; SEXP fns_quote = NULL; @@ -2022,6 +2024,7 @@ void vctrs_init_utils(SEXP ns) { syms_fallback_class = Rf_install("fallback_class"); syms_abort = Rf_install("abort"); syms_message = Rf_install("message"); + syms_dot_elt = Rf_install(".elt"); fns_bracket = Rf_findVar(syms_bracket, R_BaseEnv); fns_quote = Rf_findVar(Rf_install("quote"), R_BaseEnv); diff --git a/src/utils.h b/src/utils.h index bee0a128c..898590321 100644 --- a/src/utils.h +++ b/src/utils.h @@ -103,6 +103,14 @@ void stop_unimplemented_type(const char* fn, SEXPTYPE type) { stop_internal(fn, "Unimplemented type `%s`.", Rf_type2char(type)); } +static inline +SEXP r_eval_force(SEXP expr, SEXP env) { +#if R_VERSION >= R_Version(3, 2, 3) + return R_forceAndCall(expr, 1, env); +#else + return Rf_eval(expr, env); +#endif +} SEXP map(SEXP x, SEXP (*fn)(SEXP)); SEXP map_with_data(SEXP x, SEXP (*fn)(SEXP, void*), void* data); @@ -605,6 +613,7 @@ extern SEXP syms_vctrs_common_class_fallback; extern SEXP syms_fallback_class; extern SEXP syms_abort; extern SEXP syms_message; +extern SEXP syms_dot_elt; static const char * const c_strs_vctrs_common_class_fallback = "vctrs:::common_class_fallback"; diff --git a/src/vctrs.h b/src/vctrs.h index fdc5aadd8..c044f83e8 100644 --- a/src/vctrs.h +++ b/src/vctrs.h @@ -637,7 +637,10 @@ void stop_corrupt_ordered_levels(SEXP x, struct vctrs_arg* arg) __attribute__((n # define COMPLEX_RO(x) ((const Rcomplex*) COMPLEX(x)) # define STRING_PTR_RO(x) ((const SEXP*) STRING_PTR(x)) # define RAW_RO(x) ((const Rbyte*) RAW(x)) +# define DATAPTR_RO(x) ((const void*) DATAPTR(x)) #endif +#define VECTOR_PTR_RO(x) ((const SEXP*) DATAPTR_RO(x)) + #endif diff --git a/tests/testthat/test-map.R b/tests/testthat/test-map.R new file mode 100644 index 000000000..5f8278855 --- /dev/null +++ b/tests/testthat/test-map.R @@ -0,0 +1,185 @@ + +test_that("vec_map() handles S3 vectors", { + vctr <- new_vctr(1:3) + rcrd <- new_rcrd(list(x = 1:3, y = 4:6)) + + expect_identical( + vec_map(vctr, identity), + vec_chop(vctr) + ) + expect_identical( + vec_map(rcrd, identity), + vec_chop(rcrd) + ) + + expect_identical( + vec_map(vctr, identity, .ptype = vctr), + vctr + ) + expect_identical( + vec_map(rcrd, identity, .ptype = rcrd), + rcrd + ) +}) + +test_that("vec_map() handles S3 lists", { + local_list_rcrd_methods() + x <- new_list_rcrd(list(1:2, 3:5, 6:9)) + + exp <- list(1:2, 3:4, 6:7) + expect_identical( + vec_map(x, `[`, 1:2), + exp + ) + + expect_identical( + vec_map(x, `[`, 1:2, .ptype = x), + new_list_rcrd(exp) + ) +}) + +test_that("vec_map() requires a non-NULL ptype", { + expect_error(vec_map(1, identity, .ptype = NULL), "can't be NULL") +}) + +test_that("functions must return size 1 vector when mapping to atomic", { + expect_error( + map(1:2, rep, 2, .ptype = int()), + "must return a size 1 vector" + ) +}) + + +# Tests imported from purrr ----------------------------------------------- + +# These tests rely on compat-purrr-vctrs + +test_that("preserves names", { + out <- map(list(x = 1, y = 2), identity) + expect_equal(names(out), c("x", "y")) +}) + +test_that("creates simple call", { + out <- map(1, function(x) sys.call())[[1]] + expect_equal(out, quote(.fn(.elt, ...))) +}) + +test_that("fails on non-vectors", { + expect_error(map(environment(), identity), class = "vctrs_error_scalar_type") + expect_error(map(quote(a), identity), class = "vctrs_error_scalar_type") +}) + +test_that("0 length input gives 0 length output", { + out1 <- map(list(), identity) + expect_equal(out1, list()) + + return("Used to work in purrr") + + out2 <- map(NULL, identity) + expect_equal(out2, list()) +}) + +test_that("map() always returns a list", { + expect_is(map(mtcars, mean), "list") +}) + +test_that("types automatically coerced upwards", { + expect_identical(map_int(c(FALSE, TRUE), identity), c(0L, 1L)) + + expect_identical(map_dbl(c(FALSE, TRUE), identity), c(0, 1)) + expect_identical(map_dbl(c(1L, 2L), identity), c(1, 2)) + + return("Used to work in purrr") + + expect_identical(map_int(as.raw(0:1), identity), 0:1) + expect_identical(map_dbl(as.raw(0:1), identity), c(0, 1)) + expect_identical(map_chr(as.raw(0:255), identity), as.character(as.raw(0:255))) + + expect_identical(map_chr(c(FALSE, TRUE), identity), c("FALSE", "TRUE")) + expect_identical(map_chr(c(1L, 2L), identity), c("1", "2")) + expect_identical(map_chr(c(1.5, 2.5), identity), c("1.500000", "2.500000")) +}) + +test_that("map_raw",{ + expect_equal(map_raw("a", charToRaw), charToRaw("a")) +}) + +test_that("logical and integer NA become correct double NA", { + expect_identical( + map_dbl(list(NA, NA_integer_), identity), + c(NA_real_, NA_real_) + ) +}) + +test_that("map forces arguments in same way as base R", { + f_map <- map(1:2, function(i) function(x) x + i) + f_base <- lapply(1:2, function(i) function(x) x + i) + + expect_equal(f_map[[1]](0), f_base[[1]](0)) + expect_equal(f_map[[2]](0), f_base[[2]](0)) +}) + +test_that("walk is used for side-effects", { + expect_output(walk(1:3, str)) +}) + +test_that("map_if() and map_at() always return a list", { + skip_if_not_installed("tibble") + df <- tibble::tibble(x = 1, y = "a") + expect_identical(map_if(df, is.character, ~"out"), list(x = 1, y = "out")) + expect_identical(map_at(df, 1, ~"out"), list(x = "out", y = "a")) +}) + +test_that("map_at() works with tidyselect", { + skip_if_not_installed("tidyselect") + x <- list(a = "b", b = "c", aa = "bb") + one <- map_at(x, quos(a), toupper) + expect_identical(one$a, "B") + expect_identical(one$aa, "bb") + two <- map_at(x, quos(tidyselect::contains("a")), toupper) + expect_identical(two$a, "B") + expect_identical(two$aa, "BB") +}) + +test_that("negative .at omits locations", { + x <- c(1, 2, 3) + out <- map_at(x, -1, ~ .x * 2) + expect_equal(out, list(1, 4, 6)) +}) + +test_that("map works with calls and pairlists", { + expect_true(TRUE) + return("Used to work in purrr") + + out <- map(quote(f(x)), ~ quote(z)) + expect_equal(out, list(quote(z), quote(z))) + + out <- map(pairlist(1, 2), ~ . + 1) + expect_equal(out, list(2, 3)) +}) + +test_that("primitive dispatch correctly", { + local_bindings(.env = global_env(), + as.character.test_class = function(x) "dispatched!" + ) + x <- structure(list(), class = "test_class") + expect_identical(map(list(x, x), as.character), list("dispatched!", "dispatched!")) +}) + +test_that("map_if requires predicate functions", { + expect_error(map_if(1:3, ~ NA, ~ "foo"), "must return") +}) + +test_that("`.else` maps false elements", { + expect_identical(map_if(-1:1, ~ .x > 0, paste, .else = ~ "bar", "suffix"), list("bar", "bar", "1 suffix")) +}) + +test_that("map() with empty input copies names", { + named_list <- named(list()) + expect_identical( map(named_list, identity), named(list())) + expect_identical(map_lgl(named_list, identity), named(lgl())) + expect_identical(map_int(named_list, identity), named(int())) + expect_identical(map_dbl(named_list, identity), named(dbl())) + expect_identical(map_chr(named_list, identity), named(chr())) + expect_identical(map_raw(named_list, identity), named(raw())) +})