diff --git a/DESCRIPTION b/DESCRIPTION index b9a68d83e..8ffa4cd91 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -15,7 +15,7 @@ License: MIT + file LICENSE URL: https://tune.tidymodels.org/, https://github.com/tidymodels/tune BugReports: https://github.com/tidymodels/tune/issues Depends: - R (>= 4.0) + R (>= 4.1) Imports: cli (>= 3.3.0), dials (>= 1.3.0.9000), @@ -50,7 +50,7 @@ Suggests: kknn, knitr, modeldata, - probably, + probably (>= 1.0.3.9001), scales, spelling, splines2, @@ -58,14 +58,14 @@ Suggests: xgboost, xml2 Remotes: + tidymodels/dials, tidymodels/hardhat, tidymodels/parsnip, + tidymodels/probably, tidymodels/recipes, tidymodels/rsample, tidymodels/tailor, - tidymodels/workflows, - tidymodels/dials, - tidymodels/probably + tidymodels/workflows Config/Needs/website: pkgdown, tidymodels, kknn, doParallel, doFuture, tidyverse/tidytemplate Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 7b1e69c87..e05522358 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -262,7 +262,6 @@ importFrom(cli,qty) importFrom(dials,encode_unit) importFrom(dials,is_unknown) importFrom(dials,parameters) -importFrom(dplyr,"%>%") importFrom(dplyr,all_of) importFrom(dplyr,anti_join) importFrom(dplyr,arrange) diff --git a/R/0_imports.R b/R/0_imports.R index 21a2c136c..fd75584a9 100644 --- a/R/0_imports.R +++ b/R/0_imports.R @@ -1,4 +1,4 @@ -#' @importFrom dplyr filter select %>% full_join mutate bind_rows case_when vars +#' @importFrom dplyr filter select full_join mutate bind_rows case_when vars #' @importFrom dplyr all_of ungroup slice bind_cols pull sample_n desc anti_join #' @importFrom dplyr distinct arrange rename mutate_if starts_with inner_join #' @importFrom dplyr last diff --git a/R/acquisition.R b/R/acquisition.R index c25c5952c..8a2b7f0b3 100644 --- a/R/acquisition.R +++ b/R/acquisition.R @@ -84,20 +84,20 @@ predict.prob_improve <- } new_data <- - new_data %>% + new_data |> mutate(.sd = ifelse(.sd <= object$eps, object$eps, .sd)) if (maximize) { new_data <- - new_data %>% + new_data |> mutate(delta = ((.mean - best - trade_off) / .sd)) } else { new_data <- - new_data %>% + new_data |> mutate(delta = ((trade_off + best - .mean) / .sd)) } - new_data %>% - dplyr::mutate(objective = pnorm(delta)) %>% + new_data |> + dplyr::mutate(objective = pnorm(delta)) |> dplyr::select(objective) } @@ -137,23 +137,23 @@ predict.exp_improve <- function(object, new_data, maximize, iter, best, ...) { } new_data <- - new_data %>% + new_data |> mutate(sd_trunc = ifelse(.sd <= object$eps, object$eps, .sd)) if (maximize) { - new_data <- new_data %>% mutate(delta = .mean - best - trade_off) + new_data <- new_data |> mutate(delta = .mean - best - trade_off) } else { - new_data <- new_data %>% mutate(delta = trade_off + best - .mean) + new_data <- new_data |> mutate(delta = trade_off + best - .mean) } new_data <- - new_data %>% + new_data |> mutate( snr = delta / sd_trunc, z = ifelse(.sd <= object$eps, 0, snr), objective = (delta * pnorm(z)) + (sd_trunc * dnorm(z)) ) - new_data %>% dplyr::select(objective) + new_data |> dplyr::select(objective) } @@ -188,9 +188,9 @@ predict.conf_bound <- function(object, new_data, maximize, iter, ...) { # `tune` is setup to always maximize the objective function if (maximize) { - new_data <- new_data %>% mutate(objective = .mean + kappa * .sd) + new_data <- new_data |> mutate(objective = .mean + kappa * .sd) } else { - new_data <- new_data %>% mutate(objective = -(.mean + kappa * .sd)) + new_data <- new_data |> mutate(objective = -(.mean + kappa * .sd)) } - new_data %>% dplyr::select(objective) + new_data |> dplyr::select(objective) } diff --git a/R/case_weights.R b/R/case_weights.R index a17147f20..eed483995 100644 --- a/R/case_weights.R +++ b/R/case_weights.R @@ -14,10 +14,10 @@ #' library(parsnip) #' library(dplyr) #' -#' frequency_weights(1:10) %>% +#' frequency_weights(1:10) |> #' .use_case_weights_with_yardstick() #' -#' importance_weights(seq(1, 10, by = .1))%>% +#' importance_weights(seq(1, 10, by = .1))|> #' .use_case_weights_with_yardstick() .use_case_weights_with_yardstick <- function(x) { UseMethod(".use_case_weights_with_yardstick") diff --git a/R/checks.R b/R/checks.R index b7111a340..7a115d8ec 100644 --- a/R/checks.R +++ b/R/checks.R @@ -451,8 +451,8 @@ check_initial <- function(x, param_nms <- .get_tune_parameter_names(x) if (inherits(x, "tune_race")) { num_resamples <- - x %>% - collect_metrics(summarize = FALSE) %>% + x |> + collect_metrics(summarize = FALSE) |> dplyr::count(.config) max_resamples <- max(num_resamples$n) configs <- num_resamples$.config[num_resamples$n == max_resamples] @@ -461,8 +461,8 @@ check_initial <- function(x, x$.order <- NULL } else { num_grid <- - collect_metrics(x) %>% - dplyr::distinct(!!!rlang::syms(param_nms)) %>% + collect_metrics(x) |> + dplyr::distinct(!!!rlang::syms(param_nms)) |> nrow() } if (any(checks == "bayes")) { @@ -472,7 +472,7 @@ check_initial <- function(x, } } if (!any(names(x) == ".iter")) { - x <- x %>% dplyr::mutate(.iter = 0L) + x <- x |> dplyr::mutate(.iter = 0L) } x } diff --git a/R/collect.R b/R/collect.R index b61ca8e99..afaca000d 100644 --- a/R/collect.R +++ b/R/collect.R @@ -103,28 +103,28 @@ #' library(recipes) #' library(tibble) #' -#' lm_mod <- linear_reg() %>% set_engine("lm") +#' lm_mod <- linear_reg() |> set_engine("lm") #' set.seed(93599150) #' car_folds <- vfold_cv(mtcars, v = 2, repeats = 3) #' ctrl <- control_resamples(save_pred = TRUE, extract = extract_fit_engine) #' #' spline_rec <- -#' recipe(mpg ~ ., data = mtcars) %>% +#' recipe(mpg ~ ., data = mtcars) |> #' step_spline_natural(disp, deg_free = tune("df")) #' #' grid <- tibble(df = 3:6) #' #' resampled <- -#' lm_mod %>% +#' lm_mod |> #' tune_grid(spline_rec, resamples = car_folds, control = ctrl, grid = grid) #' -#' collect_predictions(resampled) %>% arrange(.row) -#' collect_predictions(resampled, summarize = TRUE) %>% arrange(.row) +#' collect_predictions(resampled) |> arrange(.row) +#' collect_predictions(resampled, summarize = TRUE) |> arrange(.row) #' collect_predictions( #' resampled, #' summarize = TRUE, #' parameters = grid[1, ] -#' ) %>% arrange(.row) +#' ) |> arrange(.row) #' #' collect_extracts(resampled) #' @@ -182,7 +182,7 @@ filter_predictions <- function(x, parameters) { params <- attr(x, "parameters") if (is.null(params)) { cli::cli_warn( - "The object is missing some attributes; it is probably from an earlier + "The object is missing some attributes; it is probably from an earlier version of {.pkg tune}. The predictions can't be filtered." ) @@ -214,8 +214,8 @@ numeric_summarize <- function(x) { nms <- names(x) group_cols <- nms[!(nms %in% pred_cols)] x <- - x %>% - dplyr::group_by(!!!rlang::syms(group_cols)) %>% + x |> + dplyr::group_by(!!!rlang::syms(group_cols)) |> dplyr::summarise( dplyr::across(dplyr::starts_with(".pred"), mean_na_rm) ) @@ -239,34 +239,34 @@ prob_summarize <- function(x, p) { group_cols <- nms[!(nms %in% pred_cols)] x <- - x %>% - dplyr::group_by(!!!rlang::syms(group_cols)) %>% + x |> + dplyr::group_by(!!!rlang::syms(group_cols)) |> dplyr::summarise( dplyr::across(dplyr::starts_with(".pred_"), mean_na_rm) - ) %>% + ) |> ungroup() # In case the class probabilities do not add up to 1 after averaging group_cols <- group_cols[group_cols != y_cols] totals <- - x %>% - dplyr::select(-dplyr::all_of(y_cols)) %>% + x |> + dplyr::select(-dplyr::all_of(y_cols)) |> tidyr::pivot_longer( cols = c(dplyr::all_of(pred_cols)), names_to = ".column", values_to = ".value" - ) %>% - dplyr::group_by(!!!rlang::syms(group_cols)) %>% - dplyr::summarize(.totals = sum(.value)) %>% + ) |> + dplyr::group_by(!!!rlang::syms(group_cols)) |> + dplyr::summarize(.totals = sum(.value)) |> dplyr::ungroup() x <- - x %>% - dplyr::full_join(totals, by = group_cols) %>% + x |> + dplyr::full_join(totals, by = group_cols) |> dplyr::mutate( dplyr::across(dplyr::starts_with(".pred_"), ~ .x / .totals - )) %>% + )) |> dplyr::select(-.totals) # If we started with hard class predictions, recompute them based on the @@ -275,21 +275,21 @@ prob_summarize <- function(x, p) { lvl <- levels(x[[y_cols]]) ord <- is.ordered(x[[y_cols]]) class_pred <- - x %>% - dplyr::select(-dplyr::all_of(y_cols)) %>% + x |> + dplyr::select(-dplyr::all_of(y_cols)) |> tidyr::pivot_longer( cols = c(dplyr::all_of(pred_cols)), names_to = ".column", values_to = ".value" - ) %>% - dplyr::group_by(!!!rlang::syms(group_cols)) %>% - dplyr::arrange(dplyr::desc(.value), .by_group = TRUE) %>% - dplyr::slice(1) %>% + ) |> + dplyr::group_by(!!!rlang::syms(group_cols)) |> + dplyr::arrange(dplyr::desc(.value), .by_group = TRUE) |> + dplyr::slice(1) |> dplyr::mutate( .pred_class = gsub("\\.pred_", "", .column), .pred_class = factor(.pred_class, levels = lvl, ordered = ord) - ) %>% - dplyr::ungroup() %>% + ) |> + dplyr::ungroup() |> dplyr::select(-.value, -.column) x <- full_join(x, class_pred, by = group_cols) } @@ -308,12 +308,12 @@ class_summarize <- function(x, p) { group_cols <- nms[!(nms %in% pred_cols)] outcome_col <- group_cols[!(group_cols %in% c(p, ".row", ".iter", ".config"))] x <- - x %>% - dplyr::group_by(!!!rlang::syms(group_cols)) %>% - dplyr::count(.pred_class) %>% - dplyr::arrange(dplyr::desc(n), .by_group = TRUE) %>% - dplyr::slice(1) %>% - dplyr::ungroup() %>% + x |> + dplyr::group_by(!!!rlang::syms(group_cols)) |> + dplyr::count(.pred_class) |> + dplyr::arrange(dplyr::desc(n), .by_group = TRUE) |> + dplyr::slice(1) |> + dplyr::ungroup() |> dplyr::select(-n) x } @@ -322,7 +322,7 @@ surv_summarize <- function(x, param, y) { pred_cols <- grep("^\\.pred", names(x), value = TRUE) nms <- names(x) - outcomes <- x[, c(".row", y)] %>% dplyr::slice(1, .by = .row) + outcomes <- x[, c(".row", y)] |> dplyr::slice(1, .by = .row) res <- NULL @@ -340,13 +340,13 @@ surv_summarize <- function(x, param, y) { if (any(pred_cols == ".pred")) { nest_cols <- c(".eval_time", ".pred_survival", ".weight_censored") tmp <- - x %>% + x |> dplyr::select(.pred, .config, .row, dplyr::any_of(param), - dplyr::any_of(".iter")) %>% - tidyr::unnest(.pred) %>% + dplyr::any_of(".iter")) |> + tidyr::unnest(.pred) |> dplyr::summarize( .pred_survival = mean(.pred_survival, na.rm = TRUE), .weight_censored = mean(.weight_censored, na.rm = TRUE), @@ -357,7 +357,7 @@ surv_summarize <- function(x, param, y) { dplyr::any_of(param), dplyr::any_of(".iter") ) - ) %>% + ) |> tidyr::nest( .pred = c(dplyr::all_of(nest_cols)), .by = c(.row, .config, @@ -393,8 +393,8 @@ average_predictions <- function(x, grid = NULL) { } x <- - x %>% - collect_predictions() %>% + x |> + collect_predictions() |> dplyr::select(-starts_with("id")) if (all(metric_types == "numeric")) { @@ -588,11 +588,11 @@ estimate_tune_results <- function(x, ..., col_name = ".metrics") { x <- dplyr::distinct(x) } - x <- x %>% - tibble::as_tibble() %>% - vctrs::vec_slice(., .$id != "Apparent") %>% + x <- x |> tibble::as_tibble() + + x <- vctrs::vec_slice(x, x$id != "Apparent") |> dplyr::group_by(!!!rlang::syms(param_names), .metric, .estimator, - !!!rlang::syms(group_cols)) %>% + !!!rlang::syms(group_cols)) |> dplyr::summarize( mean = mean(.estimate, na.rm = TRUE), n = sum(!is.na(.estimate)), @@ -602,10 +602,10 @@ estimate_tune_results <- function(x, ..., col_name = ".metrics") { # only join when parameters are being tuned (#600) if (length(param_names) == 0) { - x <- x %>% + x <- x |> dplyr::bind_cols(config_key) } else { - x <- x %>% + x <- x |> dplyr::full_join(config_key, by = param_names) } @@ -635,8 +635,8 @@ collect_notes.tune_results <- function(x, ...) { return(x$.notes[[1]]) } - x %>% - dplyr::select(dplyr::starts_with("id"), dplyr::any_of(".iter"), .notes) %>% + x |> + dplyr::select(dplyr::starts_with("id"), dplyr::any_of(".iter"), .notes) |> tidyr::unnest(cols = .notes) } @@ -666,8 +666,8 @@ collect_extracts.tune_results <- function(x, ...) { )) } - x %>% - dplyr::select(dplyr::starts_with("id"), dplyr::any_of(".iter"), .extracts) %>% + x |> + dplyr::select(dplyr::starts_with("id"), dplyr::any_of(".iter"), .extracts) |> tidyr::unnest(cols = .extracts) } diff --git a/R/conf_mat_resampled.R b/R/conf_mat_resampled.R index f6e9fc5ff..2ce7d2ecf 100644 --- a/R/conf_mat_resampled.R +++ b/R/conf_mat_resampled.R @@ -22,8 +22,8 @@ #' #' set.seed(2393) #' res <- -#' logistic_reg() %>% -#' set_engine("glm") %>% +#' logistic_reg() |> +#' set_engine("glm") |> #' fit_resamples( #' Class ~ ., #' resamples = vfold_cv(two_class_dat, v = 3), @@ -56,8 +56,8 @@ conf_mat_resampled <- function(x, ..., parameters = NULL, tidy = TRUE) { params <- .get_tune_parameter_names(x) if (length(params) > 0) { param_combos <- - preds %>% - dplyr::select(!!!params) %>% + preds |> + dplyr::select(!!!params) |> distinct() if (nrow(param_combos) > 1) { cli::cli_abort( @@ -75,8 +75,8 @@ conf_mat_resampled <- function(x, ..., parameters = NULL, tidy = TRUE) { id_cols <- grep("(^id$)|($id[1-9]$)", names(preds), value = TRUE) preds <- - preds %>% - dplyr::group_nest(!!!syms(id_cols)) %>% + preds |> + dplyr::group_nest(!!!syms(id_cols)) |> dplyr::mutate( conf_mats = purrr::map(data, ~ yardstick::conf_mat(.x, truth = {{ truth }}, estimate = .pred_class)) @@ -86,10 +86,10 @@ conf_mat_resampled <- function(x, ..., parameters = NULL, tidy = TRUE) { options(dplyr.summarise.inform = FALSE) res <- - purrr::map(preds$conf_mats, ~ as.data.frame(.x$table)) %>% - purrr::list_rbind() %>% - dplyr::group_by(Prediction, Truth) %>% - dplyr::summarize(Freq = mean(Freq, na.rm = TRUE)) %>% + purrr::map(preds$conf_mats, ~ as.data.frame(.x$table)) |> + purrr::list_rbind() |> + dplyr::group_by(Prediction, Truth) |> + dplyr::summarize(Freq = mean(Freq, na.rm = TRUE)) |> dplyr::ungroup() options(dplyr.summarise.inform = opt) @@ -99,7 +99,7 @@ conf_mat_resampled <- function(x, ..., parameters = NULL, tidy = TRUE) { res <- matrix(res$Freq, ncol = length(lvls), byrow = TRUE) colnames(res) <- lvls rownames(res) <- lvls - res <- as.table(res) %>% yardstick::conf_mat() + res <- as.table(res) |> yardstick::conf_mat() } res } diff --git a/R/data.R b/R/data.R index 24d3d3992..509db6efa 100644 --- a/R/data.R +++ b/R/data.R @@ -27,13 +27,13 @@ #' # ------------------------------------------------------------------------------ #' #' ames_rec <- -#' recipe(Sale_Price ~ ., data = ames_train) %>% -#' step_log(Sale_Price, base = 10) %>% -#' step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>% -#' step_other(Neighborhood, threshold = .1) %>% -#' step_dummy(all_nominal()) %>% -#' step_zv(all_predictors()) %>% -#' step_spline_natural(Longitude, deg_free = tune("lon")) %>% +#' recipe(Sale_Price ~ ., data = ames_train) |> +#' step_log(Sale_Price, base = 10) |> +#' step_YeoJohnson(Lot_Area, Gr_Liv_Area) |> +#' step_other(Neighborhood, threshold = .1) |> +#' step_dummy(all_nominal()) |> +#' step_zv(all_predictors()) |> +#' step_spline_natural(Longitude, deg_free = tune("lon")) |> #' step_spline_natural(Latitude, deg_free = tune("lat")) #' #' knn_model <- @@ -42,21 +42,21 @@ #' neighbors = tune("K"), #' weight_func = tune(), #' dist_power = tune() -#' ) %>% +#' ) |> #' set_engine("kknn") #' #' ames_wflow <- -#' workflow() %>% -#' add_recipe(ames_rec) %>% +#' workflow() |> +#' add_recipe(ames_rec) |> #' add_model(knn_model) #' #' ames_set <- -#' extract_parameter_set_dials(ames_wflow) %>% +#' extract_parameter_set_dials(ames_wflow) |> #' update(K = neighbors(c(1, 50))) #' #' set.seed(7014) #' ames_grid <- -#' ames_set %>% +#' ames_set |> #' grid_max_entropy(size = 10) #' #' ames_grid_search <- diff --git a/R/expo_decay.R b/R/expo_decay.R index cf5b52f77..caabf3070 100644 --- a/R/expo_decay.R +++ b/R/expo_decay.R @@ -28,7 +28,7 @@ #' limit_val = 0, #' slope = 1 / 5 #' ) -#' ) %>% +#' ) |> #' ggplot(aes(x = iter, y = value)) + #' geom_path() #' @export diff --git a/R/extract.R b/R/extract.R index 8c6e51c2e..6f9c255f5 100644 --- a/R/extract.R +++ b/R/extract.R @@ -55,10 +55,10 @@ #' set.seed(6735) #' tr_te_split <- initial_split(mtcars) #' -#' spline_rec <- recipe(mpg ~ ., data = mtcars) %>% +#' spline_rec <- recipe(mpg ~ ., data = mtcars) |> #' step_spline_natural(disp) #' -#' lin_mod <- linear_reg() %>% +#' lin_mod <- linear_reg() |> #' set_engine("lm") #' #' spline_res <- last_fit(lin_mod, spline_rec, split = tr_te_split) diff --git a/R/filter_parameters.R b/R/filter_parameters.R index 0658c068a..c0ae1152e 100644 --- a/R/filter_parameters.R +++ b/R/filter_parameters.R @@ -28,25 +28,25 @@ #' ## ----------------------------------------------------------------------------- #' # select all combinations using the 'rank' weighting scheme #' -#' ames_grid_search %>% +#' ames_grid_search |> #' collect_metrics() #' -#' filter_parameters(ames_grid_search, weight_func == "rank") %>% +#' filter_parameters(ames_grid_search, weight_func == "rank") |> #' collect_metrics() #' #' rank_only <- tibble::tibble(weight_func = "rank") -#' filter_parameters(ames_grid_search, parameters = rank_only) %>% +#' filter_parameters(ames_grid_search, parameters = rank_only) |> #' collect_metrics() #' #' ## ----------------------------------------------------------------------------- #' # Keep only the results from the numerically best combination #' -#' ames_iter_search %>% +#' ames_iter_search |> #' collect_metrics() #' #' best_param <- select_best(ames_iter_search, metric = "rmse") -#' ames_iter_search %>% -#' filter_parameters(parameters = best_param) %>% +#' ames_iter_search |> +#' filter_parameters(parameters = best_param) |> #' collect_metrics() #' @details #' Removing some parameter combinations might affect the results of `autoplot()` diff --git a/R/finalize.R b/R/finalize.R index 64ddd7033..84c48ba25 100644 --- a/R/finalize.R +++ b/R/finalize.R @@ -21,7 +21,7 @@ #' neighbors = tune("K"), #' weight_func = tune(), #' dist_power = tune() -#' ) %>% +#' ) |> #' set_engine("kknn") #' #' lowest_rmse <- select_best(ames_grid_search, metric = "rmse") @@ -60,7 +60,7 @@ finalize_recipe <- function(x, parameters) { } check_final_param(parameters) pset <- - hardhat::extract_parameter_set_dials(x) %>% + hardhat::extract_parameter_set_dials(x) |> dplyr::filter(id %in% names(parameters) & source == "recipe") if (tibble::is_tibble(parameters)) { @@ -113,7 +113,7 @@ finalize_tailor <- function(x, parameters) { } check_final_param(parameters) pset <- - hardhat::extract_parameter_set_dials(x) %>% + hardhat::extract_parameter_set_dials(x) |> dplyr::filter(id %in% names(parameters) & source == "tailor") if (tibble::is_tibble(parameters)) { diff --git a/R/fit_best.R b/R/fit_best.R index d91b73802..5b7947176 100644 --- a/R/fit_best.R +++ b/R/fit_best.R @@ -44,7 +44,7 @@ #' library(dplyr) #' #' data(meats, package = "modeldata") -#' meats <- meats %>% select(-water, -fat) +#' meats <- meats |> select(-water, -fat) #' #' set.seed(1) #' meat_split <- initial_split(meats) @@ -55,11 +55,11 @@ #' meat_rs <- vfold_cv(meat_train, v = 10) #' #' pca_rec <- -#' recipe(protein ~ ., data = meat_train) %>% -#' step_normalize(all_numeric_predictors()) %>% +#' recipe(protein ~ ., data = meat_train) |> +#' step_normalize(all_numeric_predictors()) |> #' step_pca(all_numeric_predictors(), num_comp = tune()) #' -#' knn_mod <- nearest_neighbor(neighbors = tune()) %>% set_mode("regression") +#' knn_mod <- nearest_neighbor(neighbors = tune()) |> set_mode("regression") #' #' ctrl <- control_grid(save_workflow = TRUE) #' diff --git a/R/grid_helpers.R b/R/grid_helpers.R index 3f6ad66d2..2d55020c5 100644 --- a/R/grid_helpers.R +++ b/R/grid_helpers.R @@ -195,8 +195,8 @@ forge_from_workflow <- function(new_data, workflow) { make_submod_arg <- function(grid, model, submodels) { # Assumes only one submodel parameter per model real_name <- - parsnip::get_from_env(paste(class(model$spec)[1], "args", sep = "_")) %>% - dplyr::filter(has_submodel & engine == model$spec$engine) %>% + parsnip::get_from_env(paste(class(model$spec)[1], "args", sep = "_")) |> + dplyr::filter(has_submodel & engine == model$spec$engine) |> dplyr::pull(parsnip) names(submodels) <- real_name submodels @@ -205,8 +205,8 @@ make_submod_arg <- function(grid, model, submodels) { make_rename_arg <- function(grid, model, submodels) { # Assumes only one submodel parameter per model real_name <- - parsnip::get_from_env(paste(class(model$spec)[1], "args", sep = "_")) %>% - dplyr::filter(has_submodel & engine == model$spec$engine) %>% + parsnip::get_from_env(paste(class(model$spec)[1], "args", sep = "_")) |> + dplyr::filter(has_submodel & engine == model$spec$engine) |> dplyr::pull(parsnip) res <- list(real_name) names(res) <- names(submodels) @@ -323,14 +323,14 @@ compute_grid_info <- function(workflow, grid) { # define a loop over any preprocessing tuning parameter combinations. if (any_parameters_preprocessor) { pp_df <- - dplyr::distinct(res, !!!syms_pre) %>% - dplyr::arrange(!!!syms_pre) %>% + dplyr::distinct(res, !!!syms_pre) |> + dplyr::arrange(!!!syms_pre) |> dplyr::mutate( .iter_preprocessor = dplyr::row_number(), .lab_pre = recipes::names0(max(dplyr::n()), "Preprocessor") ) res <- - dplyr::full_join(res, pp_df, by = parameters_preprocessor$id) %>% + dplyr::full_join(res, pp_df, by = parameters_preprocessor$id) |> dplyr::arrange(.iter_preprocessor) } else { res$.iter_preprocessor <- 1L @@ -350,16 +350,16 @@ compute_grid_info <- function(workflow, grid) { # (if any) res <- - res %>% - dplyr::group_nest(.iter_preprocessor, keep = TRUE) %>% + res |> + dplyr::group_nest(.iter_preprocessor, keep = TRUE) |> dplyr::mutate( .iter_config = purrr::map(data, make_iter_config), .model = purrr::map(data, ~ tibble::tibble(.iter_model = seq_len(nrow(.x)))), .num_models = purrr::map_int(.model, nrow) - ) %>% - dplyr::select(-.iter_preprocessor) %>% - tidyr::unnest(cols = c(data, .model, .iter_config)) %>% - dplyr::select(-.lab_pre) %>% + ) |> + dplyr::select(-.iter_preprocessor) |> + tidyr::unnest(cols = c(data, .model, .iter_config)) |> + dplyr::select(-.lab_pre) |> dplyr::relocate(dplyr::starts_with(".iter")) res$.msg_model <- @@ -367,8 +367,8 @@ compute_grid_info <- function(workflow, grid) { n = res$.num_models, res$.msg_preprocessor) - res %>% - dplyr::select(-.num_models) %>% + res |> + dplyr::select(-.num_models) |> dplyr::relocate(dplyr::starts_with(".msg")) } diff --git a/R/grid_performance.R b/R/grid_performance.R index fcfa024ad..6b0354f37 100644 --- a/R/grid_performance.R +++ b/R/grid_performance.R @@ -100,8 +100,8 @@ metrics_info <- function(x) { } estimate_reg <- function(dat, metric, param_names, outcome_name, case_weights) { - dat %>% - dplyr::group_by(!!!rlang::syms(param_names)) %>% + dat |> + dplyr::group_by(!!!rlang::syms(param_names)) |> metric(estimate = .pred, truth = !!sym(outcome_name), case_weights = !!case_weights) } @@ -133,8 +133,8 @@ estimate_class_prob <- function(dat, metric, param_names, outcome_name, } } - dat %>% - dplyr::group_by(!!!rlang::syms(param_names)) %>% + dat |> + dplyr::group_by(!!!rlang::syms(param_names)) |> metric( truth = !!truth, estimate = !!estimate, @@ -149,8 +149,8 @@ estimate_class_prob <- function(dat, metric, param_names, outcome_name, estimate_surv <- function(dat, metric, param_names, outcome_name, case_weights, types) { # potentially need to work around submodel parameters since those are within .pred dat <- unnest_parameters(dat, param_names) - dat %>% - dplyr::group_by(!!!rlang::syms(param_names)) %>% + dat |> + dplyr::group_by(!!!rlang::syms(param_names)) |> metric( truth = !!rlang::sym(outcome_name), estimate = !!maybe_estimate(metric), @@ -181,18 +181,18 @@ unnest_parameters <- function(x, params = NULL) { return(x) } - x <- x %>% parsnip::add_rowindex() + x <- x |> parsnip::add_rowindex() - others <- x %>% dplyr::select(-.pred) + others <- x |> dplyr::select(-.pred) rm_cols <- c(".pred_censored", ".weight_time") nest_cols <- c(".eval_time", ".pred_survival", ".weight_censored") x <- - x %>% - dplyr::select(.pred, .row) %>% - tidyr::unnest(cols = c(.pred)) %>% - dplyr::select(-dplyr::any_of(rm_cols)) %>% - tidyr::nest(.pred = c(all_of(nest_cols)), .by = c(.row, dplyr::all_of(params))) %>% - dplyr::full_join(others, by = ".row") %>% + x |> + dplyr::select(.pred, .row) |> + tidyr::unnest(cols = c(.pred)) |> + dplyr::select(-dplyr::any_of(rm_cols)) |> + tidyr::nest(.pred = c(all_of(nest_cols)), .by = c(.row, dplyr::all_of(params))) |> + dplyr::full_join(others, by = ".row") |> dplyr::select(-.row) x } diff --git a/R/int_pctl.R b/R/int_pctl.R index 015e82b20..49a95a7bd 100644 --- a/R/int_pctl.R +++ b/R/int_pctl.R @@ -56,7 +56,7 @@ #' sac_rs <- vfold_cv(Sacramento) #' #' lm_res <- -#' linear_reg() %>% +#' linear_reg() |> #' fit_resamples( #' log10(price) ~ beds + baths + sqft + type + latitude + longitude, #' resamples = sac_rs, @@ -116,8 +116,8 @@ int_pctl.tune_results <- function(.data, metrics = NULL, eval_time = NULL, config_keys, sample.int(10000, p), ~ boostrap_metrics_by_config(.x, .y, .data, metrics, times, allow_par, event_level, alpha, metrics_info) - ) %>% - purrr::list_rbind() %>% + ) |> + purrr::list_rbind() |> dplyr::arrange(.config, .metric) dplyr::as_tibble(res) } @@ -223,11 +223,11 @@ int_pctl_surv <- function(x, allow_par, alpha) { merge_keys <- c("term", grep("^\\.", names(res), value = TRUE)) merge_keys <- intersect(merge_keys, names(met_key)) - res <- res %>% - dplyr::full_join(met_key, by = merge_keys) %>% - dplyr::arrange(order) %>% - dplyr::select(-term, -order) %>% - dplyr::rename(term = old_term) %>% + res <- res |> + dplyr::full_join(met_key, by = merge_keys) |> + dplyr::arrange(order) |> + dplyr::select(-term, -order) |> + dplyr::rename(term = old_term) |> dplyr::relocate(term, dplyr::any_of(".eval_time")) } # nocov end @@ -238,7 +238,7 @@ get_configs <- function(x, parameters = NULL, as_list = TRUE) { param <- .get_tune_parameter_names(x) config_cols <- c(".config", ".iter", param) config_keys <- - collect_metrics(x, summarize = FALSE) %>% + collect_metrics(x, summarize = FALSE) |> dplyr::distinct(dplyr::pick(dplyr::any_of(config_cols))) if (!is.null(parameters)) { merge_cols <- intersect(names(config_keys), names(parameters)) diff --git a/R/last_fit.R b/R/last_fit.R index d080b7528..6f4c828d8 100644 --- a/R/last_fit.R +++ b/R/last_fit.R @@ -67,10 +67,10 @@ #' set.seed(6735) #' tr_te_split <- initial_split(mtcars) #' -#' spline_rec <- recipe(mpg ~ ., data = mtcars) %>% +#' spline_rec <- recipe(mpg ~ ., data = mtcars) |> #' step_spline_natural(disp) #' -#' lin_mod <- linear_reg() %>% +#' lin_mod <- linear_reg() |> #' set_engine("lm") #' #' spline_res <- last_fit(lin_mod, spline_rec, split = tr_te_split) @@ -86,8 +86,8 @@ #' #' library(workflows) #' spline_wfl <- -#' workflow() %>% -#' add_recipe(spline_rec) %>% +#' workflow() |> +#' add_recipe(spline_rec) |> #' add_model(lin_mod) #' #' last_fit(spline_wfl, split = tr_te_split) diff --git a/R/logging.R b/R/logging.R index 448a90b31..c0e54448a 100644 --- a/R/logging.R +++ b/R/logging.R @@ -171,7 +171,7 @@ log_catalog <- function(msg, type) { tune_catalog <- function(issues) { catalog <- rlang::env_get(env = tune_env, nm = "progress_catalog") - res <- dplyr::count(issues, type, note) %>% mutate(id = NA_integer_) + res <- dplyr::count(issues, type, note) |> mutate(id = NA_integer_) res <- dplyr::bind_rows(res, catalog) res <- dplyr::group_by(res, type, note) # dplyr::first will gain an `na_rm` argument in 1.1.0 diff --git a/R/merge.R b/R/merge.R index 3b27b03e4..12d991dd0 100644 --- a/R/merge.R +++ b/R/merge.R @@ -20,8 +20,8 @@ #' library(dials) #' #' pca_rec <- -#' recipe(mpg ~ ., data = mtcars) %>% -#' step_impute_knn(all_predictors(), neighbors = tune()) %>% +#' recipe(mpg ~ ., data = mtcars) |> +#' step_impute_knn(all_predictors(), neighbors = tune()) |> #' step_pca(all_predictors(), num_comp = tune()) #' #' pca_grid <- @@ -36,8 +36,8 @@ #' merge(pca_rec, pca_grid) #' #' spline_rec <- -#' recipe(mpg ~ ., data = mtcars) %>% -#' step_spline_natural(disp, deg_free = tune("disp df")) %>% +#' recipe(mpg ~ ., data = mtcars) |> +#' step_spline_natural(disp, deg_free = tune("disp df")) |> #' step_spline_natural(wt, deg_free = tune("wt df")) #' #' spline_grid <- @@ -54,13 +54,13 @@ #' data(hpc_data, package = "modeldata") #' #' xgb_mod <- -#' boost_tree(trees = tune(), min_n = tune()) %>% +#' boost_tree(trees = tune(), min_n = tune()) |> #' set_engine("xgboost") #' #' set.seed(254) #' xgb_grid <- -#' extract_parameter_set_dials(xgb_mod) %>% -#' finalize(hpc_data) %>% +#' extract_parameter_set_dials(xgb_mod) |> +#' finalize(hpc_data) |> #' grid_max_entropy(size = 3) #' #' merge(xgb_mod, xgb_grid) @@ -77,7 +77,7 @@ merge.model_spec <- function(x, y, ...) { update_model <- function(grid, object, pset, step_id, nms, ...) { for (i in nms) { - param_info <- pset %>% dplyr::filter(id == i & source == "model_spec") + param_info <- pset |> dplyr::filter(id == i & source == "model_spec") if (nrow(param_info) > 1) { cli::cli_abort("Cannot update; there are too many parameters.") } @@ -96,7 +96,7 @@ update_model <- function(grid, object, pset, step_id, nms, ...) { update_recipe <- function(grid, object, pset, step_id, nms, ...) { for (i in nms) { - param_info <- pset %>% dplyr::filter(id == i & source == "recipe") + param_info <- pset |> dplyr::filter(id == i & source == "recipe") if (nrow(param_info) == 1) { idx <- which(step_id == param_info$component_id) # check index @@ -137,10 +137,10 @@ merger <- function(x, y, ...) { return(res) } - y %>% + y |> dplyr::mutate( ..object = purrr::map(1:nrow(y), ~ updater(y[.x,], x, pset, step_ids, grid_name)) - ) %>% + ) |> dplyr::select(x = ..object) } diff --git a/R/metric-selection.R b/R/metric-selection.R index 09a54b297..e8bb8dec3 100644 --- a/R/metric-selection.R +++ b/R/metric-selection.R @@ -396,7 +396,7 @@ check_autoplot_eval_times <- function(x, metric, eval_time, call) { } # But there could be NA eval times for non-dynamic metrics - met <- estimate_tune_results(x) %>% dplyr::filter(.metric %in% metric) + met <- estimate_tune_results(x) |> dplyr::filter(.metric %in% metric) if (any(names(met) == ".eval_time")) { if (any(is.na(met$.eval_time))) { diff --git a/R/min_grid.R b/R/min_grid.R index 4e8ef71ab..77b29a307 100644 --- a/R/min_grid.R +++ b/R/min_grid.R @@ -51,13 +51,13 @@ #' ## No ability to exploit submodels: #' #' svm_spec <- -#' svm_poly(cost = tune(), degree = tune()) %>% -#' set_engine("kernlab") %>% +#' svm_poly(cost = tune(), degree = tune()) |> +#' set_engine("kernlab") |> #' set_mode("regression") #' #' svm_grid <- -#' svm_spec %>% -#' extract_parameter_set_dials() %>% +#' svm_spec |> +#' extract_parameter_set_dials() |> #' grid_regular(levels = 3) #' #' min_grid(svm_spec, svm_grid) @@ -66,13 +66,13 @@ #' ## Can use submodels #' #' xgb_spec <- -#' boost_tree(trees = tune(), min_n = tune()) %>% -#' set_engine("xgboost") %>% +#' boost_tree(trees = tune(), min_n = tune()) |> +#' set_engine("xgboost") |> #' set_mode("regression") #' #' xgb_grid <- -#' xgb_spec %>% -#' extract_parameter_set_dials() %>% +#' xgb_spec |> +#' extract_parameter_set_dials() |> #' grid_regular(levels = 3) #' #' min_grid(xgb_spec, xgb_grid) @@ -90,8 +90,8 @@ min_grid.model_spec <- function(x, grid, ...) { # Template for model results that do no have the sub-model feature blank_submodels <- function(grid) { - grid %>% - dplyr::mutate(.submodels = purrr::map(1:nrow(grid), ~ list())) %>% + grid |> + dplyr::mutate(.submodels = purrr::map(1:nrow(grid), ~ list())) |> dplyr::mutate_if(is.factor, as.character) } @@ -105,14 +105,14 @@ get_submodel_info <- function(spec) { cli::cli_abort("Please set the model's engine.") } param_info <- - get_from_env(paste0(class(spec)[1], "_args")) %>% - dplyr::filter(engine == spec$engine) %>% - dplyr::select(name = parsnip, has_submodel) %>% + get_from_env(paste0(class(spec)[1], "_args")) |> + dplyr::filter(engine == spec$engine) |> + dplyr::select(name = parsnip, has_submodel) |> dplyr::full_join( - hardhat::extract_parameter_set_dials(spec) %>% dplyr::select(name, id), + hardhat::extract_parameter_set_dials(spec) |> dplyr::select(name, id), by = "name" - ) %>% - dplyr::mutate(id = ifelse(is.na(id), name, id)) %>% + ) |> + dplyr::mutate(id = ifelse(is.na(id), name, id)) |> # In case the parameter is an engine parameter dplyr::mutate(has_submodel = ifelse(is.na(has_submodel), FALSE, has_submodel)) @@ -142,12 +142,12 @@ submod_and_others <- function(grid, fixed_args) { subm_nm <- orig_names[!(orig_names %in% fixed_args)] # avoid more rlangedness related to names until end: - grid <- grid %>% dplyr::rename(..val = !!subm_nm) + grid <- grid |> dplyr::rename(..val = !!subm_nm) fit_only <- - grid %>% - dplyr::group_by(!!!rlang::syms(fixed_args)) %>% - dplyr::summarize(max_val = max(..val, na.rm = TRUE)) %>% + grid |> + dplyr::group_by(!!!rlang::syms(fixed_args)) |> + dplyr::summarize(max_val = max(..val, na.rm = TRUE)) |> dplyr::ungroup() if (utils::packageVersion("dplyr") >= "1.0.99.9000") { @@ -158,21 +158,21 @@ submod_and_others <- function(grid, fixed_args) { dplyr::full_join(fit_only, grid, by = fixed_args) } - min_grid_df <- min_grid_df %>% - dplyr::filter(..val != max_val) %>% - dplyr::group_by(!!!rlang::syms(fixed_args)) %>% - dplyr::summarize(.submodels = list(tibble::lst(!!subm_nm := ..val))) %>% + min_grid_df <- min_grid_df |> + dplyr::filter(..val != max_val) |> + dplyr::group_by(!!!rlang::syms(fixed_args)) |> + dplyr::summarize(.submodels = list(tibble::lst(!!subm_nm := ..val))) |> dplyr::ungroup() if (utils::packageVersion("dplyr") >= "1.0.99.9000") { - min_grid_df <- min_grid_df %>% + min_grid_df <- min_grid_df |> dplyr::full_join(fit_only, by = fixed_args, multiple = "all") } else { - min_grid_df <- min_grid_df %>% + min_grid_df <- min_grid_df |> dplyr::full_join(fit_only, by = fixed_args) } - min_grid_df <- min_grid_df %>% + min_grid_df <- min_grid_df |> dplyr::rename(!!subm_nm := max_val) min_grid_df$.submodels <- @@ -182,7 +182,7 @@ submod_and_others <- function(grid, fixed_args) { purrr::map(1:nrow(min_grid_df), ~ list()) ) - dplyr::select(min_grid_df, dplyr::all_of(orig_names), .submodels) %>% + dplyr::select(min_grid_df, dplyr::all_of(orig_names), .submodels) |> dplyr::mutate_if(is.factor, as.character) } diff --git a/R/outcome-names.R b/R/outcome-names.R index ad1e761eb..39284942b 100644 --- a/R/outcome-names.R +++ b/R/outcome-names.R @@ -6,8 +6,8 @@ #' @keywords internal #' @examples #' library(dplyr) -#' lm(cbind(mpg, wt) ~ ., data = mtcars) %>% -#' purrr::pluck(terms) %>% +#' lm(cbind(mpg, wt) ~ ., data = mtcars) |> +#' purrr::pluck(terms) |> #' outcome_names() #' @export outcome_names <- function(x, ...) { diff --git a/R/plots.R b/R/plots.R index 66d7c8c94..ed1fbd6fd 100644 --- a/R/plots.R +++ b/R/plots.R @@ -153,7 +153,7 @@ get_param_columns <- function(x) { # Use the user-given id for the parameter or the parameter label? get_param_label <- function(x, id_val) { x <- tibble::as_tibble(x) - y <- dplyr::filter(x, id == id_val) %>% dplyr::slice(1) + y <- dplyr::filter(x, id == id_val) |> dplyr::slice(1) num_param <- sum(x$name == y$name) no_special_id <- y$name == y$id if (no_special_id && num_param == 1) { @@ -176,7 +176,7 @@ get_param_label <- function(x, id_val) { paste_param_by <- function(x) { if (".by" %in% colnames(x)) { x <- - x %>% + x |> dplyr::mutate( .metric = case_when( !is.na(.by) ~ paste0(.metric, "(", .by, ")"), @@ -195,8 +195,8 @@ is_factorial <- function(x, cutoff = 0.95) { p <- ncol(x) vals <- purrr::map(x, unique) full_fact <- - tidyr::crossing(!!!vals) %>% - dplyr::full_join(x %>% dplyr::mutate(..obs = 1), by = names(x)) + tidyr::crossing(!!!vals) |> + dplyr::full_join(x |> dplyr::mutate(..obs = 1), by = names(x)) mean(!is.na(full_fact$..obs)) >= cutoff } @@ -232,8 +232,8 @@ is_regular_grid <- function(grid) { use_regular_grid_plot <- function(x) { dat <- collect_metrics(x) param_cols <- get_param_columns(x) - grd <- dat %>% - dplyr::select(all_of(param_cols)) %>% + grd <- dat |> + dplyr::select(all_of(param_cols)) |> distinct() is_regular_grid(grd) } @@ -253,15 +253,15 @@ process_autoplot_metrics <- function(x, metric, eval_time) { # have the same value (if any). x <- paste_param_by(x) - x <- x %>% - dplyr::filter(.metric %in% metric) %>% + x <- x |> + dplyr::filter(.metric %in% metric) |> dplyr::filter(!is.na(mean)) num_eval_times <- length(eval_time[!is.na(eval_time)]) if(any_dyn & num_eval_times > 0) { - x <- x %>% - dplyr::filter(.eval_time %in% eval_time) %>% + x <- x |> + dplyr::filter(.eval_time %in% eval_time) |> dplyr::mutate( .metric = dplyr::if_else( @@ -288,8 +288,8 @@ plot_perf_vs_iter <- function(x, metric = NULL, eval_time = NULL, width = NULL, x <- process_autoplot_metrics(x, metric, eval_time) search_iter <- - x %>% - dplyr::filter(.iter > 0 & std_err > 0) %>% + x |> + dplyr::filter(.iter > 0 & std_err > 0) |> dplyr::mutate(const = ifelse(n > 0, qt(0.975, n), 0)) p <- @@ -328,7 +328,7 @@ plot_param_vs_iter <- function(x, call = rlang::caller_env()) { # Collect and filter resampling results x <- estimate_tune_results(x) - is_num <- purrr::map_lgl(x %>% dplyr::select(dplyr::all_of(param_cols)), is.numeric) + is_num <- purrr::map_lgl(x |> dplyr::select(dplyr::all_of(param_cols)), is.numeric) num_param_cols <- param_cols[is_num] # ---------------------------------------------------------------------------- @@ -352,8 +352,8 @@ plot_param_vs_iter <- function(x, call = rlang::caller_env()) { # Stack numeric columns for filtering x <- - x %>% - dplyr::select(.iter, dplyr::all_of(num_param_cols)) %>% + x |> + dplyr::select(.iter, dplyr::all_of(num_param_cols)) |> tidyr::pivot_longer(cols = dplyr::all_of(num_param_cols)) # ------------------------------------------------------------------------------ @@ -394,14 +394,14 @@ plot_marginals <- function(x, metric = NULL, eval_time = NULL, call = rlang::cal # ---------------------------------------------------------------------------- # Check types of parameters then sort by unique values - is_num <- purrr::map_lgl(x %>% dplyr::select(dplyr::all_of(param_cols)), is.numeric) - num_val <- purrr::map_int(x %>% dplyr::select(dplyr::all_of(param_cols)), ~ length(unique(.x))) + is_num <- purrr::map_lgl(x |> dplyr::select(dplyr::all_of(param_cols)), is.numeric) + num_val <- purrr::map_int(x |> dplyr::select(dplyr::all_of(param_cols)), ~ length(unique(.x))) if (any(num_val < 2)) { rm_param <- param_cols[num_val < 2] param_cols <- param_cols[num_val >= 2] is_num <- is_num[num_val >= 2] - x <- x %>% dplyr::select(-dplyr::all_of(rm_param)) + x <- x |> dplyr::select(-dplyr::all_of(rm_param)) } if (any(!is_num)) { @@ -443,9 +443,9 @@ plot_marginals <- function(x, metric = NULL, eval_time = NULL, call = rlang::cal # Stack numeric parameters for faceting. x <- - x %>% - dplyr::rename(`# resamples` = n) %>% - dplyr::select(dplyr::all_of(param_cols), mean, `# resamples`, .metric) %>% + x |> + dplyr::rename(`# resamples` = n) |> + dplyr::select(dplyr::all_of(param_cols), mean, `# resamples`, .metric) |> tidyr::pivot_longer(cols = dplyr::all_of(num_param_cols)) # ---------------------------------------------------------------------------- @@ -525,7 +525,7 @@ plot_regular_grid <- function(x, {.pkg tune} version 0.1.0 or later.") } - grd <- dat %>% dplyr::select(all_of(param_cols)) + grd <- dat |> dplyr::select(all_of(param_cols)) # ---------------------------------------------------------------------------- # Determine which parameter goes on the x-axis and their types @@ -575,9 +575,9 @@ plot_regular_grid <- function(x, # ---------------------------------------------------------------------------- dat <- - dat %>% - dplyr::rename(`# resamples` = n) %>% - dplyr::select(dplyr::all_of(param_cols), mean, `# resamples`, .metric) %>% + dat |> + dplyr::rename(`# resamples` = n) |> + dplyr::select(dplyr::all_of(param_cols), mean, `# resamples`, .metric) |> tidyr::pivot_longer(cols = dplyr::all_of(x_col)) # ------------------------------------------------------------------------------ diff --git a/R/pull.R b/R/pull.R index 89c5da2a3..a15a54d6a 100644 --- a/R/pull.R +++ b/R/pull.R @@ -13,8 +13,8 @@ extract_details <- function(object, extractor) { pulley <- function(resamples, res, col, order) { if (all(purrr::map_lgl(res, inherits, "simpleError"))) { res <- - resamples %>% - mutate(col = purrr::map(splits, ~NULL)) %>% + resamples |> + mutate(col = purrr::map(splits, ~NULL)) |> setNames(c(names(resamples), col)) return(res) } @@ -25,12 +25,12 @@ pulley <- function(resamples, res, col, order) { resamples <- vctrs::vec_slice(resamples, order) - pulled_vals <- purrr::map(res, ~ .x[[col]]) %>% purrr::list_rbind() + pulled_vals <- purrr::map(res, ~ .x[[col]]) |> purrr::list_rbind() if (nrow(pulled_vals) == 0) { res <- - resamples %>% - mutate(col = purrr::map(splits, ~NULL)) %>% + resamples |> + mutate(col = purrr::map(splits, ~NULL)) |> setNames(c(names(resamples), col)) return(res) } diff --git a/R/resample.R b/R/resample.R index 44e5b4eff..bc295e78a 100644 --- a/R/resample.R +++ b/R/resample.R @@ -29,11 +29,11 @@ #' set.seed(6735) #' folds <- vfold_cv(mtcars, v = 5) #' -#' spline_rec <- recipe(mpg ~ ., data = mtcars) %>% -#' step_spline_natural(disp) %>% +#' spline_rec <- recipe(mpg ~ ., data = mtcars) |> +#' step_spline_natural(disp) |> #' step_spline_natural(wt) #' -#' lin_mod <- linear_reg() %>% +#' lin_mod <- linear_reg() |> #' set_engine("lm") #' #' control <- control_resamples(save_pred = TRUE) @@ -48,8 +48,8 @@ #' # supply that to `fit_resamples()` instead. Here, a workflows "variables" #' # preprocessor is used, which lets you supply terms using dplyr selectors. #' # The variables are used as-is, no preprocessing is done to them. -#' wf <- workflow() %>% -#' add_variables(outcomes = mpg, predictors = everything()) %>% +#' wf <- workflow() |> +#' add_variables(outcomes = mpg, predictors = everything()) |> #' add_model(lin_mod) #' #' wf_res <- fit_resamples(wf, folds) diff --git a/R/schedule.R b/R/schedule.R index 1be46eaff..4e64108c0 100644 --- a/R/schedule.R +++ b/R/schedule.R @@ -45,14 +45,14 @@ schedule_stages <- function(grid, wflow) { param_info <- get_param_info(wflow) # schedule preprocessing stage and push the rest into a nested tibble - param_pre_stage <- param_info %>% - dplyr::filter(source == "recipe") %>% + param_pre_stage <- param_info |> + dplyr::filter(source == "recipe") |> dplyr::pull(id) - schedule <- grid %>% + schedule <- grid |> tidyr::nest(.by = dplyr::all_of(param_pre_stage), .key = "model_stage") # schedule next stages nested within `schedule_model_stage_i()` - schedule %>% + schedule |> dplyr::mutate( model_stage = purrr::map( model_stage, @@ -64,11 +64,11 @@ schedule_stages <- function(grid, wflow) { } schedule_model_stage_i <- function(model_stage, param_info, wflow) { - model_param <- param_info %>% - dplyr::filter(source == "model_spec") %>% + model_param <- param_info |> + dplyr::filter(source == "model_spec") |> dplyr::pull(id) - non_submodel_param <- param_info %>% - dplyr::filter(source == "model_spec" & !has_submodel) %>% + non_submodel_param <- param_info |> + dplyr::filter(source == "model_spec" & !has_submodel) |> dplyr::pull(id) any_non_submodel_param <- length(non_submodel_param) > 0 @@ -77,7 +77,7 @@ schedule_model_stage_i <- function(model_stage, param_info, wflow) { schedule <- min_model_grid(model_stage, model_param, wflow) # push remaining parameters into the next stage - next_stage <- model_stage %>% + next_stage <- model_stage |> tidyr::nest( .by = dplyr::all_of(non_submodel_param), .key = "predict_stage" @@ -86,15 +86,15 @@ schedule_model_stage_i <- function(model_stage, param_info, wflow) { if (any_non_submodel_param) { # min_model_grid() may change the row order, thus use next_stage as the # "left" data frame here to preserve the original row order - schedule <- next_stage %>% - dplyr::left_join(schedule, by = non_submodel_param) %>% + schedule <- next_stage |> + dplyr::left_join(schedule, by = non_submodel_param) |> dplyr::relocate(dplyr::all_of(model_param)) } else { schedule <- dplyr::bind_cols(schedule, next_stage) } # schedule next stages nested within `schedule_predict_stage_i()` - schedule %>% + schedule |> dplyr::mutate( predict_stage = purrr::map( predict_stage, @@ -106,24 +106,24 @@ schedule_model_stage_i <- function(model_stage, param_info, wflow) { min_model_grid <- function(grid, model_param, wflow) { # work on only the model parameters - model_grid <- grid %>% - dplyr::select(dplyr::all_of(model_param)) %>% + model_grid <- grid |> + dplyr::select(dplyr::all_of(model_param)) |> dplyr::distinct() if (nrow(model_grid) < 1) { return(model_grid) } - min_grid(extract_spec_parsnip(wflow), model_grid) %>% + min_grid(extract_spec_parsnip(wflow), model_grid) |> dplyr::select(dplyr::all_of(model_param)) } schedule_predict_stage_i <- function(predict_stage, param_info) { - submodel_param <- param_info %>% - dplyr::filter(source == "model_spec" & has_submodel) %>% + submodel_param <- param_info |> + dplyr::filter(source == "model_spec" & has_submodel) |> dplyr::pull(id) - predict_stage %>% + predict_stage |> tidyr::nest( .by = dplyr::all_of(submodel_param), .key = "post_stage" @@ -131,15 +131,15 @@ schedule_predict_stage_i <- function(predict_stage, param_info) { } get_param_info <- function(wflow) { - param_info <- tune_args(wflow) %>% + param_info <- tune_args(wflow) |> dplyr::select(name, id, source) model_spec <- extract_spec_parsnip(wflow) model_type <- class(model_spec)[1] model_eng <- model_spec$engine - model_param <- parsnip::get_from_env(paste0(model_type, "_args")) %>% - dplyr::filter(engine == model_spec$engine) %>% + model_param <- parsnip::get_from_env(paste0(model_type, "_args")) |> + dplyr::filter(engine == model_spec$engine) |> dplyr::select(name = parsnip, has_submodel) param_info <- dplyr::left_join(param_info, model_param, by = "name") diff --git a/R/select_best.R b/R/select_best.R index ef69df29e..916bec640 100644 --- a/R/select_best.R +++ b/R/select_best.R @@ -93,14 +93,14 @@ show_best.tune_results <- function(x, summary_res <- .filter_perf_metrics(x, metric, eval_time) if (metric_info$direction == "maximize") { - summary_res <- summary_res %>% dplyr::arrange(dplyr::desc(mean)) + summary_res <- summary_res |> dplyr::arrange(dplyr::desc(mean)) } else if (metric_info$direction == "minimize") { - summary_res <- summary_res %>% dplyr::arrange(mean) + summary_res <- summary_res |> dplyr::arrange(mean) } else if (metric_info$direction == "zero") { - summary_res <- summary_res %>% dplyr::arrange(abs(mean)) + summary_res <- summary_res |> dplyr::arrange(abs(mean)) } show_ind <- 1:min(nrow(summary_res), n) - summary_res %>% + summary_res |> dplyr::slice(show_ind) } @@ -128,7 +128,7 @@ select_best.tune_results <- function(x, ..., metric = NULL, eval_time = NULL) { param_names <- .get_tune_parameter_names(x) res <- show_best(x, metric = metric, n = 1, eval_time = eval_time, call = rlang::current_env()) - res %>% dplyr::select(dplyr::all_of(param_names), .config) + res |> dplyr::select(dplyr::all_of(param_names), .config) } @@ -168,14 +168,14 @@ select_by_pct_loss.tune_results <- function(x, ..., metric = NULL, eval_time = N } summary_res <- - summary_res %>% - dplyr::rowwise() %>% + summary_res |> + dplyr::rowwise() |> dplyr::mutate( .best = best_metric, # can calculate loss without knowledge of direction since # we know that losses must be larger than that for the best .loss = abs((abs(mean) - abs(.best)) / .best) * 100 - ) %>% + ) |> dplyr::ungroup() @@ -191,10 +191,10 @@ select_by_pct_loss.tune_results <- function(x, ..., metric = NULL, eval_time = N # discard models more complex than the best and # remove models with greater increase in loss than the limit best_index <- which(summary_res$.loss == 0) - summary_res %>% - dplyr::slice(1:best_index) %>% - dplyr::filter(.loss < limit) %>% - dplyr::slice(1) %>% + summary_res |> + dplyr::slice(1:best_index) |> + dplyr::filter(.loss < limit) |> + dplyr::slice(1) |> dplyr::select(dplyr::all_of(param_names), .config) } @@ -229,22 +229,22 @@ select_by_one_std_err.tune_results <- function(x, ..., metric = NULL, eval_time best <- summary_res$mean[best_index] bound <- best - summary_res$std_err[best_index] summary_res <- - summary_res %>% + summary_res |> dplyr::mutate( .best = best, .bound = bound - ) %>% + ) |> dplyr::filter(mean >= .bound) } else if (metric_info$direction == "minimize") { best_index <- which.min(summary_res$mean) best <- summary_res$mean[best_index] bound <- best + summary_res$std_err[best_index] summary_res <- - summary_res %>% + summary_res |> dplyr::mutate( .best = best, .bound = bound - ) %>% + ) |> dplyr::filter(mean <= .bound) } else if (metric_info$direction == "zero") { best_index <- which.min(abs(summary_res$mean)) @@ -252,13 +252,13 @@ select_by_one_std_err.tune_results <- function(x, ..., metric = NULL, eval_time bound_lower <- -abs(best) - summary_res$std_err[best_index] bound_upper <- abs(best) + summary_res$std_err[best_index] summary_res <- - summary_res %>% - dplyr::rowwise() %>% + summary_res |> + dplyr::rowwise() |> dplyr::mutate( .best = best, .bound = list(c(lower = bound_lower, upper = bound_upper)) - ) %>% - dplyr::filter(mean >= .bound[[1]] & mean <= .bound[[2]]) %>% + ) |> + dplyr::filter(mean >= .bound[[1]] & mean <= .bound[[2]]) |> dplyr::ungroup() } @@ -270,8 +270,8 @@ select_by_one_std_err.tune_results <- function(x, ..., metric = NULL, eval_time var_nm <- var_nm[!var_nm %in% colnames(collect_metrics(x))] cli::cli_abort("Could not sort results by {.var {var_nm}}.") } - summary_res %>% - dplyr::slice(1) %>% + summary_res |> + dplyr::slice(1) |> dplyr::select(dplyr::all_of(param_names), .config) } diff --git a/R/tune_bayes.R b/R/tune_bayes.R index 9603b448d..69c242b36 100644 --- a/R/tune_bayes.R +++ b/R/tune_bayes.R @@ -145,13 +145,13 @@ #' folds <- vfold_cv(mtcars, v = 5) #' #' car_rec <- -#' recipe(mpg ~ ., data = mtcars) %>% +#' recipe(mpg ~ ., data = mtcars) |> #' step_normalize(all_predictors()) #' #' # define an svm with parameters to tune #' svm_mod <- -#' svm_rbf(cost = tune(), rbf_sigma = tune()) %>% -#' set_engine("kernlab") %>% +#' svm_rbf(cost = tune(), rbf_sigma = tune()) |> +#' set_engine("kernlab") |> #' set_mode("regression") #' #' # use a space-filling design with 6 points @@ -388,7 +388,7 @@ tune_bayes_workflow <- function(object, gp_mod <- .catch_and_log( fit_gp( - mean_stats %>% dplyr::select(-.iter), + mean_stats |> dplyr::select(-.iter), pset = param_info, metric = opt_metric_name, eval_time = opt_metric_time, @@ -411,7 +411,7 @@ tune_bayes_workflow <- function(object, pred_gp( gp_mod, param_info, control = control, - current = mean_stats %>% dplyr::select(dplyr::all_of(param_info$id)) + current = mean_stats |> dplyr::select(dplyr::all_of(param_info$id)) ) check_time(start_time, control$time_limit) @@ -475,9 +475,9 @@ tune_bayes_workflow <- function(object, ~ dplyr::mutate(., .config = paste0("Iter", i)) ) } - unsummarized <- dplyr::bind_rows(unsummarized, tmp_res %>% mutate(.iter = i)) + unsummarized <- dplyr::bind_rows(unsummarized, tmp_res |> mutate(.iter = i)) rs_estimate <- estimate_tune_results(tmp_res) - mean_stats <- dplyr::bind_rows(mean_stats, rs_estimate %>% dplyr::mutate(.iter = i)) + mean_stats <- dplyr::bind_rows(mean_stats, rs_estimate |> dplyr::mutate(.iter = i)) score_card <- update_score_card(score_card, i, tmp_res) log_progress(control, x = mean_stats, maximize = maximize, objective = opt_metric_name, eval_time = opt_metric_time) @@ -585,17 +585,17 @@ encode_set <- function(x, pset, ..., as_matrix = FALSE) { } fit_gp <- function(dat, pset, metric, eval_time = NULL, control, ...) { - dat <- dat %>% dplyr::filter(.metric == metric) + dat <- dat |> dplyr::filter(.metric == metric) if (!is.null(eval_time)) { - dat <- dat %>% dplyr::filter(.eval_time == eval_time) + dat <- dat |> dplyr::filter(.eval_time == eval_time) } - dat <- dat %>% - check_gp_data() %>% + dat <- dat |> + check_gp_data() |> dplyr::select(dplyr::all_of(pset$id), mean) - x <- encode_set(dat %>% dplyr::select(-mean), pset, as_matrix = TRUE) + x <- encode_set(dat |> dplyr::select(-mean), pset, as_matrix = TRUE) if (nrow(x) <= ncol(x) + 1 && nrow(x) > 0) { msg <- @@ -629,12 +629,12 @@ fit_gp <- function(dat, pset, metric, eval_time = NULL, control, ...) { pred_gp <- function(object, pset, size = 5000, current = NULL, control) { pred_grid <- - dials::grid_space_filling(pset, size = size, type = "latin_hypercube") %>% + dials::grid_space_filling(pset, size = size, type = "latin_hypercube") |> dplyr::distinct() if (!is.null(current)) { pred_grid <- - pred_grid %>% + pred_grid |> dplyr::anti_join(current, by = pset$id) } @@ -646,7 +646,7 @@ pred_gp <- function(object, pset, size = 5000, current = NULL, control) { msg <- paste(msg, as.character(object)) } tune_log(control, split_labels = NULL, task = msg, type = "warning") - return(pred_grid %>% dplyr::mutate(.mean = NA_real_, .sd = NA_real_)) + return(pred_grid |> dplyr::mutate(.mean = NA_real_, .sd = NA_real_)) } tune_log( @@ -662,15 +662,15 @@ pred_gp <- function(object, pset, size = 5000, current = NULL, control) { tune_log(control, split_labels = NULL, task = "Predicted candidates", type = "info", catalog = FALSE) - pred_grid %>% + pred_grid |> dplyr::mutate(.mean = gp_pred$Y_hat, .sd = sqrt(gp_pred$MSE)) } pick_candidate <- function(results, info, control) { if (info$uncertainty < control$uncertain) { - results <- results %>% - dplyr::arrange(dplyr::desc(objective)) %>% + results <- results |> + dplyr::arrange(dplyr::desc(objective)) |> dplyr::slice(1) } else { if (control$verbose_iter) { @@ -678,9 +678,9 @@ pick_candidate <- function(results, info, control) { message(msg) } results <- - results %>% - dplyr::arrange(dplyr::desc(.sd)) %>% - dplyr::slice(1:floor(.1 * nrow(results))) %>% + results |> + dplyr::arrange(dplyr::desc(.sd)) |> + dplyr::slice(1:floor(.1 * nrow(results))) |> dplyr::sample_n(1) } results @@ -688,13 +688,13 @@ pick_candidate <- function(results, info, control) { update_score_card <- function(info, iter, results, control) { current_val <- - results %>% - estimate_tune_results() %>% + results |> + estimate_tune_results() |> dplyr::filter(.metric == info$metrics) if (!is.null(info$eval_time)) { current_val <- - current_val %>% + current_val |> dplyr::filter(.eval_time == info$eval_time) } @@ -727,19 +727,19 @@ update_score_card <- function(info, iter, results, control) { # save opt_metric_name and maximize to simplify!!!!!!!!!!!!!!! initial_info <- function(stats, metrics, maximize, eval_time) { best_res <- - stats %>% - dplyr::filter(.metric == metrics) %>% + stats |> + dplyr::filter(.metric == metrics) |> dplyr::filter(!is.na(mean)) if (maximize) { best_res <- - best_res %>% - dplyr::arrange(desc(mean)) %>% + best_res |> + dplyr::arrange(desc(mean)) |> slice(1) } else { best_res <- - best_res %>% - dplyr::arrange(mean) %>% + best_res |> + dplyr::arrange(mean) |> slice(1) } best_val <- best_res$mean[1] diff --git a/R/tune_grid.R b/R/tune_grid.R index fe67c6a69..87c4774f8 100644 --- a/R/tune_grid.R +++ b/R/tune_grid.R @@ -180,12 +180,12 @@ #' # tuning recipe parameters: #' #' spline_rec <- -#' recipe(mpg ~ ., data = mtcars) %>% -#' step_spline_natural(disp, deg_free = tune("disp")) %>% +#' recipe(mpg ~ ., data = mtcars) |> +#' step_spline_natural(disp, deg_free = tune("disp")) |> #' step_spline_natural(wt, deg_free = tune("wt")) #' #' lin_mod <- -#' linear_reg() %>% +#' linear_reg() |> #' set_engine("lm") #' #' # manually create a grid @@ -205,12 +205,12 @@ #' # tune model parameters only (example requires the `kernlab` package) #' #' car_rec <- -#' recipe(mpg ~ ., data = mtcars) %>% +#' recipe(mpg ~ ., data = mtcars) |> #' step_normalize(all_predictors()) #' #' svm_mod <- -#' svm_rbf(cost = tune(), rbf_sigma = tune()) %>% -#' set_engine("kernlab") %>% +#' svm_rbf(cost = tune(), rbf_sigma = tune()) |> +#' set_engine("kernlab") |> #' set_mode("regression") #' #' # Use a space-filling design with 7 points @@ -231,8 +231,8 @@ #' # to `tune_grid()`, you can also wrap them up in a workflow and pass #' # that along instead (note that this doesn't do any preprocessing to #' # the variables, it passes them along as-is). -#' wf <- workflow() %>% -#' add_variables(outcomes = mpg, predictors = everything()) %>% +#' wf <- workflow() |> +#' add_variables(outcomes = mpg, predictors = everything()) |> #' add_model(svm_mod) #' #' set.seed(3254) diff --git a/R/tune_results.R b/R/tune_results.R index caa749733..980b74d30 100644 --- a/R/tune_results.R +++ b/R/tune_results.R @@ -58,15 +58,15 @@ summarize_notes <- function(x) { return(invisible(NULL)) } notes <- - x %>% - dplyr::select(dplyr::starts_with("id"), .notes) %>% + x |> + dplyr::select(dplyr::starts_with("id"), .notes) |> tidyr::unnest(cols = .notes) by_type <- - notes %>% - dplyr::group_nest(type) %>% - dplyr::mutate(data = purrr::map(data, ~ dplyr::count(.x, note))) %>% - tidyr::unnest(data) %>% - dplyr::rowwise() %>% + notes |> + dplyr::group_nest(type) |> + dplyr::mutate(data = purrr::map(data, ~ dplyr::count(.x, note))) |> + tidyr::unnest(data) |> + dplyr::rowwise() |> dplyr::mutate( note = gsub("(Error:)", "", note), note = glue::glue_collapse(note, width = 0.85 * getOption("width")), @@ -149,7 +149,7 @@ peek_tune_results_outcomes <- function(x) { #' @export show_notes <- function(x, n = 10) { res <- - collect_notes(x) %>% + collect_notes(x) |> dplyr::distinct(type, note) if (nrow(res) == 0) { diff --git a/inst/WORDLIST b/inst/WORDLIST index af0bf9c30..0048ddc30 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -16,6 +16,12 @@ RNGkind Wadsworth backtrace cdot +doAzureParallel +doMC +doMPI +doParallel +doRedis +doSNOW doi el finetune @@ -27,6 +33,7 @@ iteratively misspecified notag parallelize +postprocessor pre preprocessed preprocessor diff --git a/inst/event_time_analysis.Rmd b/inst/event_time_analysis.Rmd index c2883afa8..3f66d53a1 100644 --- a/inst/event_time_analysis.Rmd +++ b/inst/event_time_analysis.Rmd @@ -25,12 +25,12 @@ registerDoMC(cores = 10) # ------------------------------------------------------------------------------ engines <- - get_from_env("models") %>% - map(~ get_from_env(.x) %>% mutate(model = .x)) %>% - list_rbind() %>% + get_from_env("models") |> + map(~ get_from_env(.x) |> mutate(model = .x)) |> + list_rbind() |> filter(mode == "censored regression") -num_models <- engines %>% distinct(model) %>% nrow() +num_models <- engines |> distinct(model) |> nrow() ``` The censored package was released in June 2022, enabling users to fit event time/survival time models using the tidymodels framework. As of this writing, there are now a total of `r nrow(engines)` different engines that can be used with `r num_models` different models. @@ -90,8 +90,8 @@ covar_data <- id.col = "num") heart_data <- - full_join(outcome_data, covar_data, by = "num") %>% - select(-num) %>% + full_join(outcome_data, covar_data, by = "num") |> + select(-num) |> as_tibble() heart_data @@ -104,7 +104,7 @@ Also, tidymodels expects that the event times and corresponding status data are ```{r} #| label: reformat-data heart_data <- - heart_data %>% + heart_data |> mutate( event_time = Surv(fuyrs, status), lv = @@ -137,7 +137,7 @@ heart_data <- TRUE ~ "stentless_porcine_tissue" ), across(where(is.character), factor) - ) %>% + ) |> select(-fuyrs, -status) ``` @@ -163,13 +163,13 @@ To demonstrate, let's fit a bagged tree to the training data: #| label: initial-example bag_spec <- - bag_tree() %>% - set_mode("censored regression") %>% + bag_tree() |> + set_mode("censored regression") |> set_engine("rpart", nbagg = 50) set.seed(29872) bag_fit <- - bag_spec %>% + bag_spec |> fit(event_time ~ ., data = valve_tr) ``` @@ -191,11 +191,11 @@ The standard `predict()` machinery can be used to get static (e.g., `type = "tim #| fake-predictions time_points <- seq(0, 10, by = .1) bag_pred <- - predict(bag_fit, fake_examples, type = "survival", eval_time = time_points) %>% + predict(bag_fit, fake_examples, type = "survival", eval_time = time_points) |> bind_cols( predict(bag_fit, fake_examples), - fake_examples %>% select(event_time) - ) %>% + fake_examples |> select(event_time) + ) |> add_rowindex() bag_pred ``` @@ -204,7 +204,7 @@ As usual, the prediction columns are prefixed with `.pred_`. What is unusual is ```{r} #| dot-pred -bag_pred$.pred[[1]] %>% slice(1:5) +bag_pred$.pred[[1]] |> slice(1:5) ``` We can unnest these and plot the per-patient survival curves: @@ -214,9 +214,9 @@ We can unnest these and plot the per-patient survival curves: #| out-width: "60%" #| fig-align: center -bag_pred %>% - unnest(.pred) %>% - mutate(sample = format(.row)) %>% +bag_pred |> + unnest(.pred) |> + mutate(sample = format(.row)) |> ggplot(aes(.eval_time, .pred_survival, group = sample, col = sample)) + geom_step() + lims(y = 0:1) + @@ -236,13 +236,13 @@ For static, a common choice is the [concordance statistic](https://www.ncbi.nlm. #| label: test-concordance test_pred <- - predict(bag_fit, valve_te, type = "survival", eval_time = time_points) %>% + predict(bag_fit, valve_te, type = "survival", eval_time = time_points) |> bind_cols( predict(bag_fit, valve_te), - valve_te %>% select(event_time) + valve_te |> select(event_time) ) -test_pred %>% +test_pred |> concordance_survival(truth = event_time, estimate = .pred_time) ``` @@ -259,8 +259,8 @@ If you were to compute model performance manually (as above), these weights are ```{r} #| ipcw-comp ipcw_data <- - test_pred %>% - .censoring_weights_graf(bag_fit, .) %>% + test_pred |> + .censoring_weights_graf(bag_fit, .) |> select(-.pred_time) ``` @@ -270,7 +270,7 @@ This adds a column called `.weight_censored` to the tibble of predicted survival ipcw_data # The adjusted data: -ipcw_data$.pred[[1]] %>% slice(1:5) +ipcw_data$.pred[[1]] |> slice(1:5) ``` With the data in this format, we can use a yardstick function for dynamic metrics like `brier_survival()`: @@ -279,10 +279,10 @@ With the data in this format, we can use a yardstick function for dynamic metric #| label: brier-surv brier_scores <- - ipcw_data %>% + ipcw_data |> # No argument name is used for .pred brier_survival(truth = event_time, .pred) -brier_scores %>% slice(1:5) +brier_scores |> slice(1:5) ``` We compute a score for each evaluation time: @@ -292,7 +292,7 @@ We compute a score for each evaluation time: #| out-width: "60%" #| fig-align: center -brier_scores %>% +brier_scores |> ggplot(aes(.eval_time, .estimate)) + geom_hline(yintercept = 0.25, col = "red", alpha = 1 / 2, lty = 2) + geom_line() + @@ -319,7 +319,7 @@ set.seed(12) valve_rs <- vfold_cv(valve_tr, repeats = 5) bag_tree_res <- - bag_spec %>% + bag_spec |> fit_resamples(event_time ~ ., resamples = valve_rs, eval_time = time_points) ``` @@ -330,14 +330,14 @@ By default, the Brier score is used: #| out-width: "60%" #| fig-align: center -collect_metrics(bag_tree_res) %>% slice(1:5) +collect_metrics(bag_tree_res) |> slice(1:5) -bag_tree_res %>% - collect_metrics() %>% +bag_tree_res |> + collect_metrics() |> mutate( lower = mean - 1.96 * std_err, upper = mean + 1.96 * std_err - ) %>% + ) |> ggplot(aes(.eval_time)) + geom_hline(yintercept = 0.25, col = "red", alpha = 1 / 2, lty = 2) + geom_line(aes(y = mean)) + @@ -357,14 +357,14 @@ Suppose we try a regularized Cox model for these data. We'll add a recipe to the #| fig-align: center lasso_spec <- - proportional_hazards(penalty = tune(), mixture = 0) %>% - set_engine("glmnet") %>% + proportional_hazards(penalty = tune(), mixture = 0) |> + set_engine("glmnet") |> set_mode("censored regression") lasso_rec <- - recipe(event_time ~ ., data = valve_tr) %>% - step_dummy(all_nominal_predictors()) %>% - step_zv(all_predictors()) %>% + recipe(event_time ~ ., data = valve_tr) |> + step_dummy(all_nominal_predictors()) |> + step_zv(all_predictors()) |> step_normalize(all_numeric_predictors()) lasso_wflow <- workflow(lasso_rec, lasso_spec) @@ -372,7 +372,7 @@ lasso_wflow <- workflow(lasso_rec, lasso_spec) surv_metrics <- metric_set(brier_survival_integrated, brier_survival) lasso_tune_res <- - lasso_wflow %>% + lasso_wflow |> tune_grid( resamples = valve_rs, eval_time = time_points, @@ -403,7 +403,7 @@ Now we can update the workflow and, assuming that this is the model that we want ```{r} #| label: finalize lasso_final_wflow <- - lasso_wflow %>% + lasso_wflow |> finalize_workflow(best_penalty) lasso_final_wflow @@ -432,16 +432,16 @@ How do the Brier Score estimates compare between the test set and resampling? #| fig-align: center #| warning: false -collect_metrics(test_res) %>% - mutate(estimator = "testing") %>% - select(.eval_time, estimator, Brier = .estimate) %>% +collect_metrics(test_res) |> + mutate(estimator = "testing") |> + select(.eval_time, estimator, Brier = .estimate) |> bind_rows( - lasso_tune_res %>% - collect_metrics() %>% - mutate(estimator = "resampling") %>% - select(.eval_time, estimator, Brier = mean, penalty) %>% + lasso_tune_res |> + collect_metrics() |> + mutate(estimator = "resampling") |> + select(.eval_time, estimator, Brier = mean, penalty) |> inner_join(best_penalty, by = "penalty") - ) %>% + ) |> ggplot(aes(.eval_time)) + geom_hline(yintercept = 0.25, col = "red", alpha = 1 / 2, lty = 2) + geom_line(aes(y = Brier, col = estimator)) + diff --git a/inst/examples/Chicago_corr_knn.R b/inst/examples/Chicago_corr_knn.R index c095a32ad..c0260bae1 100644 --- a/inst/examples/Chicago_corr_knn.R +++ b/inst/examples/Chicago_corr_knn.R @@ -14,40 +14,40 @@ data_folds <- rolling_origin(Chicago, initial = 364 * 15, assess = 7 * 4, skip = stations <- names(Chicago)[2:21] chi_rec <- - recipe(ridership ~ ., data = Chicago) %>% - step_holiday(date) %>% - step_date(date) %>% - step_rm(date) %>% - step_dummy(all_nominal()) %>% - step_zv(all_predictors()) %>% + recipe(ridership ~ ., data = Chicago) |> + step_holiday(date) |> + step_date(date) |> + step_rm(date) |> + step_dummy(all_nominal()) |> + step_zv(all_predictors()) |> step_corr(all_of(!!stations), threshold = tune()) knn_model <- - nearest_neighbor(mode = "regression", neighbors = tune(), weight_func = tune(), dist_power = tune()) %>% + nearest_neighbor(mode = "regression", neighbors = tune(), weight_func = tune(), dist_power = tune()) |> set_engine("kknn") chi_wflow <- - workflow() %>% - add_recipe(chi_rec) %>% + workflow() |> + add_recipe(chi_rec) |> add_model(knn_model) chi_param <- - parameters(chi_wflow) %>% + parameters(chi_wflow) |> update( threshold = threshold(c(.8, .99)), dist_power = dist_power(c(1, 2)), neighbors = neighbors(c(1, 50))) chi_grid <- - chi_param %>% + chi_param |> grid_latin_hypercube(size = 18) res <- tune_grid(chi_wflow, resamples = data_folds, grid = chi_grid, control = control_grid(verbose = TRUE)) -# summarize(res) %>% -# dplyr::filter(.metric == "rmse") %>% -# select(-n, -std_err, -.estimator, -.metric) %>% +# summarize(res) |> +# dplyr::filter(.metric == "rmse") |> +# select(-n, -std_err, -.estimator, -.metric) |> # ggplot(aes(x = neighbors, y = mean, col = weight_func)) + # geom_point() + geom_line() + # facet_wrap(~threshold, scales = "free_x") diff --git a/inst/examples/Chicago_corr_lm.R b/inst/examples/Chicago_corr_lm.R index 18330bb3e..088182db6 100644 --- a/inst/examples/Chicago_corr_lm.R +++ b/inst/examples/Chicago_corr_lm.R @@ -12,27 +12,27 @@ data_folds <- rolling_origin(Chicago, initial = 364 * 15, assess = 7 * 4, skip = stations <- names(Chicago)[2:21] chi_rec <- - recipe(ridership ~ ., data = Chicago) %>% - step_holiday(date) %>% - step_date(date) %>% - step_rm(date) %>% - step_dummy(all_nominal()) %>% - step_zv(all_predictors()) %>% + recipe(ridership ~ ., data = Chicago) |> + step_holiday(date) |> + step_date(date) |> + step_rm(date) |> + step_dummy(all_nominal()) |> + step_zv(all_predictors()) |> step_corr(all_of(!!stations), threshold = tune()) lm_model <- - linear_reg(mode = "regression") %>% + linear_reg(mode = "regression") |> set_engine("lm") chi_wflow <- - workflow() %>% - add_recipe(chi_rec) %>% + workflow() |> + add_recipe(chi_rec) |> add_model(lm_model) chi_grid <- - parameters(chi_wflow) %>% - update(threshold = threshold(c(.8, .99))) %>% + parameters(chi_wflow) |> + update(threshold = threshold(c(.8, .99))) |> grid_regular(levels = 10) @@ -46,15 +46,15 @@ res <- tune_grid(chi_wflow, resamples = data_folds, grid = chi_grid, res_2 <- tune_grid(chi_rec, lm_model, resamples = data_folds, grid = chi_grid, control = control_grid(verbose = TRUE, extract = ext)) -# unnest(unnest(res %>% select(id, .extracts), cols = .extracts), cols = .extract) +# unnest(unnest(res |> select(id, .extracts), cols = .extracts), cols = .extract) lm_stats <- - res %>% - select(id, .extracts) %>% - unnest(cols = .extracts) %>% - unnest(cols = .extracts) %>% - select(id, threshold, adj.r.squared, sigma, AIC, BIC) %>% - group_by(threshold) %>% + res |> + select(id, .extracts) |> + unnest(cols = .extracts) |> + unnest(cols = .extracts) |> + select(id, threshold, adj.r.squared, sigma, AIC, BIC) |> + group_by(threshold) |> summarize( adj.r.squared = mean(adj.r.squared, na.rm = TRUE), sigma = mean(sigma, na.rm = TRUE), @@ -63,8 +63,8 @@ lm_stats <- ) rs_stats <- - summarize(res) %>% - select(threshold, .metric, mean) %>% + summarize(res) |> + select(threshold, .metric, mean) |> pivot_wider(names_from = .metric, values_from = mean, id_cols = threshold) all_stats <- full_join(lm_stats, rs_stats) @@ -78,15 +78,15 @@ ggplot(all_stats, aes(x = .panel_x, y = .panel_y, colour = threshold)) + theme_bw() -summarize(res) %>% - dplyr::filter(.metric == "rmse") %>% - select(-n, -std_err, -.estimator, -.metric) %>% +summarize(res) |> + dplyr::filter(.metric == "rmse") |> + select(-n, -std_err, -.estimator, -.metric) |> ggplot(aes(x = threshold, y = mean)) + geom_point() + geom_line() -summarize(res) %>% - dplyr::filter(.metric == "rmse") %>% - arrange(mean) %>% +summarize(res) |> + dplyr::filter(.metric == "rmse") |> + arrange(mean) |> slice(1) diff --git a/inst/examples/Chicago_corr_svm.R b/inst/examples/Chicago_corr_svm.R index 94a561639..4497a4c9b 100644 --- a/inst/examples/Chicago_corr_svm.R +++ b/inst/examples/Chicago_corr_svm.R @@ -11,34 +11,34 @@ data_folds <- rolling_origin(Chicago, initial = 364 * 15, assess = 7 * 4, skip = stations <- names(Chicago)[2:21] chi_rec <- - recipe(ridership ~ ., data = Chicago) %>% - step_holiday(date) %>% - step_date(date) %>% - step_rm(date) %>% - step_dummy(all_nominal()) %>% - step_zv(all_predictors()) %>% + recipe(ridership ~ ., data = Chicago) |> + step_holiday(date) |> + step_date(date) |> + step_rm(date) |> + step_dummy(all_nominal()) |> + step_zv(all_predictors()) |> step_corr(all_of(!!stations), threshold = tune()) svm_mod <- - svm_rbf(mode = "regression", cost = tune(), rbf_sigma = tune(), margin = tune()) %>% + svm_rbf(mode = "regression", cost = tune(), rbf_sigma = tune(), margin = tune()) |> set_engine("kernlab") chi_wflow <- - workflow() %>% - add_recipe(chi_rec) %>% + workflow() |> + add_recipe(chi_rec) |> add_model(svm_mod) -cor_mat <- Chicago %>% dplyr::select(all_of(stations)) %>% cor() +cor_mat <- Chicago |> dplyr::select(all_of(stations)) |> cor() cor_mat <- tibble(cor = cor_mat[upper.tri(cor_mat)]) ggplot(cor_mat, aes(x = cor)) + geom_histogram(binwidth = .01, col = "white") chi_set <- - parameters(chi_wflow) %>% + parameters(chi_wflow) |> update(threshold = threshold(c(.8, .99))) chi_grid <- - chi_set %>% + chi_set |> grid_max_entropy(size = 5) @@ -49,16 +49,16 @@ ext <- function(x) { res <- tune_grid(chi_wflow, resamples = data_folds, grid = chi_grid, control = control_grid(verbose = TRUE, save_pred = TRUE, extract = ext)) -summarize(res) %>% - dplyr::filter(.metric == "rmse") %>% - select(-n, -std_err, -.estimator, -.metric) %>% +summarize(res) |> + dplyr::filter(.metric == "rmse") |> + select(-n, -std_err, -.estimator, -.metric) |> ggplot(aes(x = threshold, y = mean)) + geom_point() + geom_line() -summarize(res) %>% - dplyr::filter(.metric == "rmse") %>% - arrange(mean) %>% +summarize(res) |> + dplyr::filter(.metric == "rmse") |> + arrange(mean) |> slice(1) foo <- function(i) { diff --git a/inst/examples/Chicago_knn.R b/inst/examples/Chicago_knn.R index dcf29263b..d410c7aee 100644 --- a/inst/examples/Chicago_knn.R +++ b/inst/examples/Chicago_knn.R @@ -11,12 +11,12 @@ data_folds <- rolling_origin(Chicago, initial = 364 * 15, assess = 7 * 4, skip = stations <- names(Chicago)[2:21] chi_rec <- - recipe(ridership ~ ., data = Chicago) %>% - step_holiday(date) %>% - step_date(date) %>% - step_rm(date) %>% - step_dummy(all_nominal()) %>% - step_zv(all_predictors()) %>% + recipe(ridership ~ ., data = Chicago) |> + step_holiday(date) |> + step_date(date) |> + step_rm(date) |> + step_dummy(all_nominal()) |> + step_zv(all_predictors()) |> step_normalize(all_predictors()) knn_model <- @@ -25,29 +25,29 @@ knn_model <- neighbors = tune(), weight_func = tune(), dist_power = tune() - ) %>% + ) |> set_engine("kknn") chi_wflow <- - workflow() %>% - add_recipe(chi_rec) %>% + workflow() |> + add_recipe(chi_rec) |> add_model(knn_model) set.seed(255) chi_grid <- - chi_wflow %>% - parameters %>% - update(neighbors = neighbors(c(1, 30))) %>% - update(dist_power = dist_power(c(1/4, 2))) %>% + chi_wflow |> + parameters |> + update(neighbors = neighbors(c(1, 30))) |> + update(dist_power = dist_power(c(1/4, 2))) |> grid_regular(levels = c(30, 3, 3)) reg_knn_grid <- tune_grid(chi_wflow, resamples = data_folds, grid = chi_grid, control = control_grid(verbose = TRUE)) -summarize(reg_knn_grid) %>% - dplyr::filter(.metric == "rmse") %>% - mutate(RMSE = mean, `Minkowski distance parameter` = dist_power, weights = weight_func) %>% +summarize(reg_knn_grid) |> + dplyr::filter(.metric == "rmse") |> + mutate(RMSE = mean, `Minkowski distance parameter` = dist_power, weights = weight_func) |> ggplot(aes(x = neighbors, y = RMSE, col = weights)) + geom_path() + geom_point() + @@ -56,22 +56,22 @@ summarize(reg_knn_grid) %>% xlab("# Nearest-Neighbors") -summarize(reg_knn_grid) %>% - dplyr::filter(.metric == "rmse") %>% - arrange(mean) %>% +summarize(reg_knn_grid) |> + dplyr::filter(.metric == "rmse") |> + arrange(mean) |> slice(1) # ------------------------------------------------------------------------------ chi_set <- - chi_wflow %>% - parameters %>% - update(neighbors = neighbors(c(1, 30))) %>% + chi_wflow |> + parameters |> + update(neighbors = neighbors(c(1, 30))) |> update(dist_power = dist_power(c(1/10, 2))) set.seed(255) smol_grid <- - chi_set %>% + chi_set |> grid_random(size = 5) @@ -89,7 +89,7 @@ knn_search <- ) ggplot( - knn_search %>% filter(.metric == "rmse"), + knn_search |> filter(.metric == "rmse"), aes(x = neighbors, y = dist_power, col = weight_func, size = mean)) + geom_point(alpha = .4) + theme_bw() + @@ -101,8 +101,8 @@ library(gganimate) for (i in 0:max(knn_search$.iter)) { cumulative_data <- - knn_search %>% - filter(.metric == "rmse" & .iter <= i) %>% + knn_search |> + filter(.metric == "rmse" & .iter <= i) |> mutate(frame = i + 1, RMSE = mean, weights = weight_func) if (i == 0) { ani_data <- cumulative_data diff --git a/inst/examples/ames_cart.R b/inst/examples/ames_cart.R index 7a5bd6fd6..333477c05 100644 --- a/inst/examples/ames_cart.R +++ b/inst/examples/ames_cart.R @@ -19,19 +19,19 @@ cv_splits <- vfold_cv(ames_train, v = 10, strata = "Sale_Price") # ------------------------------------------------------------------------------ ames_rec <- - recipe(Sale_Price ~ ., data = ames_train) %>% - step_log(Sale_Price, base = 10) %>% + recipe(Sale_Price ~ ., data = ames_train) |> + step_log(Sale_Price, base = 10) |> step_YeoJohnson(Lot_Area, Gr_Liv_Area) cart_model <- decision_tree( - mode = "regression", cost_complexity = tune(), min_n = tune()) %>% + mode = "regression", cost_complexity = tune(), min_n = tune()) |> set_engine("rpart") ames_wflow <- - workflow() %>% - add_recipe(ames_rec) %>% + workflow() |> + add_recipe(ames_rec) |> add_model(cart_model) extr <- function(x) { @@ -45,7 +45,7 @@ num_leaves <- function(x) { prm <- - parameters(ames_wflow) %>% + parameters(ames_wflow) |> update(min_n = min_n(c(3, 10))) set.seed(4567367) diff --git a/inst/examples/ames_glmnet.R b/inst/examples/ames_glmnet.R index 3fd7c89ff..c5d479f57 100644 --- a/inst/examples/ames_glmnet.R +++ b/inst/examples/ames_glmnet.R @@ -18,13 +18,13 @@ cv_splits <- vfold_cv(ames_train, v = 10, strata = "Sale_Price") # ------------------------------------------------------------------------------ lm_mod <- - linear_reg(penalty = tune(), mixture = tune()) %>% + linear_reg(penalty = tune(), mixture = tune()) |> set_engine("glmnet") ames_wflow <- - workflow() %>% - add_formula(log(Sale_Price) ~ .) %>% + workflow() |> + add_formula(log(Sale_Price) ~ .) |> add_model(lm_mod) grid_df <- grid_regular(ames_wflow, levels = c(10, 3)) @@ -32,18 +32,18 @@ grid_df <- grid_regular(ames_wflow, levels = c(10, 3)) ames_glmnet <- tune_grid(ames_wflow, resamples = cv_splits, grid = grid_df, control = control_grid(verbose = TRUE)) -# summarize(ames_glmnet) %>% -# dplyr::filter(.metric == "rmse") %>% -# select(-n, -std_err, -.estimator, -.metric) %>% -# mutate(penalty = log10(penalty)) %>% -# gather(parameter, value, -mean) %>% +# summarize(ames_glmnet) |> +# dplyr::filter(.metric == "rmse") |> +# select(-n, -std_err, -.estimator, -.metric) |> +# mutate(penalty = log10(penalty)) |> +# gather(parameter, value, -mean) |> # ggplot(aes(x = value, y = mean)) + # geom_point() + # facet_wrap(~parameter, scales = "free_x") # -# summarize(ames_glmnet) %>% -# dplyr::filter(.metric == "rmse") %>% -# arrange(mean) %>% +# summarize(ames_glmnet) |> +# dplyr::filter(.metric == "rmse") |> +# arrange(mean) |> # slice(1) set.seed(9890) diff --git a/inst/examples/ames_knn.R b/inst/examples/ames_knn.R index d95ca54c5..a486b0a10 100644 --- a/inst/examples/ames_knn.R +++ b/inst/examples/ames_knn.R @@ -19,13 +19,13 @@ rs_splits <- vfold_cv(ames_train, strata = "Sale_Price") # ------------------------------------------------------------------------------ ames_rec <- - recipe(Sale_Price ~ ., data = ames_train) %>% - step_log(Sale_Price, base = 10) %>% - step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>% - step_other(Neighborhood, threshold = .1) %>% - step_dummy(all_nominal()) %>% - step_zv(all_predictors()) %>% - step_ns(Longitude, deg_free = tune("lon")) %>% + recipe(Sale_Price ~ ., data = ames_train) |> + step_log(Sale_Price, base = 10) |> + step_YeoJohnson(Lot_Area, Gr_Liv_Area) |> + step_other(Neighborhood, threshold = .1) |> + step_dummy(all_nominal()) |> + step_zv(all_predictors()) |> + step_ns(Longitude, deg_free = tune("lon")) |> step_ns(Latitude, deg_free = tune("lat")) knn_model <- @@ -34,21 +34,21 @@ knn_model <- neighbors = tune("K"), weight_func = tune(), dist_power = tune() - ) %>% + ) |> set_engine("kknn") ames_wflow <- - workflow() %>% - add_recipe(ames_rec) %>% + workflow() |> + add_recipe(ames_rec) |> add_model(knn_model) ames_set <- - parameters(ames_wflow) %>% + parameters(ames_wflow) |> update(K = neighbors(c(1, 50))) set.seed(7014) ames_grid <- - ames_set %>% + ames_set |> grid_max_entropy(size = 10) ames_grid_search <- diff --git a/inst/examples/ames_knn.Rout b/inst/examples/ames_knn.Rout index a8c1e2611..890a00fe6 100644 --- a/inst/examples/ames_knn.Rout +++ b/inst/examples/ames_knn.Rout @@ -62,13 +62,13 @@ Loading required package: parallel > # ------------------------------------------------------------------------------ > > ames_rec <- -+ recipe(Sale_Price ~ ., data = ames_train) %>% -+ step_log(Sale_Price, base = 10) %>% -+ step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>% -+ step_other(Neighborhood, threshold = .1) %>% -+ step_dummy(all_nominal()) %>% -+ step_zv(all_predictors()) %>% -+ step_ns(Longitude, deg_free = tune("lon")) %>% ++ recipe(Sale_Price ~ ., data = ames_train) |> ++ step_log(Sale_Price, base = 10) |> ++ step_YeoJohnson(Lot_Area, Gr_Liv_Area) |> ++ step_other(Neighborhood, threshold = .1) |> ++ step_dummy(all_nominal()) |> ++ step_zv(all_predictors()) |> ++ step_ns(Longitude, deg_free = tune("lon")) |> + step_ns(Latitude, deg_free = tune("lat")) > > knn_model <- @@ -77,21 +77,21 @@ Loading required package: parallel + neighbors = tune("K"), + weight_func = tune(), + dist_power = tune() -+ ) %>% ++ ) |> + set_engine("kknn") > > ames_wflow <- -+ workflow() %>% -+ add_recipe(ames_rec) %>% ++ workflow() |> ++ add_recipe(ames_rec) |> + add_model(knn_model) > > ames_set <- -+ parameters(ames_wflow) %>% ++ parameters(ames_wflow) |> + update(K = neighbors(c(1, 50))) > > set.seed(7014) > ames_grid <- -+ ames_set %>% ++ ames_set |> + grid_max_entropy(size = 10) > > ames_grid_search <- diff --git a/inst/examples/ames_mars.R b/inst/examples/ames_mars.R index 8ad276785..22b031121 100644 --- a/inst/examples/ames_mars.R +++ b/inst/examples/ames_mars.R @@ -6,7 +6,7 @@ library(doMC) registerDoMC(cores=10) # ------------------------------------------------------------------------------ -ames <- make_ames() %>% mutate(Sale_Price = log10(Sale_Price)) +ames <- make_ames() |> mutate(Sale_Price = log10(Sale_Price)) # Make sure that you get the same random numbers set.seed(4595) @@ -20,31 +20,31 @@ cv_splits <- vfold_cv(ames_train, v = 10, strata = "Sale_Price") # ------------------------------------------------------------------------------ ames_rec <- - recipe(Sale_Price ~ ., data = ames_train) %>% - step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>% - step_other(Neighborhood, threshold = tune()) %>% - step_dummy(all_nominal()) %>% + recipe(Sale_Price ~ ., data = ames_train) |> + step_YeoJohnson(Lot_Area, Gr_Liv_Area) |> + step_other(Neighborhood, threshold = tune()) |> + step_dummy(all_nominal()) |> step_zv(all_predictors()) mars_mod <- - mars(mode = "regression", num_terms = tune(), prod_degree = tune()) %>% + mars(mode = "regression", num_terms = tune(), prod_degree = tune()) |> set_engine("earth") ames_wflow <- - workflow() %>% - add_recipe(ames_rec) %>% + workflow() |> + add_recipe(ames_rec) |> add_model(mars_mod) set.seed(4567367) ames_set <- - parameters(ames_wflow) %>% - update(threshold = threshold(c(0, .2))) %>% + parameters(ames_wflow) |> + update(threshold = threshold(c(0, .2))) |> update(num_terms = num_terms(c(1, 50))) ames_grid <- - ames_set %>% + ames_set |> grid_max_entropy(size = 10) res <- @@ -55,16 +55,16 @@ res <- control = control_grid(verbose = TRUE) ) -# collect_metrics(res) %>% -# dplyr::filter(.metric == "rmse") %>% +# collect_metrics(res) |> +# dplyr::filter(.metric == "rmse") |> # ggplot(aes(x = num_terms, y = mean, col = factor(prod_degree))) + # geom_point(cex = 1) + # geom_path() + # facet_wrap(~ threshold) # -# collect_metrics(res) %>% -# dplyr::filter(.metric == "rmse") %>% -# arrange(mean) %>% +# collect_metrics(res) |> +# dplyr::filter(.metric == "rmse") |> +# arrange(mean) |> # slice(1) # diff --git a/inst/examples/ames_mlp.R b/inst/examples/ames_mlp.R index f9b1386eb..dd5304859 100644 --- a/inst/examples/ames_mlp.R +++ b/inst/examples/ames_mlp.R @@ -18,37 +18,37 @@ cv_splits <- vfold_cv(ames_train, v = 10, strata = "Sale_Price") # ------------------------------------------------------------------------------ ames_rec <- - recipe(Sale_Price ~ ., data = ames_train) %>% - step_log(Sale_Price, base = 10) %>% - step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>% - step_other(Neighborhood, threshold = tune("Nhood_other")) %>% - step_other(MS_SubClass , threshold = tune("SubClass_other")) %>% - step_dummy(all_nominal()) %>% - step_zv(all_predictors()) %>% - step_normalize(all_predictors()) %>% + recipe(Sale_Price ~ ., data = ames_train) |> + step_log(Sale_Price, base = 10) |> + step_YeoJohnson(Lot_Area, Gr_Liv_Area) |> + step_other(Neighborhood, threshold = tune("Nhood_other")) |> + step_other(MS_SubClass , threshold = tune("SubClass_other")) |> + step_dummy(all_nominal()) |> + step_zv(all_predictors()) |> + step_normalize(all_predictors()) |> step_pca(all_predictors(), num_comp = tune()) nn_mod <- mlp(mode = "regression", hidden_units = tune(), dropout = tune(), - epochs = tune(), activation = tune()) %>% + epochs = tune(), activation = tune()) |> set_engine("keras", verbose = 0, validation = .1) ames_wflow <- - workflow() %>% - add_recipe(ames_rec) %>% + workflow() |> + add_recipe(ames_rec) |> add_model(nn_mod) set.seed(4567367) ames_set <- - parameters(ames_wflow) %>% - update(Nhood_other = threshold(c(0, .09))) %>% - update(SubClass_other = threshold(c(0, .059))) %>% + parameters(ames_wflow) |> + update(Nhood_other = threshold(c(0, .09))) |> + update(SubClass_other = threshold(c(0, .059))) |> update(num_comp = num_comp(c(1, 20))) set.seed(7520) ames_grid <- - ames_set %>% + ames_set |> grid_max_entropy(size = 10) initial_grid <- tune_grid(ames_wflow, resamples = cv_splits, grid = ames_grid, control = control_grid(verbose = TRUE)) diff --git a/inst/examples/ames_svm.R b/inst/examples/ames_svm.R index 948c9e092..97c7d8fb1 100644 --- a/inst/examples/ames_svm.R +++ b/inst/examples/ames_svm.R @@ -18,22 +18,22 @@ cv_splits <- vfold_cv(ames_train, v = 10, strata = "Sale_Price") # ------------------------------------------------------------------------------ ames_rec <- - recipe(Sale_Price ~ ., data = ames_train) %>% - step_log(Sale_Price, base = 10) %>% - step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>% - step_other(Neighborhood, threshold = .1) %>% - step_dummy(all_nominal()) %>% + recipe(Sale_Price ~ ., data = ames_train) |> + step_log(Sale_Price, base = 10) |> + step_YeoJohnson(Lot_Area, Gr_Liv_Area) |> + step_other(Neighborhood, threshold = .1) |> + step_dummy(all_nominal()) |> step_zv(all_predictors()) svm_model <- svm_rbf( - mode = "regression", cost = tune(), rbf_sigma = tune()) %>% + mode = "regression", cost = tune(), rbf_sigma = tune()) |> set_engine("kernlab") ames_wflow <- - workflow() %>% - add_recipe(ames_rec) %>% + workflow() |> + add_recipe(ames_rec) |> add_model(svm_model) @@ -42,7 +42,7 @@ ames_set <- parameters(ames_wflow) ames_grid <- - ames_set %>% + ames_set |> grid_max_entropy(size = 3) initial_grid <- tune_grid(ames_wflow, resamples = cv_splits, grid = ames_grid, control = control_grid(verbose = TRUE)) diff --git a/inst/examples/fine_foods_boosting.R b/inst/examples/fine_foods_boosting.R index 956fad6e0..8ad773f67 100644 --- a/inst/examples/fine_foods_boosting.R +++ b/inst/examples/fine_foods_boosting.R @@ -22,39 +22,39 @@ binary_hash <- function(x) { } pre_proc <- - recipe(score ~ product + review, data = training_data) %>% - update_role(product, new_role = "id") %>% - step_mutate(review_raw = review) %>% - step_textfeature(review_raw) %>% + recipe(score ~ product + review, data = training_data) |> + update_role(product, new_role = "id") |> + step_mutate(review_raw = review) |> + step_textfeature(review_raw) |> step_rename_at( starts_with("textfeature_"), fn = ~ gsub("textfeature_review_raw_", "", .) - ) %>% - step_tokenize(review) %>% - step_stopwords(review) %>% - step_stem(review) %>% - step_texthash(review, signed = TRUE) %>% - step_rename_at(starts_with("review_hash"), fn = ~ gsub("review_", "", .)) %>% - step_mutate_at(starts_with("hash"), fn = binary_hash) %>% - step_YeoJohnson(all_of(!!basics)) %>% + ) |> + step_tokenize(review) |> + step_stopwords(review) |> + step_stem(review) |> + step_texthash(review, signed = TRUE) |> + step_rename_at(starts_with("review_hash"), fn = ~ gsub("review_", "", .)) |> + step_mutate_at(starts_with("hash"), fn = binary_hash) |> + step_YeoJohnson(all_of(!!basics)) |> step_zv(all_predictors()) boost_mod <- boost_tree(mode = "classification", mtry = tune(), trees = tune(), min_n = tune(), learn_rate = tune(), tree_depth = tune(), - loss_reduction = tune(), sample_size = tune()) %>% + loss_reduction = tune(), sample_size = tune()) |> set_engine("xgboost") text_wflow <- - workflow() %>% - add_recipe(pre_proc) %>% + workflow() |> + add_recipe(pre_proc) |> add_model(boost_mod) text_set <- - text_wflow %>% - parameters() %>% - update(mtry = mtry_long(c(0, 3))) %>% + text_wflow |> + parameters() |> + update(mtry = mtry_long(c(0, 3))) |> update(sample_size = sample_prop(0:1)) diff --git a/inst/examples/fine_foods_glmnet.R b/inst/examples/fine_foods_glmnet.R index d721e5fa9..d398faf94 100644 --- a/inst/examples/fine_foods_glmnet.R +++ b/inst/examples/fine_foods_glmnet.R @@ -19,32 +19,32 @@ binary_hash <- function(x) { } pre_proc <- - recipe(score ~ product + review, data = training_data) %>% - update_role(product, new_role = "id") %>% - step_mutate(review_raw = review) %>% - step_textfeature(review_raw) %>% + recipe(score ~ product + review, data = training_data) |> + update_role(product, new_role = "id") |> + step_mutate(review_raw = review) |> + step_textfeature(review_raw) |> step_rename_at( starts_with("textfeature_"), fn = ~ gsub("textfeature_review_raw_", "", .) - ) %>% - step_tokenize(review) %>% - step_stopwords(review) %>% - step_stem(review) %>% - step_texthash(review, signed = TRUE, num_terms = tune()) %>% - step_rename_at(starts_with("review_hash"), fn = ~ gsub("review_", "", .)) %>% - step_mutate_at(starts_with("hash"), fn = binary_hash) %>% - step_YeoJohnson(all_of(basics)) %>% - step_zv(all_predictors()) %>% + ) |> + step_tokenize(review) |> + step_stopwords(review) |> + step_stem(review) |> + step_texthash(review, signed = TRUE, num_terms = tune()) |> + step_rename_at(starts_with("review_hash"), fn = ~ gsub("review_", "", .)) |> + step_mutate_at(starts_with("hash"), fn = binary_hash) |> + step_YeoJohnson(all_of(basics)) |> + step_zv(all_predictors()) |> step_normalize(all_predictors()) lr_mod <- - logistic_reg(penalty = tune(), mixture = tune()) %>% + logistic_reg(penalty = tune(), mixture = tune()) |> set_engine("glmnet") text_wflow <- - workflow() %>% - add_recipe(pre_proc) %>% + workflow() |> + add_recipe(pre_proc) |> add_model(lr_mod) set.seed(8935) @@ -69,26 +69,26 @@ text_glmnet <- tune_grid(text_wflow, resamples = folds, grid = text_grid, metric print(warnings()) -# text_glmnet %>% -# select(id, .extracts) %>% -# unnest() %>% -# select(-penalty) %>% -# unnest() %>% -# filter(num_terms == 1024 & mixture > 0) %>% -# mutate(group = paste(id, mixture)) %>% +# text_glmnet |> +# select(id, .extracts) |> +# unnest() |> +# select(-penalty) |> +# unnest() |> +# filter(num_terms == 1024 & mixture > 0) |> +# mutate(group = paste(id, mixture)) |> # ggplot(aes(x = penalty, y = num_vars)) + # geom_path(aes(group = group, col = factor(mixture)), alpha = .1) + # scale_x_log10() -summarize(text_glmnet) %>% - filter(.metric == "accuracy") %>% +summarize(text_glmnet) |> + filter(.metric == "accuracy") |> ggplot(aes(x = log10(penalty), y = mixture, fill = mean)) + facet_wrap(~ num_terms) + geom_tile() + theme_bw() -summarize(text_glmnet) %>% - filter(.metric == "accuracy") %>% +summarize(text_glmnet) |> + filter(.metric == "accuracy") |> ggplot(aes(x = penalty, y = mean, col = mixture, group = mixture)) + facet_wrap(~ num_terms) + geom_point() + geom_line() + @@ -97,8 +97,8 @@ summarize(text_glmnet) %>% # ------------------------------------------------------------------------------ test_set <- - text_wflow %>% - parameters() %>% + text_wflow |> + parameters() |> update(num_terms = num_hash()) trade_decay <- function(iter) { diff --git a/inst/examples/fine_foods_import.R b/inst/examples/fine_foods_import.R index 88eaced25..7acbb7762 100644 --- a/inst/examples/fine_foods_import.R +++ b/inst/examples/fine_foods_import.R @@ -16,16 +16,16 @@ raw <- delim = "\n", col_names = "text", col_types = cols(text = col_character()) - ) %>% + ) |> mutate( text = str_remove(text, "product/"), text = str_remove(text, "review/"), prod_num = ifelse(str_detect(text, "productId"), 1, 0), prod_num = cumsum(prod_num) - ) %>% + ) |> dplyr::filter( str_detect(text, "(productId:)|(text:)|(score:)") - ) %>% + ) |> mutate( field = case_when( str_detect(text, "productId:") ~ "product", @@ -34,9 +34,9 @@ raw <- TRUE ~ "unknown" ), text = str_replace(text, "(productId: )|(text: )|(score: )", "") - ) %>% - spread(field, text) %>% - dplyr::select(-prod_num) %>% + ) |> + spread(field, text) |> + dplyr::select(-prod_num) |> mutate( score = factor(ifelse(score == "5.0", "great", "other")) ) @@ -46,26 +46,26 @@ raw <- # rows per product. prod_dist <- - raw %>% - group_by(product) %>% - count() %>% - ungroup() %>% + raw |> + group_by(product) |> + count() |> + ungroup() |> arrange(desc(n)) sampled <- - raw %>% - group_by(product) %>% - sample_n(1) %>% - ungroup() %>% + raw |> + group_by(product) |> + sample_n(1) |> + ungroup() |> sample_n(5000) training_data <- - sampled %>% + sampled |> sample_n(4000) testing_data <- - sampled %>% + sampled |> anti_join(training_data, by = c("product", "review", "score")) save(training_data, testing_data, file = "data/small_fine_foods.RData", version = 2, compress = "xz") diff --git a/inst/examples/fine_foods_import.Rout b/inst/examples/fine_foods_import.Rout index c30da55a9..6bf2a7253 100644 --- a/inst/examples/fine_foods_import.Rout +++ b/inst/examples/fine_foods_import.Rout @@ -46,16 +46,16 @@ Registered S3 method overwritten by 'rvest': + delim = "\n", + col_names = "text", + col_types = cols(text = col_character()) -+ ) %>% ++ ) |> + mutate( + text = str_remove(text, "product/"), + text = str_remove(text, "review/"), + prod_num = ifelse(str_detect(text, "productId"), 1, 0), + prod_num = cumsum(prod_num) -+ ) %>% ++ ) |> + dplyr::filter( + str_detect(text, "(productId:)|(text:)|(score:)") -+ ) %>% ++ ) |> + mutate( + field = case_when( + str_detect(text, "productId:") ~ "product", @@ -64,9 +64,9 @@ Registered S3 method overwritten by 'rvest': + TRUE ~ "unknown" + ), + text = str_replace(text, "(productId: )|(text: )|(score: )", "") -+ ) %>% -+ spread(field, text) %>% -+ dplyr::select(-prod_num) %>% ++ ) |> ++ spread(field, text) |> ++ dplyr::select(-prod_num) |> + mutate( + score = factor(ifelse(score == "5.0", "great", "other")) + ) @@ -76,31 +76,31 @@ Registered S3 method overwritten by 'rvest': > # rows per product. > > prod_dist <- -+ raw %>% -+ group_by(product) %>% -+ count() %>% -+ ungroup() %>% ++ raw |> ++ group_by(product) |> ++ count() |> ++ ungroup() |> + arrange(desc(n)) > > # Take the 25 well characterized products for the training set (or more for larger > # training set) > set.seed(9565) > train_prods <- -+ prod_dist %>% -+ dplyr::filter(n > 100) %>% -+ sample_n(25) %>% ++ prod_dist |> ++ dplyr::filter(n > 100) |> ++ sample_n(25) |> + dplyr::select(product) > > training_data <- -+ train_prods %>% ++ train_prods |> + inner_join(raw, by = "product") > > testing_data <- -+ raw %>% -+ anti_join(train_prods %>% dplyr::select(product), by = "product") %>% -+ group_by(product) %>% -+ sample_n(1) %>% -+ ungroup() %>% ++ raw |> ++ anti_join(train_prods |> dplyr::select(product), by = "product") |> ++ group_by(product) |> ++ sample_n(1) |> ++ ungroup() |> + sample_n(1000) > > save(training_data, testing_data, file = "data/small_fine_foods.RData", version = 2, compress = "xz") diff --git a/inst/examples/iono_svm.R b/inst/examples/iono_svm.R index 63a5f02d2..8825350e8 100644 --- a/inst/examples/iono_svm.R +++ b/inst/examples/iono_svm.R @@ -10,13 +10,13 @@ set.seed(151) iono_rs <- bootstraps(Ionosphere, times = 30) svm_mod <- - svm_rbf(cost = tune(), rbf_sigma = tune()) %>% - set_mode("classification") %>% + svm_rbf(cost = tune(), rbf_sigma = tune()) |> + set_mode("classification") |> set_engine("kernlab") iono_rec <- - recipe(Class ~ ., data = Ionosphere) %>% - step_zv(all_predictors())%>% + recipe(Class ~ ., data = Ionosphere) |> + step_zv(all_predictors())|> step_dummy(all_predictors(), -all_numeric()) roc_vals <- metric_set(roc_auc) diff --git a/inst/examples/mtcars_splines.R b/inst/examples/mtcars_splines.R index 09f9b04df..e5d1c89f2 100644 --- a/inst/examples/mtcars_splines.R +++ b/inst/examples/mtcars_splines.R @@ -9,36 +9,36 @@ data_folds <- vfold_cv(mtcars, repeats = 2) # ------------------------------------------------------------------------------ base_rec <- - recipe(mpg ~ ., data = mtcars) %>% + recipe(mpg ~ ., data = mtcars) |> step_normalize(all_predictors()) disp_rec <- - base_rec %>% - step_bs(disp, degree = tune(), deg_free = tune()) %>% + base_rec |> + step_bs(disp, degree = tune(), deg_free = tune()) |> step_bs(wt, degree = tune("wt degree"), deg_free = tune("wt df")) lm_model <- - linear_reg(mode = "regression") %>% + linear_reg(mode = "regression") |> set_engine("lm") cars_wflow <- - workflow() %>% - add_recipe(disp_rec) %>% + workflow() |> + add_recipe(disp_rec) |> add_model(lm_model) cars_set <- - cars_wflow %>% - parameters %>% - update(degree = degree_int(1:2)) %>% - update(deg_free = deg_free(c(2, 10))) %>% - update(`wt degree` = degree_int(1:2)) %>% + cars_wflow |> + parameters |> + update(degree = degree_int(1:2)) |> + update(deg_free = deg_free(c(2, 10))) |> + update(`wt degree` = degree_int(1:2)) |> update(`wt df` = deg_free(c(2, 10))) set.seed(255) cars_grid <- - cars_set %>% + cars_set |> grid_regular(levels = c(3, 2, 3, 2)) diff --git a/inst/examples/mutagen_svm_pca.R b/inst/examples/mutagen_svm_pca.R index 5f556afb5..c5896f74b 100644 --- a/inst/examples/mutagen_svm_pca.R +++ b/inst/examples/mutagen_svm_pca.R @@ -14,30 +14,30 @@ data_folds <- validation_split(Mutagen_Dragon) # ------------------------------------------------------------------------------ Mutagen_rec <- - recipe(Class ~ ., data = Mutagen_Dragon) %>% - step_nzv(all_predictors()) %>% - step_YeoJohnson(all_predictors()) %>% - step_normalize(all_predictors()) %>% + recipe(Class ~ ., data = Mutagen_Dragon) |> + step_nzv(all_predictors()) |> + step_YeoJohnson(all_predictors()) |> + step_normalize(all_predictors()) |> step_pca(all_predictors(), num_comp = tune()) svm_model <- # svm_poly(mode = "classification", cost = tune(), degree = tune(), - # scale_factor = tune()) %>% - svm_rbf(mode = "classification", cost = tune(), rbf_sigma = tune()) %>% + # scale_factor = tune()) |> + svm_rbf(mode = "classification", cost = tune(), rbf_sigma = tune()) |> set_engine("kernlab") Mutagen_wflow <- - workflow() %>% - add_recipe(Mutagen_rec) %>% + workflow() |> + add_recipe(Mutagen_rec) |> add_model(svm_model) Mutagen_param <- - parameters(Mutagen_wflow) %>% + parameters(Mutagen_wflow) |> update(num_comp = num_comp(c(0, 20))) set.seed(552) Mutagen_grid <- - Mutagen_param %>% + Mutagen_param |> grid_max_entropy(size = 5) class_only <- metric_set(accuracy, kap, mcc) @@ -46,7 +46,7 @@ res <- tune_grid(Mutagen_wflow, resamples = data_folds, grid = Mutagen_grid, met control = control_grid(verbose = TRUE)) -summarize(res) %>% filter(.metric == "accuracy") %>% arrange(desc(mean)) +summarize(res) |> filter(.metric == "accuracy") |> arrange(desc(mean)) set.seed(3654) svm_search <- diff --git a/inst/examples/pima_cart.R b/inst/examples/pima_cart.R index 4966b64b7..5bfb47bdf 100644 --- a/inst/examples/pima_cart.R +++ b/inst/examples/pima_cart.R @@ -10,13 +10,13 @@ set.seed(151) pima_rs <- vfold_cv(PimaIndiansDiabetes, repeats = 3) tree_mod <- - decision_tree(cost_complexity = tune(), min_n = tune()) %>% - set_mode("classification") %>% + decision_tree(cost_complexity = tune(), min_n = tune()) |> + set_mode("classification") |> set_engine("rpart") pima_wflow <- - workflow() %>% - add_formula(diabetes ~ .) %>% + workflow() |> + add_formula(diabetes ~ .) |> add_model(tree_mod) roc_vals <- metric_set(roc_auc) @@ -33,8 +33,8 @@ ggplot(rs_estimates, aes(x = cost_complexity, y = min_n, col = mean, size = mean scale_x_log10() best_vals <- - rs_estimates %>% - arrange(desc(mean)) %>% + rs_estimates |> + arrange(desc(mean)) |> slice(1:2) diff --git a/inst/examples/plot_testing.R b/inst/examples/plot_testing.R index d0f9b9fe1..b4777d99d 100644 --- a/inst/examples/plot_testing.R +++ b/inst/examples/plot_testing.R @@ -7,37 +7,37 @@ theme_set(theme_bw()) simple_rec <- recipe(Class ~ A + B, data = two_class_dat) spline_rec <- - simple_rec %>% + simple_rec |> step_ns(A, deg_free = tune("degrees of freedom")) knn_K <- - nearest_neighbor(neighbors = tune()) %>% - set_engine("kknn") %>% + nearest_neighbor(neighbors = tune()) |> + set_engine("kknn") |> set_mode("classification") knn_weights <- - nearest_neighbor(weight_func = tune()) %>% - set_engine("kknn") %>% + nearest_neighbor(weight_func = tune()) |> + set_engine("kknn") |> set_mode("classification") knn_two_vars <- - nearest_neighbor(neighbors = tune(), weight_func = tune()) %>% - set_engine("kknn") %>% + nearest_neighbor(neighbors = tune(), weight_func = tune()) |> + set_engine("kknn") |> set_mode("classification") knn_three_vars <- - nearest_neighbor(neighbors = tune(), weight_func = tune(), dist_power = tune()) %>% - set_engine("kknn") %>% + nearest_neighbor(neighbors = tune(), weight_func = tune(), dist_power = tune()) |> + set_engine("kknn") |> set_mode("classification") knn_no_vars <- - nearest_neighbor(neighbors = 3) %>% - set_engine("kknn") %>% + nearest_neighbor(neighbors = 3) |> + set_engine("kknn") |> set_mode("classification") svm_mod <- - svm_rbf(cost = tune(), rbf_sigma = tune()) %>% - set_engine("kernlab") %>% + svm_rbf(cost = tune(), rbf_sigma = tune()) |> + set_engine("kernlab") |> set_mode("classification") one_perf <- metric_set(roc_auc) @@ -48,7 +48,7 @@ two_perf <- metric_set(roc_auc, mcc) grid_plot <- function(rec, mod, sfd = TRUE, ...) { set.seed(7898) data_folds <- vfold_cv(two_class_dat, v = 3) - wflow <- workflow() %>% add_model(mod) %>% add_recipe(rec) + wflow <- workflow() |> add_model(mod) |> add_recipe(rec) pset <- parameters(wflow) is_quant <- purrr::map_lgl(pull(pset, object), inherits, "quant_param") diff --git a/inst/examples/seg_mlp.R b/inst/examples/seg_mlp.R index 5d261d4b5..444431126 100644 --- a/inst/examples/seg_mlp.R +++ b/inst/examples/seg_mlp.R @@ -8,7 +8,7 @@ theme_set(theme_bw()) # ------------------------------------------------------------------------------ segmentationData <- - segmentationData %>% + segmentationData |> dplyr::select(-Case, -Cell, -contains("Centroid")) set.seed(8567) @@ -23,28 +23,28 @@ val_split <- mc_cv(seg_train, times = 1) # ------------------------------------------------------------------------------ seg_pre_proc <- - recipe(Class ~ ., data = seg_train) %>% - step_YeoJohnson(all_predictors()) %>% - step_normalize(all_predictors()) %>% - step_pca(all_predictors(), num_comp = tune()) %>% + recipe(Class ~ ., data = seg_train) |> + step_YeoJohnson(all_predictors()) |> + step_normalize(all_predictors()) |> + step_pca(all_predictors(), num_comp = tune()) |> step_downsample(Class) nn_mod <- mlp(mode = "classification", hidden_units = tune(), dropout = tune(), - epochs = tune(), activation = tune()) %>% + epochs = tune(), activation = tune()) |> set_engine("keras", verbose = 0, validation = .1) nn_wflow <- - workflow() %>% - add_model(nn_mod) %>% + workflow() |> + add_model(nn_mod) |> add_recipe(seg_pre_proc) # ------------------------------------------------------------------------------ nn_set <- - nn_wflow %>% - parameters() %>% + nn_wflow |> + parameters() |> # In case you want to manually adjust the parameter specification update(num_comp = num_comp(c(1, 20))) @@ -96,20 +96,20 @@ autoplot(nn_search_2, type = "performance", metric = "roc_auc") # ------------------------------------------------------------------------------ -nn_search_2 %>% - summarize() %>% - dplyr::filter(.iter > 0 & .metric == "roc_auc") %>% - select(-.iter, -.metric, -.estimator, -n, -std_err) %>% - gather(parameter, value, -activation, -mean) %>% +nn_search_2 |> + summarize() |> + dplyr::filter(.iter > 0 & .metric == "roc_auc") |> + select(-.iter, -.metric, -.estimator, -n, -std_err) |> + gather(parameter, value, -activation, -mean) |> ggplot(aes(x = value, y = mean, col = activation)) + geom_point(alpha = .3) + facet_wrap(~ parameter, scales = "free_x") -nn_search_2 %>% - summarize() %>% - dplyr::filter(.iter > 0 & .metric == "roc_auc") %>% - select(-.metric, -.estimator, -n, -std_err) %>% - gather(parameter, value, -activation, -mean, -.iter) %>% +nn_search_2 |> + summarize() |> + dplyr::filter(.iter > 0 & .metric == "roc_auc") |> + select(-.metric, -.estimator, -n, -std_err) |> + gather(parameter, value, -activation, -mean, -.iter) |> ggplot(aes(x = .iter, y = value, col = activation, size = mean)) + geom_point(alpha = .3) + facet_wrap(~ parameter, scales = "free_y") diff --git a/inst/examples/seg_svm.R b/inst/examples/seg_svm.R index 8cf1fbdc7..a028f0e91 100644 --- a/inst/examples/seg_svm.R +++ b/inst/examples/seg_svm.R @@ -8,7 +8,7 @@ theme_set(theme_bw()) # ------------------------------------------------------------------------------ segmentationData <- - segmentationData %>% + segmentationData |> dplyr::select(-Case, -Cell, -contains("Centroid")) set.seed(8567) @@ -23,26 +23,26 @@ folds <- vfold_cv(seg_train) # ------------------------------------------------------------------------------ seg_pre_proc <- - recipe(Class ~ ., data = seg_train) %>% - step_YeoJohnson(all_predictors()) %>% - step_normalize(all_predictors()) %>% - step_pca(all_predictors(), num_comp = tune()) %>% + recipe(Class ~ ., data = seg_train) |> + step_YeoJohnson(all_predictors()) |> + step_normalize(all_predictors()) |> + step_pca(all_predictors(), num_comp = tune()) |> step_downsample(Class) svm_mod <- - svm_rbf(mode = "classification", cost = tune(), rbf_sigma = tune()) %>% + svm_rbf(mode = "classification", cost = tune(), rbf_sigma = tune()) |> set_engine("kernlab") svm_wflow <- - workflow() %>% - add_model(svm_mod) %>% + workflow() |> + add_model(svm_mod) |> add_recipe(seg_pre_proc) # ------------------------------------------------------------------------------ svm_set <- - svm_wflow %>% - parameters() %>% + svm_wflow |> + parameters() |> # In case you want to manually adjust the parameter specification update(num_comp = num_comp(c(1, 20))) @@ -89,7 +89,7 @@ autoplot(svm_search_2, type = "performance", metric = "kap") # ------------------------------------------------------------------------------ -svm_search_2 %>% dplyr::filter(.iter > 0) %>% +svm_search_2 |> dplyr::filter(.iter > 0) |> ggplot(aes(x = cost, y = rbf_sigma)) + geom_path(aes(x = cost, y = rbf_sigma), col = "black") + geom_point(aes(col = num_comp, size = mean)) + @@ -97,21 +97,21 @@ svm_search_2 %>% dplyr::filter(.iter > 0) %>% scale_x_log10() + scale_y_log10() -svm_search_2 %>% - dplyr::filter(.metric == "kap") %>% - dplyr::select(-.estimator, -.metric, -n, -std_err) %>% - mutate(cost = log10(cost), rbf_sigma = log10(rbf_sigma)) %>% - gather(variable, value, -mean, -.iter) %>% +svm_search_2 |> + dplyr::filter(.metric == "kap") |> + dplyr::select(-.estimator, -.metric, -n, -std_err) |> + mutate(cost = log10(cost), rbf_sigma = log10(rbf_sigma)) |> + gather(variable, value, -mean, -.iter) |> ggplot(aes(x = .iter, y = value)) + geom_point() + facet_wrap(~ variable, scales = "free_y") -svm_search_2 %>% - dplyr::filter(.metric == "kap") %>% - dplyr::select(-.estimator, -.metric, -n, -std_err) %>% - mutate(cost = log10(cost), rbf_sigma = log10(rbf_sigma)) %>% - gather(variable, value, -mean, -.iter) %>% +svm_search_2 |> + dplyr::filter(.metric == "kap") |> + dplyr::select(-.estimator, -.metric, -n, -std_err) |> + mutate(cost = log10(cost), rbf_sigma = log10(rbf_sigma)) |> + gather(variable, value, -mean, -.iter) |> ggplot(aes(x = value, y = mean)) + geom_point() + facet_wrap(~ variable, scales = "free_x") diff --git a/inst/examples/seg_svm_rbf_only.R b/inst/examples/seg_svm_rbf_only.R index fbc4f6a31..76d4a99d6 100644 --- a/inst/examples/seg_svm_rbf_only.R +++ b/inst/examples/seg_svm_rbf_only.R @@ -11,7 +11,7 @@ registerDoMC(cores = 20) # ------------------------------------------------------------------------------ segmentationData <- - segmentationData %>% + segmentationData |> dplyr::select(-Case, -Cell, -contains("Centroid")) set.seed(8567) @@ -25,26 +25,26 @@ folds <- vfold_cv(seg_train, repeats = 10) # ------------------------------------------------------------------------------ seg_pre_proc <- - recipe(Class ~ ., data = seg_train) %>% - step_YeoJohnson(all_predictors()) %>% - step_normalize(all_predictors()) %>% - step_pca(all_predictors(), num_comp = tune()) %>% + recipe(Class ~ ., data = seg_train) |> + step_YeoJohnson(all_predictors()) |> + step_normalize(all_predictors()) |> + step_pca(all_predictors(), num_comp = tune()) |> step_downsample(Class) svm_mod <- - svm_rbf(mode = "classification", cost = tune(), rbf_sigma = tune()) %>% + svm_rbf(mode = "classification", cost = tune(), rbf_sigma = tune()) |> set_engine("kernlab") svm_wflow <- - workflow() %>% - add_model(svm_mod) %>% + workflow() |> + add_model(svm_mod) |> add_recipe(seg_pre_proc) # ------------------------------------------------------------------------------ svm_set <- - svm_wflow %>% - parameters() %>% + svm_wflow |> + parameters() |> update(num_comp = num_comp(c(1, 20))) grid <- tibble(cost = 10^(-2.75), num_comp = 15, @@ -55,7 +55,7 @@ grid_results <- tune_grid(svm_wflow, resamples = folds, grid = grid, summarize(grid_results) ggplot( - summarize(grid_results) %>% filter(.metric == "accuracy"), + summarize(grid_results) |> filter(.metric == "accuracy"), aes(x = rbf_sigma, y = mean)) + geom_path() + scale_x_log10() @@ -63,14 +63,14 @@ ggplot( # ------------------------------------------------------------------------------ sigma_set <- - svm_set %>% - slice(2) %>% + svm_set |> + slice(2) |> update(rbf_sigma = rbf_sigma(c(-8, 0))) sigma_grid <- tibble(rbf_sigma = 10^seq(-8, 0, length = 100)) acc_vals_0 <- - summarize(grid_results) %>% + summarize(grid_results) |> slice(c(80, 125, 150)) # ------------------------------------------------------------------------------ @@ -78,30 +78,30 @@ acc_vals_0 <- gp_data_0 <- - tune:::encode_set(acc_vals_0 %>% select(rbf_sigma), sigma_set) %>% - set_names("scaled_sigma") %>% + tune:::encode_set(acc_vals_0 |> select(rbf_sigma), sigma_set) |> + set_names("scaled_sigma") |> mutate( mean = acc_vals_0$mean, rbf_sigma = acc_vals_0$rbf_sigma) gp_grid <- - tune:::encode_set(sigma_grid, sigma_set) %>% - set_names("scaled_sigma") %>% + tune:::encode_set(sigma_grid, sigma_set) |> + set_names("scaled_sigma") |> mutate(rbf_sigma = sigma_grid$rbf_sigma) library(GPfit) gp_0 <- GP_fit(X = as.matrix(gp_data_0[,1, drop = FALSE]), Y = gp_data_0$mean) gp_fit_0 <- - predict(gp_0, as.matrix(gp_grid[,1, drop = FALSE]))$complete_data %>% - as_tibble() %>% - setNames(c("scaled_sigma", "mean", "var")) %>% + predict(gp_0, as.matrix(gp_grid[,1, drop = FALSE]))$complete_data |> + as_tibble() |> + setNames(c("scaled_sigma", "mean", "var")) |> mutate(sd = sqrt(var), lower = mean - 1 * sd, upper = mean + 1 * sd, snr = (mean - max(gp_data_0$mean))/sd, prob_imp = pnorm(snr) - ) %>% - bind_cols(gp_grid %>% select(rbf_sigma)) + ) |> + bind_cols(gp_grid |> select(rbf_sigma)) ggplot(gp_fit_0, aes(x = rbf_sigma)) + geom_path(aes(y = mean)) + diff --git a/inst/examples/stack_overflow.R b/inst/examples/stack_overflow.R index 4fd77a663..f8a6882dc 100644 --- a/inst/examples/stack_overflow.R +++ b/inst/examples/stack_overflow.R @@ -6,29 +6,29 @@ library(readr) # ------------------------------------------------------------------------------ so_train <- - read_rds(url("https://github.com/juliasilge/supervised-ML-case-studies-course/blob/master/data/c2_training_full.rds?raw=true")) %>% - mutate(Country = as.factor(Country)) %>% - mutate_if(is.logical, as.numeric) %>% + read_rds(url("https://github.com/juliasilge/supervised-ML-case-studies-course/blob/master/data/c2_training_full.rds?raw=true")) |> + mutate(Country = as.factor(Country)) |> + mutate_if(is.logical, as.numeric) |> # ranger doesn't like spaces or "/" and will error with "Illegal column names in # formula interface. Fix column names or use alternative interface in ranger. rename_at(vars(dplyr::contains(" ")), ~ gsub("([[:blank:]])|([[:punct:]])", "_", .)) lr_rec <- - recipe(Remote ~ ., data = so_train) %>% - step_dummy(Country) %>% - step_downsample(Remote) %>% + recipe(Remote ~ ., data = so_train) |> + step_dummy(Country) |> + step_downsample(Remote) |> step_zv(all_predictors()) lr_mod <- - logistic_reg(penalty = tune(), mixture = tune()) %>% + logistic_reg(penalty = tune(), mixture = tune()) |> set_engine("glmnet") rf_rec <- - recipe(Remote ~ ., data = so_train) %>% + recipe(Remote ~ ., data = so_train) |> step_downsample(Remote) rf_mod <- - rand_forest(mode = "classification", mtry = tune(), min_n = tune(), trees = 1000) %>% + rand_forest(mode = "classification", mtry = tune(), min_n = tune(), trees = 1000) |> set_engine("ranger") set.seed(4290) @@ -50,8 +50,8 @@ glmn_search <- tune_grid(lr_rec, lr_mod, resamples = so_boots, metrics = metric_set(accuracy, roc_auc), control = control_grid(verbose = TRUE)) -summarize(glmn_search) %>% - filter(.metric == "accuracy") %>% +summarize(glmn_search) |> + filter(.metric == "accuracy") |> ggplot(aes(x = penalty, y = mean, col = factor(mixture))) + geom_point() + geom_line() + @@ -62,8 +62,8 @@ summarize(glmn_search) %>% set.seed(4538) rf_grid <- - parameters(rf_mod) %>% - update(mtry = mtry(c(1, 20))) %>% + parameters(rf_mod) |> + update(mtry = mtry(c(1, 20))) |> grid_latin_hypercube(size = 20) set.seed(1809) @@ -71,17 +71,17 @@ rf_search <- tune_grid(rf_rec, rf_mod, resamples = so_boots, grid = rf_grid, metrics = metric_set(accuracy, roc_auc)) -summarize(rf_search) %>% - filter(.metric == "accuracy") %>% - select(mtry, min_n, mean) %>% - pivot_longer(-mean, names_to = "parameter", values_to = "value") %>% +summarize(rf_search) |> + filter(.metric == "accuracy") |> + select(mtry, min_n, mean) |> + pivot_longer(-mean, names_to = "parameter", values_to = "value") |> ggplot(aes(x = value, y = mean)) + geom_point() + geom_smooth(se = FALSE) + facet_wrap( ~ parameter, scales = "free_x") -summarize(rf_search) %>% - filter(.metric == "accuracy") %>% +summarize(rf_search) |> + filter(.metric == "accuracy") |> ggplot(aes(x = mtry, y = min_n, size = mean)) + geom_point() diff --git a/inst/examples/teaching_example.R b/inst/examples/teaching_example.R index 3cfbcde85..63608c64c 100644 --- a/inst/examples/teaching_example.R +++ b/inst/examples/teaching_example.R @@ -22,7 +22,7 @@ registerDoMC(cores = 20) # ------------------------------------------------------------------------------ segmentationData <- - segmentationData %>% + segmentationData |> dplyr::select(-Case, -Cell, -contains("Centroid")) set.seed(8567) @@ -36,31 +36,31 @@ folds <- vfold_cv(seg_train, repeats = 3) # ------------------------------------------------------------------------------ seg_pre_proc <- - recipe(Class ~ ., data = seg_train) %>% - step_YeoJohnson(all_predictors()) %>% - step_normalize(all_predictors()) %>% - step_pca(all_predictors(), num_comp = tune()) %>% + recipe(Class ~ ., data = seg_train) |> + step_YeoJohnson(all_predictors()) |> + step_normalize(all_predictors()) |> + step_pca(all_predictors(), num_comp = tune()) |> step_downsample(Class) svm_mod <- - svm_rbf(mode = "classification", cost = tune(), rbf_sigma = tune()) %>% + svm_rbf(mode = "classification", cost = tune(), rbf_sigma = tune()) |> set_engine("kernlab") svm_wflow <- - workflow() %>% - add_model(svm_mod) %>% + workflow() |> + add_model(svm_mod) |> add_recipe(seg_pre_proc) # ------------------------------------------------------------------------------ svm_set <- - svm_wflow %>% - parameters() %>% + svm_wflow |> + parameters() |> update(num_comp = num_comp(c(1, 20)), rbf_sigma = rbf_sigma(c(-3, 0))) set.seed(354) grid <- - grid_max_entropy(svm_set, size = 150) %>% + grid_max_entropy(svm_set, size = 150) |> mutate(cost = 10^(-2.75), num_comp = 15) grid_results <- tune_grid(svm_wflow, resamples = folds, grid = grid, @@ -68,7 +68,7 @@ grid_results <- tune_grid(svm_wflow, resamples = folds, grid = grid, collect_metrics(grid_results) ggplot( - collect_metrics(grid_results) %>% filter(.metric == "accuracy"), + collect_metrics(grid_results) |> filter(.metric == "accuracy"), aes(x = rbf_sigma, y = mean)) + geom_path() + scale_x_log10() @@ -76,7 +76,7 @@ ggplot( # ------------------------------------------------------------------------------ sigma_set <- - svm_set %>% + svm_set |> slice(2) acc_results <- @@ -110,8 +110,8 @@ initial_split <- rsample::initial_split(ames, strata = "Sale_Price") initial_split #> <2199/731/2930> -ames_train <- initial_split %>% training() -ames_test <- initial_split %>% testing() +ames_train <- initial_split |> training() +ames_test <- initial_split |> testing() set.seed(2453) @@ -119,11 +119,11 @@ cv_splits <- vfold_cv(ames_train, strata = "Sale_Price") set.seed(24533) -ames_rec <- recipe(Sale_Price ~ ., data = ames_train) %>% - step_log(Sale_Price, base = 10) %>% - step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>% - step_other(Neighborhood, threshold = tune()) %>% - step_dummy(all_nominal()) %>% +ames_rec <- recipe(Sale_Price ~ ., data = ames_train) |> + step_log(Sale_Price, base = 10) |> + step_YeoJohnson(Lot_Area, Gr_Liv_Area) |> + step_other(Neighborhood, threshold = tune()) |> + step_dummy(all_nominal()) |> step_zv(all_predictors()) mars_mod <- @@ -131,22 +131,22 @@ mars_mod <- mode = "regression", prod_degree = tune(), prune_method = tune() - ) %>% + ) |> set_engine("earth") -ames_wflow <- workflow() %>% - add_recipe(ames_rec) %>% +ames_wflow <- workflow() |> + add_recipe(ames_rec) |> add_model(mars_mod) set.seed(123456) -mars_set <- ames_wflow %>% - parameters() %>% - update(threshold = threshold(c(0, .2))) %>% +mars_set <- ames_wflow |> + parameters() |> + update(threshold = threshold(c(0, .2))) |> update(prune_method = prune_method(values = c("backward", "none", "forward", "cv"))) set.seed(987654) -mars_grid <- mars_set %>% grid_max_entropy(size = 5) +mars_grid <- mars_set |> grid_max_entropy(size = 5) set.seed(456321) initial_mars <- @@ -440,8 +440,8 @@ initial_split <- rsample::initial_split(ames, strata = "Sale_Price") initial_split #> <2199/731/2930> -ames_train <- initial_split %>% training() -ames_test <- initial_split %>% testing() +ames_train <- initial_split |> training() +ames_test <- initial_split |> testing() set.seed(2453) @@ -449,11 +449,11 @@ cv_splits <- vfold_cv(ames_train, strata = "Sale_Price") set.seed(24533) -ames_rec <- recipe(Sale_Price ~ ., data = ames_train) %>% - step_log(Sale_Price, base = 10) %>% - step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>% - step_other(Neighborhood, threshold = tune()) %>% - step_dummy(all_nominal()) %>% +ames_rec <- recipe(Sale_Price ~ ., data = ames_train) |> + step_log(Sale_Price, base = 10) |> + step_YeoJohnson(Lot_Area, Gr_Liv_Area) |> + step_other(Neighborhood, threshold = tune()) |> + step_dummy(all_nominal()) |> step_zv(all_predictors()) mars_mod <- @@ -461,22 +461,22 @@ mars_mod <- mode = "regression", prod_degree = tune(), prune_method = tune() - ) %>% + ) |> set_engine("earth") -ames_wflow <- workflow() %>% - add_recipe(ames_rec) %>% +ames_wflow <- workflow() |> + add_recipe(ames_rec) |> add_model(mars_mod) set.seed(123456) -mars_set <- ames_wflow %>% - parameters() %>% - update(threshold = threshold(c(0, .2))) %>% +mars_set <- ames_wflow |> + parameters() |> + update(threshold = threshold(c(0, .2))) |> update(prune_method = prune_method(values = c("backward", "none", "forward", "cv"))) set.seed(987654) -mars_grid <- mars_set %>% grid_max_entropy(size = 5) +mars_grid <- mars_set |> grid_max_entropy(size = 5) set.seed(456321) initial_mars <- @@ -770,8 +770,8 @@ initial_split <- rsample::initial_split(ames, strata = "Sale_Price") initial_split #> <2199/731/2930> -ames_train <- initial_split %>% training() -ames_test <- initial_split %>% testing() +ames_train <- initial_split |> training() +ames_test <- initial_split |> testing() set.seed(2453) @@ -779,11 +779,11 @@ cv_splits <- vfold_cv(ames_train, strata = "Sale_Price") set.seed(24533) -ames_rec <- recipe(Sale_Price ~ ., data = ames_train) %>% - step_log(Sale_Price, base = 10) %>% - step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>% - step_other(Neighborhood, threshold = tune()) %>% - step_dummy(all_nominal()) %>% +ames_rec <- recipe(Sale_Price ~ ., data = ames_train) |> + step_log(Sale_Price, base = 10) |> + step_YeoJohnson(Lot_Area, Gr_Liv_Area) |> + step_other(Neighborhood, threshold = tune()) |> + step_dummy(all_nominal()) |> step_zv(all_predictors()) mars_mod <- @@ -791,22 +791,22 @@ mars_mod <- mode = "regression", prod_degree = tune(), prune_method = tune() - ) %>% + ) |> set_engine("earth") -ames_wflow <- workflow() %>% - add_recipe(ames_rec) %>% +ames_wflow <- workflow() |> + add_recipe(ames_rec) |> add_model(mars_mod) set.seed(123456) -mars_set <- ames_wflow %>% - parameters() %>% - update(threshold = threshold(c(0, .2))) %>% +mars_set <- ames_wflow |> + parameters() |> + update(threshold = threshold(c(0, .2))) |> update(prune_method = prune_method(values = c("backward", "none", "forward", "cv"))) set.seed(987654) -mars_grid <- mars_set %>% grid_max_entropy(size = 5) +mars_grid <- mars_set |> grid_max_entropy(size = 5) set.seed(456321) initial_mars <- @@ -1100,8 +1100,8 @@ initial_split <- rsample::initial_split(ames, strata = "Sale_Price") initial_split #> <2199/731/2930> -ames_train <- initial_split %>% training() -ames_test <- initial_split %>% testing() +ames_train <- initial_split |> training() +ames_test <- initial_split |> testing() set.seed(2453) @@ -1109,11 +1109,11 @@ cv_splits <- vfold_cv(ames_train, strata = "Sale_Price") set.seed(24533) -ames_rec <- recipe(Sale_Price ~ ., data = ames_train) %>% - step_log(Sale_Price, base = 10) %>% - step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>% - step_other(Neighborhood, threshold = tune()) %>% - step_dummy(all_nominal()) %>% +ames_rec <- recipe(Sale_Price ~ ., data = ames_train) |> + step_log(Sale_Price, base = 10) |> + step_YeoJohnson(Lot_Area, Gr_Liv_Area) |> + step_other(Neighborhood, threshold = tune()) |> + step_dummy(all_nominal()) |> step_zv(all_predictors()) mars_mod <- @@ -1121,22 +1121,22 @@ mars_mod <- mode = "regression", prod_degree = tune(), prune_method = tune() - ) %>% + ) |> set_engine("earth") -ames_wflow <- workflow() %>% - add_recipe(ames_rec) %>% +ames_wflow <- workflow() |> + add_recipe(ames_rec) |> add_model(mars_mod) set.seed(123456) -mars_set <- ames_wflow %>% - parameters() %>% - update(threshold = threshold(c(0, .2))) %>% +mars_set <- ames_wflow |> + parameters() |> + update(threshold = threshold(c(0, .2))) |> update(prune_method = prune_method(values = c("backward", "none", "forward", "cv"))) set.seed(987654) -mars_grid <- mars_set %>% grid_max_entropy(size = 5) +mars_grid <- mars_set |> grid_max_entropy(size = 5) set.seed(456321) initial_mars <- @@ -1147,12 +1147,12 @@ initial_mars <- control = control_grid(verbose = TRUE) ) -collect_metrics(initial_mars) %>% - filter(.metric == "rmse") %>% +collect_metrics(initial_mars) |> + filter(.metric == "rmse") |> arrange(rbf_sigma) acc_vals_1 <- - acc_results %>% + acc_results |> slice(c(50, 80, 120)) ggplot( @@ -1174,25 +1174,25 @@ gp_iter <- function(.data, pset, objective) { } gp_data_1 <- - tune:::encode_set(.data %>% select(rbf_sigma), pset) %>% - set_names("scaled_sigma") %>% - bind_cols(.data %>% select(mean, rbf_sigma)) + tune:::encode_set(.data |> select(rbf_sigma), pset) |> + set_names("scaled_sigma") |> + bind_cols(.data |> select(mean, rbf_sigma)) gp_grid_1 <- - tune:::encode_set(sigma_grid, pset) %>% - set_names("scaled_sigma") %>% + tune:::encode_set(sigma_grid, pset) |> + set_names("scaled_sigma") |> mutate(rbf_sigma = sigma_grid$rbf_sigma) gp_1 <- GP_fit(X = as.matrix(gp_data_1[,1, drop = FALSE]), Y = gp_data_1$mean) gp_fit_1 <- - predict(gp_1, as.matrix(gp_grid_1[,1, drop = FALSE]))$complete_data %>% - as_tibble() %>% - setNames(c("scaled_sigma", ".mean", "var")) %>% - mutate(.sd = sqrt(var)) %>% - bind_cols(gp_grid_1 %>% select(rbf_sigma)) + predict(gp_1, as.matrix(gp_grid_1[,1, drop = FALSE]))$complete_data |> + as_tibble() |> + setNames(c("scaled_sigma", ".mean", "var")) |> + mutate(.sd = sqrt(var)) |> + bind_cols(gp_grid_1 |> select(rbf_sigma)) gp_fit_1 <- - gp_fit_1 %>% + gp_fit_1 |> bind_cols( predict( objective, @@ -1201,7 +1201,7 @@ gp_iter <- function(.data, pset, objective) { iter = 1, best = max(.data$mean) ) - ) %>% + ) |> mutate( objective = ifelse(objective < 0, 0, objective), lower = .mean - const * .sd, @@ -1213,7 +1213,7 @@ gp_iter <- function(.data, pset, objective) { # ------------------------------------------------------------------------------ acc_vals <- - acc_results %>% + acc_results |> slice(c(80, 110, 140)) sigma_grid <- tibble(rbf_sigma = 10^seq(-3, 0, length = 500)) @@ -1251,7 +1251,7 @@ for (iter in 1:25) { xlab("Parameter") min_y <- - acc_vals %>% + acc_vals |> mutate(y = min(results$objective)) p_lower <- @@ -1279,18 +1279,18 @@ for (iter in 1:25) { } best <- - results %>% - arrange(desc(objective)) %>% - slice(1) %>% - select(best = rbf_sigma) %>% - cbind(acc_results %>% anti_join(acc_vals %>% select(rbf_sigma), by = "rbf_sigma")) %>% - mutate(error = abs(best - rbf_sigma)) %>% - arrange(error) %>% - slice(1) %>% + results |> + arrange(desc(objective)) |> + slice(1) |> + select(best = rbf_sigma) |> + cbind(acc_results |> anti_join(acc_vals |> select(rbf_sigma), by = "rbf_sigma")) |> + mutate(error = abs(best - rbf_sigma)) |> + arrange(error) |> + slice(1) |> select(rbf_sigma) acc_vals <- - acc_vals %>% + acc_vals |> bind_rows( inner_join(acc_results, best, by = "rbf_sigma") ) diff --git a/inst/examples/two_class_knn.R b/inst/examples/two_class_knn.R index d241f1948..c512517a3 100644 --- a/inst/examples/two_class_knn.R +++ b/inst/examples/two_class_knn.R @@ -14,7 +14,7 @@ data_folds <- vfold_cv(two_class_dat, repeats = 1) # ------------------------------------------------------------------------------ two_class_rec <- - recipe(Class ~ ., data = two_class_dat) %>% + recipe(Class ~ ., data = two_class_dat) |> step_normalize(A, B) knn_model <- @@ -23,22 +23,22 @@ knn_model <- neighbors = tune("K"), weight_func = tune(), dist_power = tune("exponent") - ) %>% + ) |> set_engine("kknn") two_class_wflow <- - workflow() %>% - add_recipe(two_class_rec) %>% + workflow() |> + add_recipe(two_class_rec) |> add_model(knn_model) two_class_set <- - parameters(two_class_wflow) %>% - update(K = neighbors(c(1, 50))) %>% + parameters(two_class_wflow) |> + update(K = neighbors(c(1, 50))) |> update(exponent = dist_power(c(1/10, 2))) set.seed(2494) two_class_grid <- - two_class_set %>% + two_class_set |> grid_max_entropy(size = 10) class_metrics <- metric_set(roc_auc, accuracy, kap, mcc) @@ -48,13 +48,13 @@ res <- tune_grid(two_class_wflow, resamples = data_folds, grid = two_class_grid, # all_pred <- -# res %>% -# select(starts_with("id"), .predictions) %>% -# unnest() %>% +# res |> +# select(starts_with("id"), .predictions) |> +# unnest() |> # nest(-K, -weight_func, -exponent) -summarize(res) %>% filter(.metric == "roc_auc") %>% arrange(desc(mean)) +summarize(res) |> filter(.metric == "roc_auc") |> arrange(desc(mean)) decr_kappa <- function(i) { if (i < 5) { @@ -82,7 +82,7 @@ svm_search <- control = control_bayes(verbose = TRUE, uncertain = 5, save_pred = TRUE) ) -ggplot(svm_search %>% summarize() %>% filter(.metric == "roc_auc")) + +ggplot(svm_search |> summarize() |> filter(.metric == "roc_auc")) + aes(x = K, y = exponent, col = weight_func, size = mean) + geom_point(alpha = .7) diff --git a/inst/examples/two_class_svm.R b/inst/examples/two_class_svm.R index c00d5fe0c..d197e0119 100644 --- a/inst/examples/two_class_svm.R +++ b/inst/examples/two_class_svm.R @@ -11,26 +11,26 @@ data_folds <- vfold_cv(two_class_dat, repeats = 5) # ------------------------------------------------------------------------------ two_class_rec <- - recipe(Class ~ ., data = two_class_dat) %>% + recipe(Class ~ ., data = two_class_dat) |> step_normalize(A, B) svm_model <- svm_poly(mode = "classification", cost = tune(), degree = tune(), - scale_factor = tune()) %>% + scale_factor = tune()) |> set_engine("kernlab") two_class_wflow <- - workflow() %>% - add_recipe(two_class_rec) %>% + workflow() |> + add_recipe(two_class_rec) |> add_model(svm_model) two_class_set <- - parameters(two_class_wflow) %>% + parameters(two_class_wflow) |> update(cost = cost(c(-10, 4))) set.seed(2494) two_class_grid <- - two_class_set %>% + two_class_set |> grid_max_entropy(size = 5) class_only <- metric_set(accuracy, kap, mcc) @@ -38,7 +38,7 @@ class_only <- metric_set(accuracy, kap, mcc) res <- tune_grid(two_class_wflow, resamples = data_folds, grid = two_class_grid, metrics = class_only, control = control_grid(save_pred = TRUE)) -summarize(res) %>% filter(.metric == "accuracy") %>% arrange(desc(mean)) +summarize(res) |> filter(.metric == "accuracy") |> arrange(desc(mean)) set.seed(365456) svm_search <- @@ -56,18 +56,18 @@ svm_search <- # ------------------------------------------------------------------------------ svm_model <- - svm_poly(mode = "classification", cost = tune()) %>% + svm_poly(mode = "classification", cost = tune()) |> set_engine("kernlab") two_class_wflow <- - workflow() %>% - add_recipe(two_class_rec) %>% + workflow() |> + add_recipe(two_class_rec) |> add_model(svm_model) set.seed(37) two_class_grid <- - two_class_set %>% + two_class_set |> grid_random(size = 5) class_only <- metric_set(accuracy) @@ -80,36 +80,36 @@ ggplot(summarize(grid_res), aes(x = cost, y = mean)) + theme_bw() cost_grid <- - two_class_set %>% + two_class_set |> grid_regular(levels = 100) acc_vals_0 <- summarize(grid_res) gp_data_0 <- - tune:::encode_set(acc_vals_0 %>% select(cost), two_class_set) %>% - set_names("scaled_cost") %>% + tune:::encode_set(acc_vals_0 |> select(cost), two_class_set) |> + set_names("scaled_cost") |> mutate( mean = acc_vals_0$mean, cost = acc_vals_0$cost) gp_grid <- - tune:::encode_set(cost_grid, two_class_set) %>% - set_names("scaled_cost") %>% + tune:::encode_set(cost_grid, two_class_set) |> + set_names("scaled_cost") |> mutate(cost = cost_grid$cost) library(GPfit) gp_0 <- GP_fit(X = as.matrix(gp_data_0[,1, drop = FALSE]), Y = gp_data_0$mean) gp_fit_0 <- - predict(gp_0, as.matrix(gp_grid[,1, drop = FALSE]))$complete_data %>% - as_tibble() %>% - setNames(c("scaled_cost", "mean", "var")) %>% + predict(gp_0, as.matrix(gp_grid[,1, drop = FALSE]))$complete_data |> + as_tibble() |> + setNames(c("scaled_cost", "mean", "var")) |> mutate(sd = sqrt(var), lower = mean - 1 * sd, upper = mean + 1 * sd, snr = (mean - max(gp_data_0$mean))/sd, prob_imp = pnorm(snr) - ) %>% - bind_cols(gp_grid %>% select(cost)) + ) |> + bind_cols(gp_grid |> select(cost)) ggplot(gp_fit_0, aes(x = cost, y = sd)) + geom_path() + @@ -147,7 +147,7 @@ tgp_0 <- btgp(X = as.matrix(gp_data_0[,1, drop = FALSE]), Z = gp_data_0$mean) tgp_res_0 <- predict(tgp_0, as.matrix(gp_grid[,1, drop = FALSE])) tgp_fit_0 <- - gp_grid %>% + gp_grid |> mutate( mean = tgp_res_0$ZZ.mean, sd = sqrt(tgp_res_0$ZZ.ks2), diff --git a/inst/examples/verbose_test.R b/inst/examples/verbose_test.R index a5314748f..105c6b429 100644 --- a/inst/examples/verbose_test.R +++ b/inst/examples/verbose_test.R @@ -18,22 +18,22 @@ cv_splits <- vfold_cv(ames_train, v = 10, strata = "Sale_Price") # ------------------------------------------------------------------------------ ames_rec <- - recipe(Sale_Price ~ ., data = ames_train) %>% - step_log(Sale_Price, base = 10) %>% - step_YeoJohnson(Lot_Area, Gr_Liv_Area) %>% - step_other(Neighborhood, threshold = .1) %>% - step_dummy(all_nominal()) %>% + recipe(Sale_Price ~ ., data = ames_train) |> + step_log(Sale_Price, base = 10) |> + step_YeoJohnson(Lot_Area, Gr_Liv_Area) |> + step_other(Neighborhood, threshold = .1) |> + step_dummy(all_nominal()) |> step_zv(all_predictors()) svm_model <- svm_rbf( - mode = "regression", cost = tune(), rbf_sigma = tune()) %>% + mode = "regression", cost = tune(), rbf_sigma = tune()) |> set_engine("kernlab") ames_wflow <- - workflow() %>% - add_recipe(ames_rec) %>% + workflow() |> + add_recipe(ames_rec) |> add_model(svm_model) @@ -42,7 +42,7 @@ ames_set <- parameters(ames_wflow) ames_grid <- - ames_set %>% + ames_set |> grid_max_entropy(size = 3) initial_grid <- tune_grid(ames_wflow, resamples = cv_splits, grid = ames_grid, control = control_grid(verbose = TRUE)) diff --git a/inst/examples/voters.R b/inst/examples/voters.R index edc2d6834..afa713532 100644 --- a/inst/examples/voters.R +++ b/inst/examples/voters.R @@ -8,15 +8,15 @@ library(readr) vote_train <- read_rds(url("https://github.com/juliasilge/supervised-ML-case-studies-course/blob/master/data/c3_training_full.rds?raw=true")) voters_rec <- - recipe(turnout16_2016 ~ ., data = vote_train) %>% + recipe(turnout16_2016 ~ ., data = vote_train) |> step_downsample(turnout16_2016) lr_mod <- - logistic_reg(penalty = tune(), mixture = tune()) %>% + logistic_reg(penalty = tune(), mixture = tune()) |> set_engine("glmnet") rf_mod <- - rand_forest(mode = "classification", mtry = tune(), min_n = tune(), trees = 1000) %>% + rand_forest(mode = "classification", mtry = tune(), min_n = tune(), trees = 1000) |> set_engine("ranger") set.seed(6059) @@ -31,8 +31,8 @@ glmn_search <- tune_grid(voters_rec, lr_mod, resamples = voter_folds, grid = glmn_grid, metrics = metric_set(accuracy, roc_auc)) -summarize(glmn_search) %>% - filter(.metric == "accuracy") %>% +summarize(glmn_search) |> + filter(.metric == "accuracy") |> ggplot(aes(x = penalty, y = mean, col = factor(mixture))) + geom_point() + geom_line() + @@ -43,8 +43,8 @@ summarize(glmn_search) %>% set.seed(606) rf_grid <- - parameters(rf_mod) %>% - update(mtry = mtry(c(1, 41))) %>% + parameters(rf_mod) |> + update(mtry = mtry(c(1, 41))) |> grid_latin_hypercube(size = 20) set.seed(1354) @@ -52,17 +52,17 @@ rf_search <- tune_grid(voters_rec, rf_mod, resamples = voter_folds, grid = rf_grid, metrics = metric_set(accuracy, roc_auc)) -summarize(rf_search) %>% - filter(.metric == "accuracy") %>% - select(mtry, min_n, mean) %>% - pivot_longer(-mean, names_to = "parameter", values_to = "value") %>% +summarize(rf_search) |> + filter(.metric == "accuracy") |> + select(mtry, min_n, mean) |> + pivot_longer(-mean, names_to = "parameter", values_to = "value") |> ggplot(aes(x = value, y = mean)) + geom_point() + geom_smooth(se = FALSE) + facet_wrap( ~ parameter, scales = "free_x") -summarize(rf_search) %>% - filter(.metric == "accuracy") %>% +summarize(rf_search) |> + filter(.metric == "accuracy") |> ggplot(aes(x = mtry, y = min_n, size = mean)) + geom_point() diff --git a/inst/test_objects.R b/inst/test_objects.R index 389af042d..3827c92db 100644 --- a/inst/test_objects.R +++ b/inst/test_objects.R @@ -16,23 +16,23 @@ simple_rec <- recipe(mpg ~ ., data = mtcars) form <- mpg ~ . spline_rec <- - recipe(mpg ~ ., data = mtcars) %>% - step_normalize(all_predictors()) %>% + recipe(mpg ~ ., data = mtcars) |> + step_normalize(all_predictors()) |> step_bs(disp, deg_free = tune()) -lm_mod <- linear_reg() %>% set_engine("lm") +lm_mod <- linear_reg() |> set_engine("lm") knn_mod <- - nearest_neighbor(mode = "regression", neighbors = tune()) %>% + nearest_neighbor(mode = "regression", neighbors = tune()) |> set_engine("kknn") knn_mod_two <- - nearest_neighbor(mode = "regression", neighbors = tune("K"), weight_func = tune()) %>% + nearest_neighbor(mode = "regression", neighbors = tune("K"), weight_func = tune()) |> set_engine("kknn") get_coefs <- function(x) { - x %>% - extract_fit_parsnip() %>% + x |> + extract_fit_parsnip() |> tidy() } @@ -43,18 +43,18 @@ b_ctrl <- control_bayes(verbose = verb, save_pred = TRUE, extract = get_coefs) # ------------------------------------------------------------------------------ mt_spln_lm <- - workflow() %>% - add_recipe(spline_rec) %>% + workflow() |> + add_recipe(spline_rec) |> add_model(lm_mod) mt_spln_knn <- - workflow() %>% - add_recipe(spline_rec) %>% + workflow() |> + add_recipe(spline_rec) |> add_model(knn_mod) mt_knn <- - workflow() %>% - add_recipe(simple_rec) %>% + workflow() |> + add_recipe(simple_rec) |> add_model(knn_mod) # ------------------------------------------------------------------------------ @@ -130,7 +130,7 @@ set.seed(7898) data_folds <- vfold_cv(two_class_dat, repeats = 5) two_class_rec <- - recipe(Class ~ ., data = two_class_dat) %>% + recipe(Class ~ ., data = two_class_dat) |> step_normalize(A, B) knn_model <- @@ -139,22 +139,22 @@ knn_model <- neighbors = tune("K"), weight_func = tune(), dist_power = tune("exponent") - ) %>% + ) |> set_engine("kknn") two_class_wflow <- - workflow() %>% - add_recipe(two_class_rec) %>% + workflow() |> + add_recipe(two_class_rec) |> add_model(knn_model) two_class_set <- - extract_parameter_set_dials(two_class_wflow) %>% - update(K = neighbors(c(1, 50))) %>% + extract_parameter_set_dials(two_class_wflow) |> + update(K = neighbors(c(1, 50))) |> update(exponent = dist_power(c(1 / 10, 2))) set.seed(2494) two_class_grid <- - two_class_set %>% + two_class_set |> grid_max_entropy(size = 10) class_metrics <- metric_set(roc_auc, accuracy, kap, mcc) @@ -221,21 +221,21 @@ svm_model <- cost = tune(), degree = tune("%^*#"), scale_factor = tune() - ) %>% + ) |> set_engine("kernlab") two_class_wflow <- - workflow() %>% - add_recipe(two_class_rec) %>% + workflow() |> + add_recipe(two_class_rec) |> add_model(svm_model) two_class_set <- - extract_parameter_set_dials(two_class_wflow) %>% + extract_parameter_set_dials(two_class_wflow) |> update(cost = cost(c(-10, 4))) set.seed(2494) two_class_grid <- - two_class_set %>% + two_class_set |> grid_max_entropy(size = 5) class_only <- metric_set(accuracy, kap, mcc) @@ -257,7 +257,7 @@ saveRDS( ) two_class_reg_grid <- - two_class_set %>% + two_class_set |> grid_regular(levels = c(5, 4, 2)) svm_reg_results <- @@ -285,34 +285,34 @@ data_folds <- vfold_cv(mtcars, repeats = 2) # "rcv_results" used in test-autoplot.R, test-select_best.R, and test-estimate.R base_rec <- - recipe(mpg ~ ., data = mtcars) %>% + recipe(mpg ~ ., data = mtcars) |> step_normalize(all_predictors()) disp_rec <- - base_rec %>% - step_bs(disp, degree = tune(), deg_free = tune()) %>% + base_rec |> + step_bs(disp, degree = tune(), deg_free = tune()) |> step_bs(wt, degree = tune("wt degree"), deg_free = tune("wt df")) lm_model <- - linear_reg(mode = "regression") %>% + linear_reg(mode = "regression") |> set_engine("lm") cars_wflow <- - workflow() %>% - add_recipe(disp_rec) %>% + workflow() |> + add_recipe(disp_rec) |> add_model(lm_model) cars_set <- - cars_wflow %>% - parameters %>% - update(degree = degree_int(1:2)) %>% - update(deg_free = deg_free(c(2, 10))) %>% - update(`wt degree` = degree_int(1:2)) %>% + cars_wflow |> + parameters |> + update(degree = degree_int(1:2)) |> + update(deg_free = deg_free(c(2, 10))) |> + update(`wt degree` = degree_int(1:2)) |> update(`wt df` = deg_free(c(2, 10))) set.seed(255) cars_grid <- - cars_set %>% + cars_set |> grid_regular(levels = c(3, 2, 3, 2)) @@ -341,7 +341,7 @@ folds <- vfold_cv(mtcars, v = 3) rec <- recipe(mpg ~ ., data = mtcars) -mod <- linear_reg() %>% +mod <- linear_reg() |> set_engine("lm") lm_resamples <- fit_resamples(mod, rec, folds) @@ -361,15 +361,15 @@ saveRDS( set.seed(7898) folds <- vfold_cv(mtcars, v = 2) -rec <- recipe(mpg ~ ., data = mtcars) %>% - step_normalize(all_predictors()) %>% +rec <- recipe(mpg ~ ., data = mtcars) |> + step_normalize(all_predictors()) |> step_ns(disp, deg_free = tune()) -mod <- linear_reg(mode = "regression") %>% +mod <- linear_reg(mode = "regression") |> set_engine("lm") -wflow <- workflow() %>% - add_recipe(rec) %>% +wflow <- workflow() |> + add_recipe(rec) |> add_model(mod) set.seed(2934) @@ -386,8 +386,8 @@ saveRDS( # A single survival model set.seed(1) -sim_dat <- prodlim::SimSurv(200) %>% - mutate(event_time = Surv(time, event)) %>% +sim_dat <- prodlim::SimSurv(200) |> + mutate(event_time = Surv(time, event)) |> select(event_time, X1, X2) set.seed(2) @@ -396,8 +396,8 @@ sim_rs <- vfold_cv(sim_dat) time_points <- c(10, 1, 5, 15) boost_spec <- - boost_tree(trees = tune()) %>% - set_mode("censored regression") %>% + boost_tree(trees = tune()) |> + set_mode("censored regression") |> set_engine("mboost") srv_mtr <- @@ -410,7 +410,7 @@ srv_mtr <- set.seed(2193) surv_boost_tree_res <- - boost_spec %>% + boost_spec |> tune_grid( event_time ~ X1 + X2, resamples = sim_rs, diff --git a/inst/test_objects.Rout b/inst/test_objects.Rout index ee191f133..ee79f9aa6 100644 --- a/inst/test_objects.Rout +++ b/inst/test_objects.Rout @@ -70,23 +70,23 @@ The following object is masked from ‘package:dplyr’: > form <- mpg ~ . > > spline_rec <- -+ recipe(mpg ~ ., data = mtcars) %>% -+ step_normalize(all_predictors()) %>% ++ recipe(mpg ~ ., data = mtcars) |> ++ step_normalize(all_predictors()) |> + step_bs(disp, deg_free = tune()) > -> lm_mod <- linear_reg() %>% set_engine("lm") +> lm_mod <- linear_reg() |> set_engine("lm") > > knn_mod <- -+ nearest_neighbor(mode = "regression", neighbors = tune()) %>% ++ nearest_neighbor(mode = "regression", neighbors = tune()) |> + set_engine("kknn") > > knn_mod_two <- -+ nearest_neighbor(mode = "regression", neighbors = tune("K"), weight_func = tune()) %>% ++ nearest_neighbor(mode = "regression", neighbors = tune("K"), weight_func = tune()) |> + set_engine("kknn") > > get_coefs <- function(x) { -+ x %>% -+ extract_fit_parsnip() %>% ++ x |> ++ extract_fit_parsnip() |> + tidy() + } > @@ -97,18 +97,18 @@ The following object is masked from ‘package:dplyr’: > # ------------------------------------------------------------------------------ > > mt_spln_lm <- -+ workflow() %>% -+ add_recipe(spline_rec) %>% ++ workflow() |> ++ add_recipe(spline_rec) |> + add_model(lm_mod) > > mt_spln_knn <- -+ workflow() %>% -+ add_recipe(spline_rec) %>% ++ workflow() |> ++ add_recipe(spline_rec) |> + add_model(knn_mod) > > mt_knn <- -+ workflow() %>% -+ add_recipe(simple_rec) %>% ++ workflow() |> ++ add_recipe(simple_rec) |> + add_model(knn_mod) > > # ------------------------------------------------------------------------------ @@ -119,8 +119,21 @@ The following object is masked from ‘package:dplyr’: + resamples = folds, + control = g_ctrl) → A | warning: prediction from rank-deficient fit; consider predict(., rankdeficient="NA") - There were issues with some computations A: x1 → B | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases -There were issues with some computations A: x1 There were issues with some computations A: x1 B: x4 There were issues with some computations A: x2 B: x10 There were issues with some computations A: x2 B: x11 There were issues with some computations A: x2 B: x16 There were issues with some computations A: x2 B: x20 +There were issues with some computations A: x1 + + +→ B | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases +There were issues with some computations A: x1 + +There were issues with some computations A: x1 B: x4 + +There were issues with some computations A: x2 B: x10 + +There were issues with some computations A: x2 B: x11 + +There were issues with some computations A: x2 B: x16 + +There were issues with some computations A: x2 B: x20 > > set.seed(8825) > mt_spln_lm_bo <- @@ -131,7 +144,21 @@ There were issues with some computations A: x1 There were issues with some co + control = b_ctrl + ) → A | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases - There were issues with some computations A: x1 There were issues with some computations A: x5 There were issues with some computations A: x6 There were issues with some computations A: x11 There were issues with some computations A: x13 There were issues with some computations A: x14 There were issues with some computations A: x16 There were issues with some computations A: x16 +There were issues with some computations A: x1 + +There were issues with some computations A: x5 + +There were issues with some computations A: x6 + +There were issues with some computations A: x11 + +There were issues with some computations A: x13 + +There were issues with some computations A: x14 + +There were issues with some computations A: x16 + +There were issues with some computations A: x16 > > # ------------------------------------------------------------------------------ > @@ -144,8 +171,17 @@ There were issues with some computations A: x1 There were issues with some co + control = g_ctrl + ) → A | error: No tidy method for objects of class train.kknn - There were issues with some computations A: x2 → B | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases -There were issues with some computations A: x2 There were issues with some computations A: x6 B: x3 There were issues with some computations A: x12 B: x3 There were issues with some computations A: x15 B: x6 +There were issues with some computations A: x2 + + +→ B | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases +There were issues with some computations A: x2 + +There were issues with some computations A: x6 B: x3 + +There were issues with some computations A: x12 B: x3 + +There were issues with some computations A: x15 B: x6 > > set.seed(8825) > mt_spln_knn_bo <- @@ -154,8 +190,31 @@ There were issues with some computations A: x2 There were issues with some co + iter = 3, + control = b_ctrl) → A | error: No tidy method for objects of class train.kknn - There were issues with some computations A: x1 There were issues with some computations A: x4 → B | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases -There were issues with some computations A: x4 There were issues with some computations A: x8 B: x2 There were issues with some computations A: x14 B: x5 There were issues with some computations A: x20 B: x5 There were issues with some computations A: x25 B: x10 There were issues with some computations A: x26 B: x10 There were issues with some computations A: x31 B: x12 There were issues with some computations A: x36 B: x14 There were issues with some computations A: x37 B: x14 There were issues with some computations A: x40 B: x16 +There were issues with some computations A: x1 + +There were issues with some computations A: x4 + + +→ B | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases +There were issues with some computations A: x4 + +There were issues with some computations A: x8 B: x2 + +There were issues with some computations A: x14 B: x5 + +There were issues with some computations A: x20 B: x5 + +There were issues with some computations A: x25 B: x10 + +There were issues with some computations A: x26 B: x10 + +There were issues with some computations A: x31 B: x12 + +There were issues with some computations A: x36 B: x14 + +There were issues with some computations A: x37 B: x14 + +There were issues with some computations A: x40 B: x16 > > set.seed(8825) > mt_spln_knn_bo_sep <- @@ -165,22 +224,49 @@ There were issues with some computations A: x4 There were issues with some co + iter = 3, + control = b_ctrl) → A | error: No tidy method for objects of class train.kknn - There were issues with some computations A: x1 There were issues with some computations A: x6 → B | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases -There were issues with some computations A: x6 There were issues with some computations A: x12 B: x5 There were issues with some computations A: x18 B: x5 There were issues with some computations A: x24 B: x8 ! The Gaussian process model is being fit using 12 features but only has 5 +There were issues with some computations A: x1 + +There were issues with some computations A: x6 + + +→ B | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases +There were issues with some computations A: x6 + +There were issues with some computations A: x12 B: x5 + +There were issues with some computations A: x18 B: x5 + +There were issues with some computations A: x24 B: x8 +! The Gaussian process model is being fit using 12 features but only has 5 data points to do so. This may cause errors or a poor model fit. - → C | warning: did not converge in 10 iterations -There were issues with some computations A: x24 B: x8 There were issues with some computations A: x25 B: x10 C: x1 There were issues with some computations A: x26 B: x10 C: x1 There were issues with some computations A: x29 B: x11 C: x1 ! The Gaussian process model is being fit using 12 features but only has 6 + +→ C | warning: did not converge in 10 iterations +There were issues with some computations A: x24 B: x8 + +There were issues with some computations A: x25 B: x10 C: x1 + +There were issues with some computations A: x26 B: x10 C: x1 + +There were issues with some computations A: x29 B: x11 C: x1 +! The Gaussian process model is being fit using 12 features but only has 6 data points to do so. This may cause errors or a poor model fit. - There were issues with some computations A: x31 B: x12 C: x1 ! The Gaussian process model is being fit using 12 features but only has 7 +There were issues with some computations A: x31 B: x12 C: x1 +! The Gaussian process model is being fit using 12 features but only has 7 data points to do so. This may cause errors or a poor model fit. - There were issues with some computations A: x36 B: x14 C: x1 There were issues with some computations A: x37 B: x14 C: x1 There were issues with some computations A: x40 B: x16 C: x1 +There were issues with some computations A: x36 B: x14 C: x1 + +There were issues with some computations A: x37 B: x14 C: x1 + +There were issues with some computations A: x40 B: x16 C: x1 > > # ------------------------------------------------------------------------------ > > set.seed(8825) > mt_knn_grid <- tune_grid(mt_knn, resamples = folds, control = g_ctrl) → A | error: No tidy method for objects of class train.kknn - There were issues with some computations A: x2 There were issues with some computations A: x5 +There were issues with some computations A: x2 + +There were issues with some computations A: x5 > > set.seed(8825) > mt_knn_bo <- @@ -189,7 +275,15 @@ There were issues with some computations A: x24 B: x8 There were issues wit + iter = 3, + control = b_ctrl) → A | error: No tidy method for objects of class train.kknn - There were issues with some computations A: x1 There were issues with some computations A: x6 There were issues with some computations A: x11 There were issues with some computations A: x17 There were issues with some computations A: x20 +There were issues with some computations A: x1 + +There were issues with some computations A: x6 + +There were issues with some computations A: x11 + +There were issues with some computations A: x17 + +There were issues with some computations A: x20 > > # ------------------------------------------------------------------------------ > @@ -209,7 +303,7 @@ There were issues with some computations A: x24 B: x8 There were issues wit > data_folds <- vfold_cv(two_class_dat, repeats = 5) > > two_class_rec <- -+ recipe(Class ~ ., data = two_class_dat) %>% ++ recipe(Class ~ ., data = two_class_dat) |> + step_normalize(A, B) > > knn_model <- @@ -218,22 +312,22 @@ There were issues with some computations A: x24 B: x8 There were issues wit + neighbors = tune("K"), + weight_func = tune(), + dist_power = tune("exponent") -+ ) %>% ++ ) |> + set_engine("kknn") > > two_class_wflow <- -+ workflow() %>% -+ add_recipe(two_class_rec) %>% ++ workflow() |> ++ add_recipe(two_class_rec) |> + add_model(knn_model) > > two_class_set <- -+ extract_parameter_set_dials(two_class_wflow) %>% -+ update(K = neighbors(c(1, 50))) %>% ++ extract_parameter_set_dials(two_class_wflow) |> ++ update(K = neighbors(c(1, 50))) |> + update(exponent = dist_power(c(1 / 10, 2))) > > set.seed(2494) > two_class_grid <- -+ two_class_set %>% ++ two_class_set |> + grid_max_entropy(size = 10) > > class_metrics <- metric_set(roc_auc, accuracy, kap, mcc) @@ -302,21 +396,21 @@ There were issues with some computations A: x24 B: x8 There were issues wit + cost = tune(), + degree = tune("%^*#"), + scale_factor = tune() -+ ) %>% ++ ) |> + set_engine("kernlab") > > two_class_wflow <- -+ workflow() %>% -+ add_recipe(two_class_rec) %>% ++ workflow() |> ++ add_recipe(two_class_rec) |> + add_model(svm_model) > > two_class_set <- -+ extract_parameter_set_dials(two_class_wflow) %>% ++ extract_parameter_set_dials(two_class_wflow) |> + update(cost = cost(c(-10, 4))) > > set.seed(2494) > two_class_grid <- -+ two_class_set %>% ++ two_class_set |> + grid_max_entropy(size = 5) > > class_only <- metric_set(accuracy, kap, mcc) @@ -338,7 +432,7 @@ There were issues with some computations A: x24 B: x8 There were issues wit + ) > > two_class_reg_grid <- -+ two_class_set %>% ++ two_class_set |> + grid_regular(levels = c(5, 4, 2)) > > svm_reg_results <- @@ -366,29 +460,29 @@ There were issues with some computations A: x24 B: x8 There were issues wit > # "rcv_results" used in test-autoplot.R, test-select_best.R, and test-estimate.R > > base_rec <- -+ recipe(mpg ~ ., data = mtcars) %>% ++ recipe(mpg ~ ., data = mtcars) |> + step_normalize(all_predictors()) > > disp_rec <- -+ base_rec %>% -+ step_bs(disp, degree = tune(), deg_free = tune()) %>% ++ base_rec |> ++ step_bs(disp, degree = tune(), deg_free = tune()) |> + step_bs(wt, degree = tune("wt degree"), deg_free = tune("wt df")) > > lm_model <- -+ linear_reg(mode = "regression") %>% ++ linear_reg(mode = "regression") |> + set_engine("lm") > > cars_wflow <- -+ workflow() %>% -+ add_recipe(disp_rec) %>% ++ workflow() |> ++ add_recipe(disp_rec) |> + add_model(lm_model) > > cars_set <- -+ cars_wflow %>% -+ parameters %>% -+ update(degree = degree_int(1:2)) %>% -+ update(deg_free = deg_free(c(2, 10))) %>% -+ update(`wt degree` = degree_int(1:2)) %>% ++ cars_wflow |> ++ parameters |> ++ update(degree = degree_int(1:2)) |> ++ update(deg_free = deg_free(c(2, 10))) |> ++ update(`wt degree` = degree_int(1:2)) |> + update(`wt df` = deg_free(c(2, 10))) Warning message: `parameters.workflow()` was deprecated in tune 0.1.6.9003. @@ -396,7 +490,7 @@ Warning message: > > set.seed(255) > cars_grid <- -+ cars_set %>% ++ cars_set |> + grid_regular(levels = c(3, 2, 3, 2)) > > @@ -408,9 +502,139 @@ Warning message: + control = control_grid(verbose = FALSE, save_pred = TRUE) + ) → A | warning: prediction from rank-deficient fit; consider predict(., rankdeficient="NA") - There were issues with some computations A: x1 There were issues with some computations A: x3 There were issues with some computations A: x4 → B | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases -There were issues with some computations A: x4 There were issues with some computations A: x4 B: x3 There were issues with some computations A: x4 B: x9 There were issues with some computations A: x4 B: x14 → C | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases, prediction from rank-deficient fit; consider predict(., rankdeficient="NA") -There were issues with some computations A: x4 B: x14 There were issues with some computations A: x4 B: x18 C: x2 There were issues with some computations A: x4 B: x23 C: x2 There were issues with some computations A: x4 B: x29 C: x2 There were issues with some computations A: x4 B: x32 C: x4 There were issues with some computations A: x4 B: x33 C: x4 There were issues with some computations A: x4 B: x39 C: x4 There were issues with some computations A: x4 B: x44 C: x4 There were issues with some computations A: x4 B: x49 C: x4 There were issues with some computations A: x4 B: x54 C: x4 There were issues with some computations A: x4 B: x60 C: x4 There were issues with some computations A: x4 B: x65 C: x4 There were issues with some computations A: x4 B: x71 C: x4 There were issues with some computations A: x4 B: x76 C: x4 There were issues with some computations A: x4 B: x82 C: x4 There were issues with some computations A: x4 B: x88 C: x4 There were issues with some computations A: x4 B: x93 C: x4 There were issues with some computations A: x4 B: x99 C: x4 There were issues with some computations A: x4 B: x104 C: x4 There were issues with some computations A: x4 B: x105 C: x4 There were issues with some computations A: x4 B: x111 C: x4 There were issues with some computations A: x4 B: x117 C: x4 There were issues with some computations A: x4 B: x122 C: x4 There were issues with some computations A: x4 B: x128 C: x4 There were issues with some computations A: x4 B: x133 C: x4 There were issues with some computations A: x4 B: x139 C: x4 There were issues with some computations A: x4 B: x144 C: x4 There were issues with some computations A: x4 B: x150 C: x4 There were issues with some computations A: x4 B: x154 C: x5 There were issues with some computations A: x4 B: x159 C: x6 There were issues with some computations A: x4 B: x164 C: x6 There were issues with some computations A: x4 B: x170 C: x6 There were issues with some computations A: x5 B: x172 C: x8 There were issues with some computations A: x7 B: x172 C: x8 There were issues with some computations A: x8 B: x172 C: x8 There were issues with some computations A: x8 B: x173 C: x8 There were issues with some computations A: x8 B: x178 C: x8 There were issues with some computations A: x8 B: x184 C: x8 There were issues with some computations A: x8 B: x189 C: x8 There were issues with some computations A: x8 B: x195 C: x8 There were issues with some computations A: x8 B: x200 C: x8 There were issues with some computations A: x8 B: x206 C: x8 There were issues with some computations A: x8 B: x209 C: x8 There were issues with some computations A: x8 B: x215 C: x8 There were issues with some computations A: x8 B: x220 C: x8 There were issues with some computations A: x8 B: x226 C: x8 There were issues with some computations A: x8 B: x229 C: x8 There were issues with some computations A: x8 B: x235 C: x8 There were issues with some computations A: x8 B: x241 C: x8 There were issues with some computations A: x8 B: x245 C: x8 There were issues with some computations A: x8 B: x246 C: x8 There were issues with some computations A: x8 B: x251 C: x8 There were issues with some computations A: x8 B: x257 C: x8 There were issues with some computations A: x8 B: x263 C: x8 There were issues with some computations A: x8 B: x269 C: x8 There were issues with some computations A: x8 B: x274 C: x8 There were issues with some computations A: x8 B: x280 C: x8 There were issues with some computations A: x8 B: x280 C: x8 +There were issues with some computations A: x1 + +There were issues with some computations A: x3 + +There were issues with some computations A: x4 + + +→ B | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases +There were issues with some computations A: x4 + +There were issues with some computations A: x4 B: x3 + +There were issues with some computations A: x4 B: x9 + +There were issues with some computations A: x4 B: x14 + + +→ C | warning: some 'x' values beyond boundary knots may cause ill-conditioned bases, prediction from rank-deficient fit; consider predict(., rankdeficient="NA") +There were issues with some computations A: x4 B: x14 + +There were issues with some computations A: x4 B: x18 C: x2 + +There were issues with some computations A: x4 B: x23 C: x2 + +There were issues with some computations A: x4 B: x29 C: x2 + +There were issues with some computations A: x4 B: x32 C: x4 + +There were issues with some computations A: x4 B: x33 C: x4 + +There were issues with some computations A: x4 B: x39 C: x4 + +There were issues with some computations A: x4 B: x44 C: x4 + +There were issues with some computations A: x4 B: x49 C: x4 + +There were issues with some computations A: x4 B: x54 C: x4 + +There were issues with some computations A: x4 B: x60 C: x4 + +There were issues with some computations A: x4 B: x65 C: x4 + +There were issues with some computations A: x4 B: x71 C: x4 + +There were issues with some computations A: x4 B: x76 C: x4 + +There were issues with some computations A: x4 B: x82 C: x4 + +There were issues with some computations A: x4 B: x88 C: x4 + +There were issues with some computations A: x4 B: x93 C: x4 + +There were issues with some computations A: x4 B: x99 C: x4 + +There were issues with some computations A: x4 B: x104 C: x4 + +There were issues with some computations A: x4 B: x105 C: x4 + +There were issues with some computations A: x4 B: x111 C: x4 + +There were issues with some computations A: x4 B: x117 C: x4 + +There were issues with some computations A: x4 B: x122 C: x4 + +There were issues with some computations A: x4 B: x128 C: x4 + +There were issues with some computations A: x4 B: x133 C: x4 + +There were issues with some computations A: x4 B: x139 C: x4 + +There were issues with some computations A: x4 B: x144 C: x4 + +There were issues with some computations A: x4 B: x150 C: x4 + +There were issues with some computations A: x4 B: x154 C: x5 + +There were issues with some computations A: x4 B: x159 C: x6 + +There were issues with some computations A: x4 B: x164 C: x6 + +There were issues with some computations A: x4 B: x170 C: x6 + +There were issues with some computations A: x5 B: x172 C: x8 + +There were issues with some computations A: x7 B: x172 C: x8 + +There were issues with some computations A: x8 B: x172 C: x8 + +There were issues with some computations A: x8 B: x173 C: x8 + +There were issues with some computations A: x8 B: x178 C: x8 + +There were issues with some computations A: x8 B: x184 C: x8 + +There were issues with some computations A: x8 B: x189 C: x8 + +There were issues with some computations A: x8 B: x195 C: x8 + +There were issues with some computations A: x8 B: x200 C: x8 + +There were issues with some computations A: x8 B: x206 C: x8 + +There were issues with some computations A: x8 B: x209 C: x8 + +There were issues with some computations A: x8 B: x215 C: x8 + +There were issues with some computations A: x8 B: x220 C: x8 + +There were issues with some computations A: x8 B: x226 C: x8 + +There were issues with some computations A: x8 B: x229 C: x8 + +There were issues with some computations A: x8 B: x235 C: x8 + +There were issues with some computations A: x8 B: x241 C: x8 + +There were issues with some computations A: x8 B: x245 C: x8 + +There were issues with some computations A: x8 B: x246 C: x8 + +There were issues with some computations A: x8 B: x251 C: x8 + +There were issues with some computations A: x8 B: x257 C: x8 + +There were issues with some computations A: x8 B: x263 C: x8 + +There were issues with some computations A: x8 B: x269 C: x8 + +There were issues with some computations A: x8 B: x274 C: x8 + +There were issues with some computations A: x8 B: x280 C: x8 + +There were issues with some computations A: x8 B: x280 C: x8 > > saveRDS( + rcv_results, @@ -429,7 +653,7 @@ There were issues with some computations A: x4 B: x14 There were issues wit > > rec <- recipe(mpg ~ ., data = mtcars) > -> mod <- linear_reg() %>% +> mod <- linear_reg() |> + set_engine("lm") > > lm_resamples <- fit_resamples(mod, rec, folds) @@ -457,21 +681,25 @@ There were issues with some computations A: x4 B: x14 There were issues wit > set.seed(7898) > folds <- vfold_cv(mtcars, v = 2) > -> rec <- recipe(mpg ~ ., data = mtcars) %>% -+ step_normalize(all_predictors()) %>% +> rec <- recipe(mpg ~ ., data = mtcars) |> ++ step_normalize(all_predictors()) |> + step_ns(disp, deg_free = tune()) > -> mod <- linear_reg(mode = "regression") %>% +> mod <- linear_reg(mode = "regression") |> + set_engine("lm") > -> wflow <- workflow() %>% -+ add_recipe(rec) %>% +> wflow <- workflow() |> ++ add_recipe(rec) |> + add_model(mod) > > set.seed(2934) > lm_bayes <- tune_bayes(wflow, folds, initial = 4, iter = 3) → A | warning: prediction from rank-deficient fit; consider predict(., rankdeficient="NA") - There were issues with some computations A: x2 There were issues with some computations A: x5 There were issues with some computations A: x6 +There were issues with some computations A: x2 + +There were issues with some computations A: x5 + +There were issues with some computations A: x6 > > saveRDS( + lm_bayes, @@ -484,8 +712,8 @@ There were issues with some computations A: x4 B: x14 There were issues wit > # A single survival model > > set.seed(1) -> sim_dat <- prodlim::SimSurv(200) %>% -+ mutate(event_time = Surv(time, event)) %>% +> sim_dat <- prodlim::SimSurv(200) |> ++ mutate(event_time = Surv(time, event)) |> + select(event_time, X1, X2) > > set.seed(2) @@ -494,8 +722,8 @@ There were issues with some computations A: x4 B: x14 There were issues wit > time_points <- c(10, 1, 5, 15) > > boost_spec <- -+ boost_tree(trees = tune()) %>% -+ set_mode("censored regression") %>% ++ boost_tree(trees = tune()) |> ++ set_mode("censored regression") |> + set_engine("mboost") > > srv_mtr <- @@ -508,7 +736,7 @@ There were issues with some computations A: x4 B: x14 There were issues wit > > set.seed(2193) > surv_boost_tree_res <- -+ boost_spec %>% ++ boost_spec |> + tune_grid( + event_time ~ X1 + X2, + resamples = sim_rs, diff --git a/man/collect_predictions.Rd b/man/collect_predictions.Rd index b2ca16bc1..faea494bf 100644 --- a/man/collect_predictions.Rd +++ b/man/collect_predictions.Rd @@ -144,28 +144,28 @@ library(dplyr) library(recipes) library(tibble) -lm_mod <- linear_reg() \%>\% set_engine("lm") +lm_mod <- linear_reg() |> set_engine("lm") set.seed(93599150) car_folds <- vfold_cv(mtcars, v = 2, repeats = 3) ctrl <- control_resamples(save_pred = TRUE, extract = extract_fit_engine) spline_rec <- - recipe(mpg ~ ., data = mtcars) \%>\% + recipe(mpg ~ ., data = mtcars) |> step_spline_natural(disp, deg_free = tune("df")) grid <- tibble(df = 3:6) resampled <- - lm_mod \%>\% + lm_mod |> tune_grid(spline_rec, resamples = car_folds, control = ctrl, grid = grid) -collect_predictions(resampled) \%>\% arrange(.row) -collect_predictions(resampled, summarize = TRUE) \%>\% arrange(.row) +collect_predictions(resampled) |> arrange(.row) +collect_predictions(resampled, summarize = TRUE) |> arrange(.row) collect_predictions( resampled, summarize = TRUE, parameters = grid[1, ] -) \%>\% arrange(.row) +) |> arrange(.row) collect_extracts(resampled) \dontshow{\}) # examplesIf} diff --git a/man/conf_mat_resampled.Rd b/man/conf_mat_resampled.Rd index 6c6b6a202..08307698a 100644 --- a/man/conf_mat_resampled.Rd +++ b/man/conf_mat_resampled.Rd @@ -37,8 +37,8 @@ data(two_class_dat, package = "modeldata") set.seed(2393) res <- - logistic_reg() \%>\% - set_engine("glm") \%>\% + logistic_reg() |> + set_engine("glm") |> fit_resamples( Class ~ ., resamples = vfold_cv(two_class_dat, v = 3), diff --git a/man/dot-use_case_weights_with_yardstick.Rd b/man/dot-use_case_weights_with_yardstick.Rd index cd5bc781a..d231534e0 100644 --- a/man/dot-use_case_weights_with_yardstick.Rd +++ b/man/dot-use_case_weights_with_yardstick.Rd @@ -29,9 +29,9 @@ occur. library(parsnip) library(dplyr) -frequency_weights(1:10) \%>\% +frequency_weights(1:10) |> .use_case_weights_with_yardstick() -importance_weights(seq(1, 10, by = .1))\%>\% +importance_weights(seq(1, 10, by = .1))|> .use_case_weights_with_yardstick() } diff --git a/man/example_ames_knn.Rd b/man/example_ames_knn.Rd index 9640c6876..59ffa3ba0 100644 --- a/man/example_ames_knn.Rd +++ b/man/example_ames_knn.Rd @@ -40,13 +40,13 @@ rs_splits <- vfold_cv(ames_train, strata = "Sale_Price") # ------------------------------------------------------------------------------ ames_rec <- - recipe(Sale_Price ~ ., data = ames_train) \%>\% - step_log(Sale_Price, base = 10) \%>\% - step_YeoJohnson(Lot_Area, Gr_Liv_Area) \%>\% - step_other(Neighborhood, threshold = .1) \%>\% - step_dummy(all_nominal()) \%>\% - step_zv(all_predictors()) \%>\% - step_spline_natural(Longitude, deg_free = tune("lon")) \%>\% + recipe(Sale_Price ~ ., data = ames_train) |> + step_log(Sale_Price, base = 10) |> + step_YeoJohnson(Lot_Area, Gr_Liv_Area) |> + step_other(Neighborhood, threshold = .1) |> + step_dummy(all_nominal()) |> + step_zv(all_predictors()) |> + step_spline_natural(Longitude, deg_free = tune("lon")) |> step_spline_natural(Latitude, deg_free = tune("lat")) knn_model <- @@ -55,21 +55,21 @@ knn_model <- neighbors = tune("K"), weight_func = tune(), dist_power = tune() - ) \%>\% + ) |> set_engine("kknn") ames_wflow <- - workflow() \%>\% - add_recipe(ames_rec) \%>\% + workflow() |> + add_recipe(ames_rec) |> add_model(knn_model) ames_set <- - extract_parameter_set_dials(ames_wflow) \%>\% + extract_parameter_set_dials(ames_wflow) |> update(K = neighbors(c(1, 50))) set.seed(7014) ames_grid <- - ames_set \%>\% + ames_set |> grid_max_entropy(size = 10) ames_grid_search <- diff --git a/man/expo_decay.Rd b/man/expo_decay.Rd index 2956b8f7f..e3a69eec9 100644 --- a/man/expo_decay.Rd +++ b/man/expo_decay.Rd @@ -44,7 +44,7 @@ tibble( limit_val = 0, slope = 1 / 5 ) -) \%>\% +) |> ggplot(aes(x = iter, y = value)) + geom_path() \dontshow{\}) # examplesIf} diff --git a/man/extract-tune.Rd b/man/extract-tune.Rd index 0947109b5..16c929cc1 100644 --- a/man/extract-tune.Rd +++ b/man/extract-tune.Rd @@ -83,10 +83,10 @@ library(parsnip) set.seed(6735) tr_te_split <- initial_split(mtcars) -spline_rec <- recipe(mpg ~ ., data = mtcars) \%>\% +spline_rec <- recipe(mpg ~ ., data = mtcars) |> step_spline_natural(disp) -lin_mod <- linear_reg() \%>\% +lin_mod <- linear_reg() |> set_engine("lm") spline_res <- last_fit(lin_mod, spline_rec, split = tr_te_split) diff --git a/man/filter_parameters.Rd b/man/filter_parameters.Rd index a7800ec1f..7f1bc0453 100644 --- a/man/filter_parameters.Rd +++ b/man/filter_parameters.Rd @@ -45,24 +45,24 @@ data("example_ames_knn") ## ----------------------------------------------------------------------------- # select all combinations using the 'rank' weighting scheme -ames_grid_search \%>\% +ames_grid_search |> collect_metrics() -filter_parameters(ames_grid_search, weight_func == "rank") \%>\% +filter_parameters(ames_grid_search, weight_func == "rank") |> collect_metrics() rank_only <- tibble::tibble(weight_func = "rank") -filter_parameters(ames_grid_search, parameters = rank_only) \%>\% +filter_parameters(ames_grid_search, parameters = rank_only) |> collect_metrics() ## ----------------------------------------------------------------------------- # Keep only the results from the numerically best combination -ames_iter_search \%>\% +ames_iter_search |> collect_metrics() best_param <- select_best(ames_iter_search, metric = "rmse") -ames_iter_search \%>\% - filter_parameters(parameters = best_param) \%>\% +ames_iter_search |> + filter_parameters(parameters = best_param) |> collect_metrics() } diff --git a/man/finalize_model.Rd b/man/finalize_model.Rd index 33548000c..dcfc34416 100644 --- a/man/finalize_model.Rd +++ b/man/finalize_model.Rd @@ -42,7 +42,7 @@ knn_model <- neighbors = tune("K"), weight_func = tune(), dist_power = tune() - ) \%>\% + ) |> set_engine("kknn") lowest_rmse <- select_best(ames_grid_search, metric = "rmse") diff --git a/man/fit_best.Rd b/man/fit_best.Rd index eeac15064..8afa629a6 100644 --- a/man/fit_best.Rd +++ b/man/fit_best.Rd @@ -116,7 +116,7 @@ library(parsnip) library(dplyr) data(meats, package = "modeldata") -meats <- meats \%>\% select(-water, -fat) +meats <- meats |> select(-water, -fat) set.seed(1) meat_split <- initial_split(meats) @@ -127,11 +127,11 @@ set.seed(2) meat_rs <- vfold_cv(meat_train, v = 10) pca_rec <- - recipe(protein ~ ., data = meat_train) \%>\% - step_normalize(all_numeric_predictors()) \%>\% + recipe(protein ~ ., data = meat_train) |> + step_normalize(all_numeric_predictors()) |> step_pca(all_numeric_predictors(), num_comp = tune()) -knn_mod <- nearest_neighbor(neighbors = tune()) \%>\% set_mode("regression") +knn_mod <- nearest_neighbor(neighbors = tune()) |> set_mode("regression") ctrl <- control_grid(save_workflow = TRUE) diff --git a/man/fit_resamples.Rd b/man/fit_resamples.Rd index 1fdc34ceb..d5cb9aefc 100644 --- a/man/fit_resamples.Rd +++ b/man/fit_resamples.Rd @@ -215,11 +215,11 @@ library(workflows) set.seed(6735) folds <- vfold_cv(mtcars, v = 5) -spline_rec <- recipe(mpg ~ ., data = mtcars) \%>\% - step_spline_natural(disp) \%>\% +spline_rec <- recipe(mpg ~ ., data = mtcars) |> + step_spline_natural(disp) |> step_spline_natural(wt) -lin_mod <- linear_reg() \%>\% +lin_mod <- linear_reg() |> set_engine("lm") control <- control_resamples(save_pred = TRUE) @@ -234,8 +234,8 @@ show_best(spline_res, metric = "rmse") # supply that to `fit_resamples()` instead. Here, a workflows "variables" # preprocessor is used, which lets you supply terms using dplyr selectors. # The variables are used as-is, no preprocessing is done to them. -wf <- workflow() \%>\% - add_variables(outcomes = mpg, predictors = everything()) \%>\% +wf <- workflow() |> + add_variables(outcomes = mpg, predictors = everything()) |> add_model(lin_mod) wf_res <- fit_resamples(wf, folds) diff --git a/man/int_pctl.tune_results.Rd b/man/int_pctl.tune_results.Rd index 940f041b3..d93b2c05a 100644 --- a/man/int_pctl.tune_results.Rd +++ b/man/int_pctl.tune_results.Rd @@ -87,7 +87,7 @@ if (rlang::is_installed("modeldata")) { sac_rs <- vfold_cv(Sacramento) lm_res <- - linear_reg() \%>\% + linear_reg() |> fit_resamples( log10(price) ~ beds + baths + sqft + type + latitude + longitude, resamples = sac_rs, diff --git a/man/last_fit.Rd b/man/last_fit.Rd index 0e636105e..cf0dbcbba 100644 --- a/man/last_fit.Rd +++ b/man/last_fit.Rd @@ -160,10 +160,10 @@ library(parsnip) set.seed(6735) tr_te_split <- initial_split(mtcars) -spline_rec <- recipe(mpg ~ ., data = mtcars) \%>\% +spline_rec <- recipe(mpg ~ ., data = mtcars) |> step_spline_natural(disp) -lin_mod <- linear_reg() \%>\% +lin_mod <- linear_reg() |> set_engine("lm") spline_res <- last_fit(lin_mod, spline_rec, split = tr_te_split) @@ -179,8 +179,8 @@ collect_predictions(spline_res) library(workflows) spline_wfl <- - workflow() \%>\% - add_recipe(spline_rec) \%>\% + workflow() |> + add_recipe(spline_rec) |> add_model(lin_mod) last_fit(spline_wfl, split = tr_te_split) diff --git a/man/merge.recipe.Rd b/man/merge.recipe.Rd index e7a6e7f52..99221a246 100644 --- a/man/merge.recipe.Rd +++ b/man/merge.recipe.Rd @@ -33,8 +33,8 @@ library(parsnip) library(dials) pca_rec <- - recipe(mpg ~ ., data = mtcars) \%>\% - step_impute_knn(all_predictors(), neighbors = tune()) \%>\% + recipe(mpg ~ ., data = mtcars) |> + step_impute_knn(all_predictors(), neighbors = tune()) |> step_pca(all_predictors(), num_comp = tune()) pca_grid <- @@ -49,8 +49,8 @@ pca_grid <- merge(pca_rec, pca_grid) spline_rec <- - recipe(mpg ~ ., data = mtcars) \%>\% - step_spline_natural(disp, deg_free = tune("disp df")) \%>\% + recipe(mpg ~ ., data = mtcars) |> + step_spline_natural(disp, deg_free = tune("disp df")) |> step_spline_natural(wt, deg_free = tune("wt df")) spline_grid <- @@ -67,13 +67,13 @@ merge(pca_rec, pca_grid) data(hpc_data, package = "modeldata") xgb_mod <- - boost_tree(trees = tune(), min_n = tune()) \%>\% + boost_tree(trees = tune(), min_n = tune()) |> set_engine("xgboost") set.seed(254) xgb_grid <- - extract_parameter_set_dials(xgb_mod) \%>\% - finalize(hpc_data) \%>\% + extract_parameter_set_dials(xgb_mod) |> + finalize(hpc_data) |> grid_max_entropy(size = 3) merge(xgb_mod, xgb_grid) diff --git a/man/min_grid.Rd b/man/min_grid.Rd index 1eba24ef9..f80718fac 100644 --- a/man/min_grid.Rd +++ b/man/min_grid.Rd @@ -71,13 +71,13 @@ library(parsnip) ## No ability to exploit submodels: svm_spec <- - svm_poly(cost = tune(), degree = tune()) \%>\% - set_engine("kernlab") \%>\% + svm_poly(cost = tune(), degree = tune()) |> + set_engine("kernlab") |> set_mode("regression") svm_grid <- - svm_spec \%>\% - extract_parameter_set_dials() \%>\% + svm_spec |> + extract_parameter_set_dials() |> grid_regular(levels = 3) min_grid(svm_spec, svm_grid) @@ -86,13 +86,13 @@ min_grid(svm_spec, svm_grid) ## Can use submodels xgb_spec <- - boost_tree(trees = tune(), min_n = tune()) \%>\% - set_engine("xgboost") \%>\% + boost_tree(trees = tune(), min_n = tune()) |> + set_engine("xgboost") |> set_mode("regression") xgb_grid <- - xgb_spec \%>\% - extract_parameter_set_dials() \%>\% + xgb_spec |> + extract_parameter_set_dials() |> grid_regular(levels = 3) min_grid(xgb_spec, xgb_grid) diff --git a/man/outcome_names.Rd b/man/outcome_names.Rd index 1066d9aa5..56ade543e 100644 --- a/man/outcome_names.Rd +++ b/man/outcome_names.Rd @@ -34,8 +34,8 @@ Determine names of the outcome data in a workflow } \examples{ library(dplyr) -lm(cbind(mpg, wt) ~ ., data = mtcars) \%>\% - purrr::pluck(terms) \%>\% +lm(cbind(mpg, wt) ~ ., data = mtcars) |> + purrr::pluck(terms) |> outcome_names() } \keyword{internal} diff --git a/man/tune_bayes.Rd b/man/tune_bayes.Rd index e4bf53197..71082373e 100644 --- a/man/tune_bayes.Rd +++ b/man/tune_bayes.Rd @@ -303,13 +303,13 @@ set.seed(6735) folds <- vfold_cv(mtcars, v = 5) car_rec <- - recipe(mpg ~ ., data = mtcars) \%>\% + recipe(mpg ~ ., data = mtcars) |> step_normalize(all_predictors()) # define an svm with parameters to tune svm_mod <- - svm_rbf(cost = tune(), rbf_sigma = tune()) \%>\% - set_engine("kernlab") \%>\% + svm_rbf(cost = tune(), rbf_sigma = tune()) |> + set_engine("kernlab") |> set_mode("regression") # use a space-filling design with 6 points diff --git a/man/tune_grid.Rd b/man/tune_grid.Rd index e686ab700..976829f29 100644 --- a/man/tune_grid.Rd +++ b/man/tune_grid.Rd @@ -292,12 +292,12 @@ folds <- vfold_cv(mtcars, v = 5) # tuning recipe parameters: spline_rec <- - recipe(mpg ~ ., data = mtcars) \%>\% - step_spline_natural(disp, deg_free = tune("disp")) \%>\% + recipe(mpg ~ ., data = mtcars) |> + step_spline_natural(disp, deg_free = tune("disp")) |> step_spline_natural(wt, deg_free = tune("wt")) lin_mod <- - linear_reg() \%>\% + linear_reg() |> set_engine("lm") # manually create a grid @@ -317,12 +317,12 @@ show_best(spline_res, metric = "rmse") # tune model parameters only (example requires the `kernlab` package) car_rec <- - recipe(mpg ~ ., data = mtcars) \%>\% + recipe(mpg ~ ., data = mtcars) |> step_normalize(all_predictors()) svm_mod <- - svm_rbf(cost = tune(), rbf_sigma = tune()) \%>\% - set_engine("kernlab") \%>\% + svm_rbf(cost = tune(), rbf_sigma = tune()) |> + set_engine("kernlab") |> set_mode("regression") # Use a space-filling design with 7 points @@ -343,8 +343,8 @@ autoplot(svm_res, metric = "rmse") + # to `tune_grid()`, you can also wrap them up in a workflow and pass # that along instead (note that this doesn't do any preprocessing to # the variables, it passes them along as-is). -wf <- workflow() \%>\% - add_variables(outcomes = mpg, predictors = everything()) \%>\% +wf <- workflow() |> + add_variables(outcomes = mpg, predictors = everything()) |> add_model(svm_mod) set.seed(3254) diff --git a/revdep/problems.md b/revdep/problems.md index ffe43271f..4267cf130 100644 --- a/revdep/problems.md +++ b/revdep/problems.md @@ -28,7 +28,7 @@ Run `revdepcheck::revdep_details(, "finetune")` for more info Backtrace: ... ▆ - 1. ├─cart_search %>% filter(.iter == iter_val) %>% ... at test-sa-decision.R:16:5 + 1. ├─cart_search |> filter(.iter == iter_val) |> ... at test-sa-decision.R:16:5 2. └─tune:::new_tune_results(...) 3. └─tune:::new_bare_tibble(...) 4. └─tibble::new_tibble(x, nrow = nrow(x), ..., class = class) @@ -62,11 +62,11 @@ Run `revdepcheck::revdep_details(, "tidyclust")` for more info Running the tests in ‘tests/testthat.R’ failed. Last 13 lines of output: Error in `if (metric_info$direction == "maximize") { - summary_res <- summary_res %>% dplyr::arrange(dplyr::desc(mean)) + summary_res <- summary_res |> dplyr::arrange(dplyr::desc(mean)) } else if (metric_info$direction == "minimize") { - summary_res <- summary_res %>% dplyr::arrange(mean) + summary_res <- summary_res |> dplyr::arrange(mean) } else if (metric_info$direction == "zero") { - summary_res <- summary_res %>% dplyr::arrange(abs(mean)) + summary_res <- summary_res |> dplyr::arrange(abs(mean)) }`: argument is of length zero Backtrace: ▆ @@ -115,7 +115,7 @@ Run `revdepcheck::revdep_details(, "tidysdm")` for more info Warning: Unknown or uninitialised column: `metric`. Error in if (!any(mtr_info$metric == metric)) { : missing value where TRUE/FALSE needed - Calls: %>% ... add_member.tune_results -> -> check_metric_in_tune_results + Calls: |> ... add_member.tune_results -> -> check_metric_in_tune_results Execution halted ``` @@ -126,14 +126,14 @@ Run `revdepcheck::revdep_details(, "tidysdm")` for more info ERROR Running the tests in ‘tests/testthat.R’ failed. Last 13 lines of output: - 1. ├─testthat::expect_warning(test_ens <- simple_ensemble() %>% add_member(none_mars)) at test_simple_ensemble.R:14:3 + 1. ├─testthat::expect_warning(test_ens <- simple_ensemble() |> add_member(none_mars)) at test_simple_ensemble.R:14:3 2. │ └─testthat:::expect_condition_matching(...) 3. │ └─testthat:::quasi_capture(...) 4. │ ├─testthat (local) .capture(...) 5. │ │ └─base::withCallingHandlers(...) ... 6. │ └─rlang::eval_bare(quo_get_expr(.quo), quo_get_env(.quo)) - 7. ├─simple_ensemble() %>% add_member(none_mars) + 7. ├─simple_ensemble() |> add_member(none_mars) 8. ├─tidysdm::add_member(., none_mars) 9. └─tidysdm:::add_member.tune_results(., none_mars) 10. └─(utils::getFromNamespace("choose_metric", "tune"))(metric, member) diff --git a/tests/testthat/_snaps/bayes.md b/tests/testthat/_snaps/bayes.md index 0fb736b85..26f33f7b5 100644 --- a/tests/testthat/_snaps/bayes.md +++ b/tests/testthat/_snaps/bayes.md @@ -519,7 +519,7 @@ Code set.seed(1) - res <- mod %>% tune_bayes(Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + + res <- tune_bayes(mod, Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type + Latitude + Longitude, resamples = folds, initial = 3, metrics = yardstick::metric_set( rsq), param_info = parameters(dials::cost_complexity(c(-2, 0)))) Message @@ -561,8 +561,8 @@ Code set.seed(2) - res_fail <- mod %>% tune_bayes(Sale_Price ~ Neighborhood + Gr_Liv_Area + - Year_Built + Bldg_Type + Latitude + Longitude, resamples = folds, initial = 5, + res_fail <- tune_bayes(mod, Sale_Price ~ Neighborhood + Gr_Liv_Area + + Year_Built + Bldg_Type + Latitude + Longitude, resamples = folds, initial = 5, metrics = yardstick::metric_set(rsq), param_info = parameters(dials::cost_complexity( c(0.5, 0)))) Message diff --git a/tests/testthat/_snaps/censored-reg.md b/tests/testthat/_snaps/censored-reg.md index 53a43be36..765c67a60 100644 --- a/tests/testthat/_snaps/censored-reg.md +++ b/tests/testthat/_snaps/censored-reg.md @@ -1,7 +1,7 @@ # evaluation time Code - spec %>% tune_grid(Surv(time, status) ~ ., resamples = rs, metrics = mtr) + tune_grid(spec, Surv(time, status) ~ ., resamples = rs, metrics = mtr) Condition Error in `check_enough_eval_times()`: ! At least 1 evaluation time is required for the metric type(s) requested: "dynamic_survival_metric". Only 0 unique times were given. @@ -9,7 +9,7 @@ --- Code - spec %>% tune_grid(Surv(time, status) ~ ., resamples = rs, metrics = reg_mtr) + tune_grid(spec, Surv(time, status) ~ ., resamples = rs, metrics = reg_mtr) Condition Error in `tune_grid()`: ! The parsnip model has `mode` value of "censored regression", but the `metrics` is a metric set for a different model mode. @@ -17,8 +17,7 @@ --- Code - linear_reg() %>% tune_grid(age ~ ., resamples = rs, metrics = reg_mtr, - eval_time = 1) + tune_grid(linear_reg(), age ~ ., resamples = rs, metrics = reg_mtr, eval_time = 1) Condition Warning in `tune_grid()`: `eval_time` is only used for models with mode "censored regression". @@ -45,7 +44,7 @@ --- Code - no_usable_times <- spec %>% tune_grid(Surv(time, status) ~ ., resamples = rs, + no_usable_times <- tune_grid(spec, Surv(time, status) ~ ., resamples = rs, metrics = mtr, eval_time = c(-1, Inf)) Condition Error: diff --git a/tests/testthat/_snaps/collect.md b/tests/testthat/_snaps/collect.md index a8e051db0..603ccfa0a 100644 --- a/tests/testthat/_snaps/collect.md +++ b/tests/testthat/_snaps/collect.md @@ -1,7 +1,7 @@ # `collect_predictions()` errors informatively if there is no `.predictions` column Code - collect_predictions(lm_splines %>% dplyr::select(-.predictions)) + collect_predictions(dplyr::select(lm_splines, -.predictions)) Condition Error in `collect_predictions()`: ! The .predictions column does not exist. Please refit with the control argument `save_pred = TRUE` to save predictions. diff --git a/tests/testthat/_snaps/engine-parameters.md b/tests/testthat/_snaps/engine-parameters.md index 09f882128..7b4ef971f 100644 --- a/tests/testthat/_snaps/engine-parameters.md +++ b/tests/testthat/_snaps/engine-parameters.md @@ -1,7 +1,7 @@ # tuning with engine parameters without dials objects Code - rf_tune <- rf_mod %>% tune_grid(mpg ~ ., resamples = rs, grid = 3) + rf_tune <- tune_grid(rf_mod, mpg ~ ., resamples = rs, grid = 3) Condition Error in `check_param_objects()`: ! The workflow has arguments to be tuned that are missing some parameter objects: "corr.bias" @@ -17,7 +17,7 @@ --- Code - rf_search <- rf_mod %>% tune_bayes(mpg ~ ., resamples = rs) + rf_search <- tune_bayes(rf_mod, mpg ~ ., resamples = rs) Condition Error in `check_param_objects()`: ! The workflow has arguments to be tuned that are missing some parameter objects: "corr.bias" diff --git a/tests/testthat/_snaps/finalization.md b/tests/testthat/_snaps/finalization.md index 780f74bac..ee688489d 100644 --- a/tests/testthat/_snaps/finalization.md +++ b/tests/testthat/_snaps/finalization.md @@ -1,7 +1,7 @@ # cannot finalize with recipe parameters Code - mod_1 %>% tune_grid(rec_1, resamples = rs, grid = 3) + tune_grid(mod_1, rec_1, resamples = rs, grid = 3) Condition Error in `check_parameters()`: ! Some model parameters require finalization but there are recipe parameters that require tuning. diff --git a/tests/testthat/_snaps/last-fit.md b/tests/testthat/_snaps/last-fit.md index 6ea966475..e9c3dbc3a 100644 --- a/tests/testthat/_snaps/last-fit.md +++ b/tests/testthat/_snaps/last-fit.md @@ -18,7 +18,7 @@ # ellipses with last_fit Code - linear_reg() %>% set_engine("lm") %>% last_fit(f, split, something = "wrong") + last_fit(set_engine(linear_reg(), "lm"), f, split, something = "wrong") Condition Warning: The `...` are not used in this function but 1 object was passed: "something" diff --git a/tests/testthat/_snaps/logging.md b/tests/testthat/_snaps/logging.md index f2147c7bc..df548044c 100644 --- a/tests/testthat/_snaps/logging.md +++ b/tests/testthat/_snaps/logging.md @@ -129,7 +129,7 @@ # logging search info Code - check_and_log_flow(ctrl_t, tb_1 %>% mutate(.mean = .mean * NA)) + check_and_log_flow(ctrl_t, mutate(tb_1, .mean = .mean * NA)) Message x Skipping to next iteration Condition @@ -139,7 +139,7 @@ --- Code - check_and_log_flow(ctrl_t, tb_1 %>% mutate(.mean = .mean * NA) %>% slice(1)) + check_and_log_flow(ctrl_t, slice(mutate(tb_1, .mean = .mean * NA), 1)) Message x Halting search Condition diff --git a/tests/testthat/_snaps/notes.md b/tests/testthat/_snaps/notes.md index 6eb3e5efd..e1a6b7f8f 100644 --- a/tests/testthat/_snaps/notes.md +++ b/tests/testthat/_snaps/notes.md @@ -1,7 +1,7 @@ # showing notes Code - res_roles <- role_bp_wflow %>% fit_resamples(rs) + res_roles <- fit_resamples(role_bp_wflow, rs) Message x Fold01: preprocessor 1/1, model 1/1 (predictions): Error in `step_date()`: @@ -50,7 +50,7 @@ --- Code - res_simple <- simple_wflow %>% fit_resamples(rs) + res_simple <- fit_resamples(simple_wflow, rs) Message ! Fold01: preprocessor 1/1, model 1/1 (predictions): prediction from rank-deficient fit; consider predict(., rankdeficient="NA") ! Fold02: preprocessor 1/1, model 1/1 (predictions): prediction from rank-deficient fit; consider predict(., rankdeficient="NA") @@ -82,7 +82,7 @@ --- Code - fit_lr <- parsnip::logistic_reg() %>% fit_resamples(class ~ ., rs) + fit_lr <- fit_resamples(parsnip::logistic_reg(), class ~ ., rs) Message ! Fold01: preprocessor 1/1, model 1/1: glm.fit: algorithm did not converge, glm.fit: fitted probabilities numer... ! Fold01: internal: No control observations were detected in `truth` with control level 'cla... diff --git a/tests/testthat/_snaps/resample.md b/tests/testthat/_snaps/resample.md index 835082c94..e44dd3eb7 100644 --- a/tests/testthat/_snaps/resample.md +++ b/tests/testthat/_snaps/resample.md @@ -89,7 +89,7 @@ # `tune_grid()` ignores `grid` if there are no tuning parameters Code - result <- lin_mod %>% tune_grid(mpg ~ ., grid = data.frame(x = 1), folds) + result <- tune_grid(lin_mod, mpg ~ ., grid = data.frame(x = 1), folds) Condition Warning: No tuning parameters have been detected, performance will be evaluated using the resamples with no tuning. @@ -106,7 +106,7 @@ # ellipses with fit_resamples Code - lin_mod %>% fit_resamples(mpg ~ ., folds, something = "wrong") + fit_resamples(lin_mod, mpg ~ ., folds, something = "wrong") Condition Warning: The `...` are not used in this function but 1 object was passed: "something" diff --git a/tests/testthat/helper-merge.R b/tests/testthat/helper-merge.R index 1d1b13f58..f653661d8 100644 --- a/tests/testthat/helper-merge.R +++ b/tests/testthat/helper-merge.R @@ -3,7 +3,7 @@ check_merged_tibble <- function(x, type = "recipe", complete = TRUE) { expect_equal(names(x), "x") expect_true(all(purrr::map_lgl(x$x, inherits, type))) if (complete) { - any_args <- purrr::map_int(x$x, ~ tune_args(.x) %>% nrow()) + any_args <- purrr::map_int(x$x, ~ tune_args(.x) |> nrow()) expect_true(!any(any_args > 0)) } invisible(TRUE) diff --git a/tests/testthat/helper-tune-package.R b/tests/testthat/helper-tune-package.R index c6c1506c3..3d64daa53 100644 --- a/tests/testthat/helper-tune-package.R +++ b/tests/testthat/helper-tune-package.R @@ -6,17 +6,17 @@ rankdeficient_version <- any(names(formals("predict.lm")) == "rankdeficient") helper_objects_tune <- function() { rec_tune_1 <- - recipes::recipe(mpg ~ ., data = mtcars) %>% - recipes::step_normalize(recipes::all_predictors()) %>% + recipes::recipe(mpg ~ ., data = mtcars) |> + recipes::step_normalize(recipes::all_predictors()) |> recipes::step_pca(recipes::all_predictors(), num_comp = tune()) rec_no_tune_1 <- - recipes::recipe(mpg ~ ., data = mtcars) %>% + recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_normalize(recipes::all_predictors()) - lm_mod <- parsnip::linear_reg() %>% parsnip::set_engine("lm") + lm_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") - svm_mod <- parsnip::svm_rbf(mode = "regression", cost = tune()) %>% + svm_mod <- parsnip::svm_rbf(mode = "regression", cost = tune()) |> parsnip::set_engine("kernlab") list( @@ -75,13 +75,13 @@ redefer_initialize_catalog <- function(test_env) { if (rlang::is_installed("splines2")) { rec_no_tune <- - recipes::recipe(mpg ~ ., data = mtcars) %>% - recipes::step_corr(all_predictors(), threshold = .1) %>% + recipes::recipe(mpg ~ ., data = mtcars) |> + recipes::step_corr(all_predictors(), threshold = .1) |> recipes::step_spline_natural(disp, deg_free = 5) rec_tune <- - recipes::recipe(mpg ~ ., data = mtcars) %>% - recipes::step_corr(all_predictors(), threshold = tune()) %>% + recipes::recipe(mpg ~ ., data = mtcars) |> + recipes::step_corr(all_predictors(), threshold = tune()) |> recipes::step_spline_natural(disp, deg_free = tune("disp_df")) } @@ -109,15 +109,15 @@ mod_tune_no_submodel <- if (rlang::is_installed("probably")) { tlr_tune <- - tailor::tailor() %>% + tailor::tailor() |> tailor::adjust_numeric_range(lower_limit = tune()) tlr_tune_cal <- - tailor::tailor() %>% - tailor::adjust_numeric_calibration(method = "linear") %>% + tailor::tailor() |> + tailor::adjust_numeric_calibration(method = "linear") |> tailor::adjust_numeric_range(lower_limit = tune()) tlr_no_tune <- - tailor::tailor() %>% + tailor::tailor() |> tailor::adjust_numeric_range(lower_limit = 0) } diff --git a/tests/testthat/test-GP.R b/tests/testthat/test-GP.R index 1b994e7e2..97f052741 100644 --- a/tests/testthat/test-GP.R +++ b/tests/testthat/test-GP.R @@ -54,8 +54,8 @@ test_that("GP scoring", { ctrl <- control_bayes() curr <- - collect_metrics(svm_results) %>% - dplyr::filter(.metric == "accuracy") %>% + collect_metrics(svm_results) |> + dplyr::filter(.metric == "accuracy") |> mutate(.iter = 0) svm_gp <- diff --git a/tests/testthat/test-acquisition.R b/tests/testthat/test-acquisition.R index 7d26c00f6..5343109a8 100644 --- a/tests/testthat/test-acquisition.R +++ b/tests/testthat/test-acquisition.R @@ -15,19 +15,19 @@ test_that("conf_bound calculations", { expect_equal( predict(conf_bound(kappa = 1), test_res, maximize = TRUE, iter = 1), - test_res %>% mutate(objective = .mean + 1 * .sd) %>% select(objective) + test_res |> mutate(objective = .mean + 1 * .sd) |> select(objective) ) expect_equal( predict(conf_bound(2), test_res, maximize = TRUE, iter = 1), - test_res %>% mutate(objective = .mean + 2 * .sd) %>% select(objective) + test_res |> mutate(objective = .mean + 2 * .sd) |> select(objective) ) expect_equal( predict(conf_bound(dbled), test_res, maximize = TRUE, iter = 2), - test_res %>% mutate(objective = .mean + 4 * .sd) %>% select(objective) + test_res |> mutate(objective = .mean + 4 * .sd) |> select(objective) ) expect_equal( predict(conf_bound(kappa = 1), test_res, maximize = FALSE, iter = 1), - test_res %>% mutate(objective = -(.mean + 1 * .sd)) %>% select(objective) + test_res |> mutate(objective = -(.mean + 1 * .sd)) |> select(objective) ) }) @@ -50,40 +50,40 @@ test_that("prob_improve calculations", { expect_equal( predict(prob_improve(), test_res, maximize = TRUE, iter = 1, best = 15), - test_res %>% - mutate(objective = pnorm((.mean - 15) / .sd)) %>% + test_res |> + mutate(objective = pnorm((.mean - 15) / .sd)) |> select(objective) ) expect_equal( predict(prob_improve(), test_res, maximize = FALSE, iter = 1, best = -2), - test_res %>% - mutate(objective = pnorm((-2 - .mean) / .sd)) %>% + test_res |> + mutate(objective = pnorm((-2 - .mean) / .sd)) |> select(objective) ) expect_equal( predict(prob_improve(.1), test_res, maximize = TRUE, iter = 1, best = 15), - test_res %>% - mutate(objective = pnorm((.mean - 15 - .1) / .sd)) %>% + test_res |> + mutate(objective = pnorm((.mean - 15 - .1) / .sd)) |> select(objective) ) expect_equal( predict(prob_improve(.1), test_res, maximize = FALSE, iter = 1, best = -2), - test_res %>% - mutate(objective = pnorm((-2 + .1 - .mean) / .sd)) %>% + test_res |> + mutate(objective = pnorm((-2 + .1 - .mean) / .sd)) |> select(objective) ) expect_equal( predict(prob_improve(dbled), test_res, maximize = TRUE, iter = 2, best = 15), - test_res %>% - mutate(objective = pnorm((.mean - 15 - 4) / .sd)) %>% + test_res |> + mutate(objective = pnorm((.mean - 15 - 4) / .sd)) |> select(objective) ) expect_equal( predict(prob_improve(dbled), test_res, maximize = FALSE, iter = 4, best = -2), - test_res %>% - mutate(objective = pnorm((-2 + 16 - .mean) / .sd)) %>% + test_res |> + mutate(objective = pnorm((-2 + 16 - .mean) / .sd)) |> select(objective) ) }) @@ -107,58 +107,58 @@ test_that("exp_improve calculations", { expect_equal( predict(exp_improve(), test_res, maximize = TRUE, iter = 1, best = 15), - test_res %>% + test_res |> mutate( diff = .mean - 15, objective = (diff * pnorm(diff / .sd)) + (.sd * dnorm(diff / .sd)) - ) %>% + ) |> select(objective) ) expect_equal( predict(exp_improve(), test_res, maximize = FALSE, iter = 1, best = 15), - test_res %>% + test_res |> mutate( diff = 15 - .mean, objective = (diff * pnorm(diff / .sd)) + (.sd * dnorm(diff / .sd)) - ) %>% + ) |> select(objective) ) expect_equal( predict(exp_improve(1), test_res, maximize = TRUE, iter = 1, best = 15), - test_res %>% + test_res |> mutate( diff = .mean - 16, objective = (diff * pnorm(diff / .sd)) + (.sd * dnorm(diff / .sd)) - ) %>% + ) |> select(objective) ) expect_equal( predict(exp_improve(1), test_res, maximize = FALSE, iter = 1, best = 15), - test_res %>% + test_res |> mutate( diff = 16 - .mean, objective = (diff * pnorm(diff / .sd)) + (.sd * dnorm(diff / .sd)) - ) %>% + ) |> select(objective) ) expect_equal( predict(exp_improve(dbled), test_res, maximize = TRUE, iter = 2, best = 15), - test_res %>% + test_res |> mutate( diff = .mean - 19, objective = (diff * pnorm(diff / .sd)) + (.sd * dnorm(diff / .sd)) - ) %>% + ) |> select(objective) ) expect_equal( predict(exp_improve(dbled), test_res, maximize = FALSE, iter = 2, best = 15), - test_res %>% + test_res |> mutate( diff = 19 - .mean, objective = (diff * pnorm(diff / .sd)) + (.sd * dnorm(diff / .sd)) - ) %>% + ) |> select(objective) ) }) diff --git a/tests/testthat/test-augment.R b/tests/testthat/test-augment.R index f9d6ccf25..9ff3d3edf 100644 --- a/tests/testthat/test-augment.R +++ b/tests/testthat/test-augment.R @@ -5,7 +5,7 @@ test_that("augment fit_resamples", { # ------------------------------------------------------------------------------ - lr_spec <- parsnip::logistic_reg() %>% parsnip::set_engine("glm") + lr_spec <- parsnip::logistic_reg() |> parsnip::set_engine("glm") set.seed(1) two_class_dat <- as.data.frame(two_class_dat) @@ -48,7 +48,7 @@ test_that("augment fit_resamples", { # ------------------------------------------------------------------------------ skip_if(new_rng_snapshots) - lr_spec <- parsnip::logistic_reg() %>% parsnip::set_engine("glm") + lr_spec <- parsnip::logistic_reg() |> parsnip::set_engine("glm") set.seed(1) two_class_dat <- as.data.frame(two_class_dat) @@ -86,8 +86,8 @@ test_that("augment fit_resamples", { test_that("augment tune_grid", { skip_if_not_installed("kernlab") - svm_spec <- parsnip::svm_linear(cost = tune(), margin = 0.1) %>% - parsnip::set_engine("kernlab") %>% + svm_spec <- parsnip::svm_linear(cost = tune(), margin = 0.1) |> + parsnip::set_engine("kernlab") |> parsnip::set_mode("regression") set.seed(1) cv1 <- rsample::vfold_cv(mtcars) @@ -164,7 +164,7 @@ test_that("augment last_fit", { # ------------------------------------------------------------------------------ - lr_spec <- parsnip::logistic_reg() %>% parsnip::set_engine("glm") + lr_spec <- parsnip::logistic_reg() |> parsnip::set_engine("glm") set.seed(1) split <- rsample::initial_split(two_class_dat) fit_1 <- last_fit(lr_spec, Class ~ ., split = split) diff --git a/tests/testthat/test-autoplot.R b/tests/testthat/test-autoplot.R index 320bb0c5b..a2220177e 100644 --- a/tests/testthat/test-autoplot.R +++ b/tests/testthat/test-autoplot.R @@ -222,9 +222,9 @@ test_that("1D regular grid x labels", { set.seed(1) res <- - parsnip::svm_rbf(cost = tune()) %>% - parsnip::set_engine("kernlab") %>% - parsnip::set_mode("regression") %>% + parsnip::svm_rbf(cost = tune()) |> + parsnip::set_engine("kernlab") |> + parsnip::set_mode("regression") |> tune_grid(mpg ~ ., resamples = rsample::vfold_cv(mtcars, v = 5), grid = 3) expect_equal(autoplot(res)$labels$x, c(cost = "Cost")) }) @@ -335,18 +335,18 @@ test_that("regular grid plot", { skip_if_not_installed("kernlab") svm_spec <- - parsnip::svm_rbf(cost = tune()) %>% - parsnip::set_engine("kernlab") %>% + parsnip::svm_rbf(cost = tune()) |> + parsnip::set_engine("kernlab") |> parsnip::set_mode("regression") svm_grid <- - svm_spec %>% - extract_parameter_set_dials() %>% + svm_spec |> + extract_parameter_set_dials() |> dials::grid_regular(levels = 1) set.seed(1) res <- - svm_spec %>% + svm_spec |> tune_grid(mpg ~ ., resamples = rsample::vfold_cv(mtcars, v = 5), grid = svm_grid) expect_snapshot( @@ -360,9 +360,9 @@ test_that("evaluation time warning for non-survival model", { set.seed(1) res <- - parsnip::svm_rbf(cost = tune()) %>% - parsnip::set_engine("kernlab") %>% - parsnip::set_mode("regression") %>% + parsnip::svm_rbf(cost = tune()) |> + parsnip::set_engine("kernlab") |> + parsnip::set_mode("regression") |> tune_grid(mpg ~ ., resamples = rsample::vfold_cv(mtcars, v = 5), grid = 2) expect_snapshot(foo <- autoplot(res, metric = "rmse", eval_time = 10)) diff --git a/tests/testthat/test-bayes.R b/tests/testthat/test-bayes.R index e334901f9..5e2874150 100644 --- a/tests/testthat/test-bayes.R +++ b/tests/testthat/test-bayes.R @@ -1,15 +1,15 @@ rec_tune_1 <- - recipes::recipe(mpg ~ ., data = mtcars) %>% - recipes::step_normalize(recipes::all_predictors()) %>% + recipes::recipe(mpg ~ ., data = mtcars) |> + recipes::step_normalize(recipes::all_predictors()) |> recipes::step_pca(recipes::all_predictors(), num_comp = tune()) rec_no_tune_1 <- - recipes::recipe(mpg ~ ., data = mtcars) %>% + recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_normalize(recipes::all_predictors()) -lm_mod <- parsnip::linear_reg() %>% parsnip::set_engine("lm") +lm_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") -svm_mod <- parsnip::svm_rbf(mode = "regression", cost = tune()) %>% +svm_mod <- parsnip::svm_rbf(mode = "regression", cost = tune()) |> parsnip::set_engine("kernlab") iter1 <- 2 @@ -20,10 +20,10 @@ iterT <- iter1 + iter2 test_that("tune recipe only", { set.seed(4400) - wflow <- workflow() %>% - add_recipe(rec_tune_1) %>% + wflow <- workflow() |> + add_recipe(rec_tune_1) |> add_model(lm_mod) - pset <- extract_parameter_set_dials(wflow) %>% update(num_comp = dials::num_comp(c(1, 5))) + pset <- extract_parameter_set_dials(wflow) |> update(num_comp = dials::num_comp(c(1, 5))) folds <- rsample::vfold_cv(mtcars) control <- control_bayes(extract = identity) @@ -115,8 +115,8 @@ test_that("tune model only (with recipe)", { skip_if_not_installed("kernlab") set.seed(4400) - wflow <- workflow() %>% - add_recipe(rec_no_tune_1) %>% + wflow <- workflow() |> + add_recipe(rec_no_tune_1) |> add_model(svm_mod) pset <- extract_parameter_set_dials(wflow) folds <- rsample::vfold_cv(mtcars) @@ -147,8 +147,8 @@ test_that("tune model only (with variables)", { set.seed(4400) - wflow <- workflow() %>% - add_variables(mpg, everything()) %>% + wflow <- workflow() |> + add_variables(mpg, everything()) |> add_model(svm_mod) pset <- extract_parameter_set_dials(wflow) @@ -184,8 +184,8 @@ test_that("tune model only (with recipe, multi-predict)", { skip_on_cran() set.seed(4400) - wflow <- workflow() %>% - add_recipe(rec_no_tune_1) %>% + wflow <- workflow() |> + add_recipe(rec_no_tune_1) |> add_model(svm_mod) pset <- extract_parameter_set_dials(wflow) folds <- rsample::vfold_cv(mtcars) @@ -218,10 +218,10 @@ test_that("tune model and recipe", { skip_if_not_installed("kernlab") set.seed(4400) - wflow <- workflow() %>% - add_recipe(rec_tune_1) %>% + wflow <- workflow() |> + add_recipe(rec_tune_1) |> add_model(svm_mod) - pset <- extract_parameter_set_dials(wflow) %>% update(num_comp = dials::num_comp(c(1, 3))) + pset <- extract_parameter_set_dials(wflow) |> update(num_comp = dials::num_comp(c(1, 3))) folds <- rsample::vfold_cv(mtcars) suppressMessages({ res <- tune_bayes( @@ -253,10 +253,10 @@ test_that("tune model and recipe (multi-predict)", { skip_on_cran() set.seed(4400) - wflow <- workflow() %>% - add_recipe(rec_tune_1) %>% + wflow <- workflow() |> + add_recipe(rec_tune_1) |> add_model(svm_mod) - pset <- extract_parameter_set_dials(wflow) %>% update(num_comp = dials::num_comp(c(2, 3))) + pset <- extract_parameter_set_dials(wflow) |> update(num_comp = dials::num_comp(c(2, 3))) grid <- dials::grid_regular(pset, levels = c(3, 2)) folds <- rsample::vfold_cv(mtcars) suppressMessages({ @@ -290,10 +290,10 @@ test_that("tune recipe only - failure in recipe is caught elegantly", { set.seed(7898) data_folds <- rsample::vfold_cv(mtcars, v = 2) - rec <- recipes::recipe(mpg ~ ., data = mtcars) %>% + rec <- recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_spline_b(disp, deg_free = tune()) - model <- parsnip::linear_reg(mode = "regression") %>% + model <- parsnip::linear_reg(mode = "regression") |> parsnip::set_engine("lm") # NA values not allowed in recipe @@ -339,7 +339,7 @@ test_that("tune model only - failure in recipe is caught elegantly", { data_folds <- rsample::vfold_cv(mtcars, v = 2) # NA values not allowed in recipe - rec <- recipes::recipe(mpg ~ ., data = mtcars) %>% + rec <- recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_spline_b(disp, deg_free = NA_real_) expect_snapshot({ @@ -360,8 +360,8 @@ test_that("tune model only - failure in formula is caught elegantly", { data_folds <- rsample::vfold_cv(mtcars, v = 2) # these terms don't exist! - wflow <- workflow() %>% - add_formula(y ~ z) %>% + wflow <- workflow() |> + add_formula(y ~ z) |> add_model(svm_mod) expect_snapshot({ @@ -386,7 +386,7 @@ test_that("tune model and recipe - failure in recipe is caught elegantly", { set.seed(7898) data_folds <- rsample::vfold_cv(mtcars, v = 2) - rec <- recipes::recipe(mpg ~ ., data = mtcars) %>% + rec <- recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_spline_b(disp, deg_free = tune()) @@ -451,10 +451,10 @@ test_that("argument order gives an error for formula", { test_that("retain extra attributes and saved GP candidates", { set.seed(4400) - wflow <- workflow() %>% - add_recipe(rec_tune_1) %>% + wflow <- workflow() |> + add_recipe(rec_tune_1) |> add_model(lm_mod) - pset <- extract_parameter_set_dials(wflow) %>% + pset <- extract_parameter_set_dials(wflow) |> update(num_comp = dials::num_comp(c(1, 5))) folds <- rsample::vfold_cv(mtcars) ctrl <- control_bayes(save_gp_scoring = TRUE) @@ -527,7 +527,7 @@ test_that("missing performance values", { data(ames, package = "modeldata") - mod <- parsnip::decision_tree(cost_complexity = tune()) %>% + mod <- parsnip::decision_tree(cost_complexity = tune()) |> parsnip::set_mode("regression") set.seed(1) @@ -537,7 +537,7 @@ test_that("missing performance values", { expect_snapshot({ set.seed(1) res <- - mod %>% + mod |> tune_bayes( Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type + Latitude + Longitude, @@ -551,7 +551,7 @@ test_that("missing performance values", { expect_snapshot(error = TRUE, { set.seed(2) res_fail <- - mod %>% + mod |> tune_bayes( Sale_Price ~ Neighborhood + Gr_Liv_Area + Year_Built + Bldg_Type + Latitude + Longitude, @@ -585,7 +585,7 @@ test_that("tune_bayes() output for `iter` edge cases (#721)", { res_grid <- tune_grid(wf, boots) expect_equal( - collect_metrics(res_bayes) %>% dplyr::select(-.iter), + collect_metrics(res_bayes) |> dplyr::select(-.iter), collect_metrics(res_grid) ) diff --git a/tests/testthat/test-censored-reg.R b/tests/testthat/test-censored-reg.R index 12a9558f4..4385f16b6 100644 --- a/tests/testthat/test-censored-reg.R +++ b/tests/testthat/test-censored-reg.R @@ -15,18 +15,18 @@ test_that("evaluation time", { reg_mtr <- metric_set(rmse) expect_snapshot(error = TRUE, - spec %>% tune_grid(Surv(time, status) ~ ., resamples = rs, metrics = mtr) + spec |> tune_grid(Surv(time, status) ~ ., resamples = rs, metrics = mtr) ) expect_snapshot(error = TRUE, - spec %>% tune_grid(Surv(time, status) ~ ., resamples = rs, metrics = reg_mtr) + spec |> tune_grid(Surv(time, status) ~ ., resamples = rs, metrics = reg_mtr) ) expect_snapshot( - linear_reg() %>% tune_grid(age ~ ., resamples = rs, metrics = reg_mtr, eval_time = 1) + linear_reg() |> tune_grid(age ~ ., resamples = rs, metrics = reg_mtr, eval_time = 1) ) expect_snapshot(error = TRUE, no_usable_times <- - spec %>% + spec |> tune_grid(Surv(time, status) ~ ., resamples = rs, metrics = mtr, eval_time = c(-1, Inf)) ) diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 4a88ff2e5..39799ebd7 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -18,22 +18,22 @@ test_that("grid objects", { data("Chicago", package = "modeldata") data("Chicago", package = "modeldata") spline_rec <- - recipes::recipe(ridership ~ ., data = head(Chicago)) %>% - recipes::step_date(date) %>% - recipes::step_holiday(date) %>% - recipes::step_rm(date, dplyr::ends_with("away")) %>% - recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) %>% - recipes::step_other(recipes::all_nominal(), threshold = tune()) %>% - recipes::step_dummy(recipes::all_nominal()) %>% - recipes::step_normalize(recipes::all_numeric_predictors()) %>% + recipes::recipe(ridership ~ ., data = head(Chicago)) |> + recipes::step_date(date) |> + recipes::step_holiday(date) |> + recipes::step_rm(date, dplyr::ends_with("away")) |> + recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) |> + recipes::step_other(recipes::all_nominal(), threshold = tune()) |> + recipes::step_dummy(recipes::all_nominal()) |> + recipes::step_normalize(recipes::all_numeric_predictors()) |> recipes::step_spline_b(recipes::all_predictors(), deg_free = tune(), degree = tune()) - glmn <- parsnip::linear_reg(penalty = tune(), mixture = tune()) %>% + glmn <- parsnip::linear_reg(penalty = tune(), mixture = tune()) |> parsnip::set_engine("glmnet") chi_wflow <- - workflows::workflow() %>% - workflows::add_recipe(spline_rec) %>% + workflows::workflow() |> + workflows::add_recipe(spline_rec) |> workflows::add_model(glmn) grid_1 <- tibble::tibble( @@ -42,7 +42,7 @@ test_that("grid objects", { ) set_1 <- extract_parameter_set_dials(chi_wflow) - set_2 <- set_1 %>% update(deg_free = dials::deg_free(c(1, 3))) + set_2 <- set_1 |> update(deg_free = dials::deg_free(c(1, 3))) expect_equal(tune:::check_grid(grid_1, chi_wflow), grid_1) @@ -60,13 +60,13 @@ test_that("grid objects", { recipes::recipe(ridership ~ ., data = head(Chicago)) svm_mod <- - parsnip::svm_rbf(cost = tune()) %>% - parsnip::set_engine("kernlab") %>% + parsnip::svm_rbf(cost = tune()) |> + parsnip::set_engine("kernlab") |> parsnip::set_mode("regression") wflow_1 <- - workflow() %>% - add_model(svm_mod) %>% + workflow() |> + add_model(svm_mod) |> add_recipe(bare_rec) expect_no_error(grid_2 <- tune:::check_grid(6, wflow_1)) @@ -145,24 +145,24 @@ test_that("workflow objects", { recipes::recipe(ridership ~ ., data = head(Chicago)) svm_mod <- - parsnip::svm_rbf(cost = tune()) %>% - parsnip::set_engine("kernlab") %>% + parsnip::svm_rbf(cost = tune()) |> + parsnip::set_engine("kernlab") |> parsnip::set_mode("regression") wflow_1 <- - workflow() %>% - add_model(svm_mod) %>% + workflow() |> + add_model(svm_mod) |> add_recipe(bare_rec) expect_null(tune:::check_workflow(x = wflow_1)) wflow_2 <- - workflow() %>% + workflow() |> add_model( - parsnip::boost_tree(mtry = tune()) %>% - parsnip::set_engine("xgboost") %>% + parsnip::boost_tree(mtry = tune()) |> + parsnip::set_engine("xgboost") |> parsnip::set_mode("regression") - ) %>% + ) |> add_recipe(bare_rec) expect_null(tune:::check_workflow(x = wflow_2)) @@ -170,18 +170,18 @@ test_that("workflow objects", { tune:::check_workflow(x = wflow_2, check_dials = TRUE) }) - glmn <- parsnip::linear_reg(penalty = tune(), mixture = tune()) %>% + glmn <- parsnip::linear_reg(penalty = tune(), mixture = tune()) |> parsnip::set_engine("glmnet") wflow_3 <- - workflow() %>% + workflow() |> add_model(glmn) expect_snapshot(error = TRUE, { tune:::check_workflow(wflow_3) }) wflow_4 <- - workflow() %>% + workflow() |> add_recipe(bare_rec) expect_snapshot(error = TRUE, { tune:::check_workflow(wflow_4) @@ -211,7 +211,7 @@ test_that("workflow objects (will not tune, tidymodels/tune#548)", { # one recipe without tuning, one with: rec_bare <- recipes::recipe(ridership ~ ., data = head(Chicago, 30)) - rec_tune <- rec_bare %>% recipes::step_spline_natural(temp_max, deg_free = tune()) + rec_tune <- rec_bare |> recipes::step_spline_natural(temp_max, deg_free = tune()) # well-defined: lr_lm_0 <- parsnip::linear_reg() @@ -221,9 +221,9 @@ test_that("workflow objects (will not tune, tidymodels/tune#548)", { lr_lm_2 <- parsnip::linear_reg(penalty = tune(), mixture = tune()) # well-defined: - lr_glmnet_0 <- lr_lm_0 %>% parsnip::set_engine("glmnet") - lr_glmnet_1 <- lr_lm_1 %>% parsnip::set_engine("glmnet") - lr_glmnet_2 <- lr_lm_2 %>% parsnip::set_engine("glmnet") + lr_glmnet_0 <- lr_lm_0 |> parsnip::set_engine("glmnet") + lr_glmnet_1 <- lr_lm_1 |> parsnip::set_engine("glmnet") + lr_glmnet_2 <- lr_lm_2 |> parsnip::set_engine("glmnet") # don't error when supplied tune args make sense given engine / steps expect_no_error(check_workflow(workflow(rec_bare, lr_lm_0))) @@ -270,22 +270,22 @@ test_that("yardstick objects", { skip_if_not_installed("splines2") spline_rec <- - recipes::recipe(ridership ~ ., data = head(Chicago)) %>% - recipes::step_date(date) %>% - recipes::step_holiday(date) %>% - recipes::step_rm(date, dplyr::ends_with("away")) %>% - recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) %>% - recipes::step_other(recipes::all_nominal(), threshold = tune()) %>% - recipes::step_dummy(recipes::all_nominal()) %>% - recipes::step_normalize(recipes::all_numeric_predictors()) %>% + recipes::recipe(ridership ~ ., data = head(Chicago)) |> + recipes::step_date(date) |> + recipes::step_holiday(date) |> + recipes::step_rm(date, dplyr::ends_with("away")) |> + recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) |> + recipes::step_other(recipes::all_nominal(), threshold = tune()) |> + recipes::step_dummy(recipes::all_nominal()) |> + recipes::step_normalize(recipes::all_numeric_predictors()) |> recipes::step_spline_b(recipes::all_predictors(), deg_free = tune(), degree = tune()) - glmn <- parsnip::linear_reg(penalty = tune(), mixture = tune()) %>% + glmn <- parsnip::linear_reg(penalty = tune(), mixture = tune()) |> parsnip::set_engine("glmnet") chi_wflow <- - workflows::workflow() %>% - workflows::add_recipe(spline_rec) %>% + workflows::workflow() |> + workflows::add_recipe(spline_rec) |> workflows::add_model(glmn) metrics_1 <- tune:::check_metrics(NULL, chi_wflow) @@ -373,12 +373,12 @@ test_that("initial values", { skip_if_not_installed("kernlab") svm_mod <- - parsnip::svm_rbf(cost = tune()) %>% - parsnip::set_engine("kernlab") %>% + parsnip::svm_rbf(cost = tune()) |> + parsnip::set_engine("kernlab") |> parsnip::set_mode("regression") wflow_1 <- - workflow() %>% - add_model(svm_mod) %>% + workflow() |> + add_model(svm_mod) |> add_recipe(recipes::recipe(mpg ~ ., data = mtcars)) mtfolds <- rsample::vfold_cv(mtcars) @@ -428,21 +428,21 @@ test_that("check parameter finalization", { skip_if_not_installed("splines2") rec <- - recipes::recipe(mpg ~ ., data = mtcars) %>% + recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_spline_natural(disp, deg_free = 3) - rec_tune <- rec %>% recipes::step_pca(recipes::all_predictors(), num_comp = tune()) + rec_tune <- rec |> recipes::step_pca(recipes::all_predictors(), num_comp = tune()) f <- mpg ~ . rf1 <- - parsnip::rand_forest(mtry = tune(), min_n = tune()) %>% - parsnip::set_engine("ranger") %>% + parsnip::rand_forest(mtry = tune(), min_n = tune()) |> + parsnip::set_engine("ranger") |> parsnip::set_mode("regression") lm1 <- - parsnip::linear_reg(penalty = tune()) %>% + parsnip::linear_reg(penalty = tune()) |> parsnip::set_engine("glmnet") w1 <- - workflow() %>% - add_formula(f) %>% + workflow() |> + add_formula(f) |> add_model(rf1) expect_snapshot( @@ -457,8 +457,8 @@ test_that("check parameter finalization", { ) w2 <- - workflow() %>% - add_recipe(rec) %>% + workflow() |> + add_recipe(rec) |> add_model(rf1) expect_snapshot( @@ -469,8 +469,8 @@ test_that("check parameter finalization", { expect_false(any(dials::has_unknowns(p2$object))) w3 <- - workflow() %>% - add_recipe(rec) %>% + workflow() |> + add_recipe(rec) |> add_model(rf1) p3 <- extract_parameter_set_dials(w3) @@ -482,8 +482,8 @@ test_that("check parameter finalization", { expect_false(any(dials::has_unknowns(p3_a$object))) w4 <- - workflow() %>% - add_recipe(rec_tune) %>% + workflow() |> + add_recipe(rec_tune) |> add_model(rf1) expect_snapshot(error = TRUE, { @@ -491,7 +491,7 @@ test_that("check parameter finalization", { }) p4_a <- - extract_parameter_set_dials(w4) %>% + extract_parameter_set_dials(w4) |> update(mtry = dials::mtry(c(1, 10))) expect_no_error( @@ -500,8 +500,8 @@ test_that("check parameter finalization", { expect_true(inherits(p4_b, "parameters")) w5 <- - workflow() %>% - add_recipe(rec_tune) %>% + workflow() |> + add_recipe(rec_tune) |> add_model(lm1) expect_no_error( diff --git a/tests/testthat/test-collect.R b/tests/testthat/test-collect.R index 6f42d44f8..f7c250217 100644 --- a/tests/testthat/test-collect.R +++ b/tests/testthat/test-collect.R @@ -7,10 +7,10 @@ if (rlang::is_installed(c("modeldata", "splines2", "kernlab"))) { set.seed(6735) rep_folds <- rsample::vfold_cv(mtcars, v = 2, repeats = 2) - spline_rec <- recipes::recipe(mpg ~ ., data = mtcars) %>% + spline_rec <- recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_spline_natural(disp, deg_free = 3) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") lm_splines <- @@ -25,8 +25,8 @@ if (rlang::is_installed(c("modeldata", "splines2", "kernlab"))) { rep_folds_class <- rsample::vfold_cv(two_class_dat, v = 2, repeats = 3) svm_mod <- - parsnip::svm_rbf(cost = tune("cost value")) %>% - parsnip::set_engine("kernlab") %>% + parsnip::svm_rbf(cost = tune("cost value")) |> + parsnip::set_engine("kernlab") |> parsnip::set_mode("classification") suppressMessages( @@ -45,11 +45,11 @@ if (rlang::is_installed(c("modeldata", "splines2", "kernlab"))) { svm_tune_class$.predictions <- purrr::map( svm_tune_class$.predictions, - ~ .x %>% dplyr::select(-.pred_Class1, -.pred_Class2) + ~ .x |> dplyr::select(-.pred_Class1, -.pred_Class2) ) attr(svm_tune_class, "metrics") <- yardstick::metric_set(yardstick::kap) - svm_grd <- show_best(svm_tune, metric = "roc_auc") %>% dplyr::select(`cost value`) + svm_grd <- show_best(svm_tune, metric = "roc_auc") |> dplyr::select(`cost value`) } # ------------------------------------------------------------------------------ @@ -59,7 +59,7 @@ test_that("`collect_predictions()` errors informatively if there is no `.predict skip_if_not_installed("splines2") expect_snapshot(error = TRUE, { - collect_predictions(lm_splines %>% dplyr::select(-.predictions)) + collect_predictions(lm_splines |> dplyr::select(-.predictions)) }) }) @@ -78,17 +78,17 @@ test_that("`collect_predictions()`, un-averaged", { res <- collect_predictions(lm_splines) exp_res <- - unnest(lm_splines %>% dplyr::select(.predictions, starts_with("id")), + unnest(lm_splines |> dplyr::select(.predictions, starts_with("id")), cols = c(.predictions) - ) %>% dplyr::select(all_of(names(res))) + ) |> dplyr::select(all_of(names(res))) expect_equal(res, exp_res) res <- collect_predictions(svm_tune) exp_res <- unnest( - svm_tune %>% dplyr::select(.predictions, starts_with("id"), .iter), + svm_tune |> dplyr::select(.predictions, starts_with("id"), .iter), cols = c(.predictions) - ) %>% + ) |> dplyr::select(all_of(names(res))) res_subset <- collect_predictions(svm_tune, parameters = svm_grd[1, ]) exp_res_subset <- dplyr::filter(exp_res, `cost value` == svm_grd$`cost value`[[1]]) @@ -186,11 +186,11 @@ test_that("collecting notes - fit_resamples", { skip_if_not_installed("modeldata") skip_if_not_installed("splines2") - mtcars2 <- mtcars %>% mutate(wt2 = wt) + mtcars2 <- mtcars |> mutate(wt2 = wt) set.seed(1) flds <- rsample::bootstraps(mtcars2, times = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") expect_snapshot( @@ -209,11 +209,11 @@ test_that("collecting notes - last_fit", { options(pillar.advice = FALSE, pillar.min_title_chars = Inf) - mtcars2 <- mtcars %>% mutate(wt2 = wt) + mtcars2 <- mtcars |> mutate(wt2 = wt) set.seed(1) split <- rsample::initial_split(mtcars2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") expect_snapshot( @@ -287,7 +287,7 @@ test_that("`collect_metrics(type)` errors informatively with bad input", { test_that("`pivot_metrics()`, grid search, typical metrics, summarized", { expect_equal( - pivot_metrics(ames_grid_search, collect_metrics(ames_grid_search)) %>% + pivot_metrics(ames_grid_search, collect_metrics(ames_grid_search)) |> dplyr::slice(), tibble::tibble( K = integer(0), @@ -307,7 +307,7 @@ test_that("`pivot_metrics()`, grid search, typical metrics, unsummarized", { pivot_metrics( ames_grid_search, collect_metrics(ames_grid_search, summarize = FALSE) - ) %>% + ) |> dplyr::slice(), tibble::tibble( K = integer(0), @@ -325,7 +325,7 @@ test_that("`pivot_metrics()`, grid search, typical metrics, unsummarized", { test_that("`pivot_metrics()`, iterative search, typical metrics, summarized", { expect_equal( - pivot_metrics(ames_iter_search, collect_metrics(ames_iter_search)) %>% + pivot_metrics(ames_iter_search, collect_metrics(ames_iter_search)) |> dplyr::slice(), tibble::tibble( K = integer(0), @@ -365,7 +365,7 @@ test_that("`pivot_metrics()`, resampled fits, fairness metrics, summarized", { ) expect_equal( - pivot_metrics(res, collect_metrics(res)) %>% slice(), + pivot_metrics(res, collect_metrics(res)) |> slice(), tibble::tibble( .config = character(0), `demographic_parity(am)` = integer(0), diff --git a/tests/testthat/test-compat-vctrs.R b/tests/testthat/test-compat-vctrs.R index 7653814ed..b142b2eaf 100644 --- a/tests/testthat/test-compat-vctrs.R +++ b/tests/testthat/test-compat-vctrs.R @@ -112,7 +112,7 @@ test_that("vec_rbind() returns a bare tibble", { expect_identical(vec_rbind(x, tbl), vec_rbind(tbl, tbl)) expect_s3_class_bare_tibble(vec_rbind(x)) - expect_s3_class_bare_tibble(vec_cbind(x, x)) %>% suppressMessages() + expect_s3_class_bare_tibble(vec_cbind(x, x)) |> suppressMessages() } }) @@ -121,10 +121,10 @@ test_that("vec_cbind() returns a bare tibble", { tbl <- new_bare_tibble(x) expect_identical(vec_cbind(x), vec_cbind(tbl)) - expect_identical(vec_cbind(x, x), vec_cbind(tbl, tbl)) %>% suppressMessages() - expect_identical(vec_cbind(x, tbl), vec_cbind(tbl, tbl)) %>% suppressMessages() + expect_identical(vec_cbind(x, x), vec_cbind(tbl, tbl)) |> suppressMessages() + expect_identical(vec_cbind(x, tbl), vec_cbind(tbl, tbl)) |> suppressMessages() expect_s3_class_bare_tibble(vec_cbind(x)) - expect_s3_class_bare_tibble(vec_cbind(x, x)) %>% suppressMessages() + expect_s3_class_bare_tibble(vec_cbind(x, x)) |> suppressMessages() } }) diff --git a/tests/testthat/test-conf-mat-resampled.R b/tests/testthat/test-conf-mat-resampled.R index 4ae3000f0..482b40fc1 100644 --- a/tests/testthat/test-conf-mat-resampled.R +++ b/tests/testthat/test-conf-mat-resampled.R @@ -53,7 +53,7 @@ test_that("bad argss", { broke_results$.predictions <- purrr::map( broke_results$.predictions, - ~ .x %>% dplyr::select(-.pred_class) + ~ .x |> dplyr::select(-.pred_class) ) expect_snapshot(error = TRUE, { diff --git a/tests/testthat/test-engine-parameters.R b/tests/testthat/test-engine-parameters.R index 956aa1926..f5989498c 100644 --- a/tests/testthat/test-engine-parameters.R +++ b/tests/testthat/test-engine-parameters.R @@ -27,8 +27,8 @@ test_that("tuning with engine parameters with dials objects", { skip_if(utils::packageVersion("dials") <= "0.0.7") rf_mod <- - parsnip::rand_forest(min_n = tune()) %>% - parsnip::set_engine("randomForest", maxnodes = tune()) %>% + parsnip::rand_forest(min_n = tune()) |> + parsnip::set_engine("randomForest", maxnodes = tune()) |> parsnip::set_mode("regression") set.seed(192) @@ -37,7 +37,7 @@ test_that("tuning with engine parameters with dials objects", { set.seed(19828) expect_no_error( suppressMessages( - rf_tune <- rf_mod %>% tune_grid(mpg ~ ., resamples = rs, grid = 3) + rf_tune <- rf_mod |> tune_grid(mpg ~ ., resamples = rs, grid = 3) ) ) expect_no_error( @@ -47,7 +47,7 @@ test_that("tuning with engine parameters with dials objects", { set.seed(283) expect_no_error( suppressMessages( - rf_search <- rf_mod %>% tune_bayes(mpg ~ ., resamples = rs, initial = 3, iter = 2) + rf_search <- rf_mod |> tune_bayes(mpg ~ ., resamples = rs, initial = 3, iter = 2) ) ) expect_no_error( @@ -64,8 +64,8 @@ test_that("tuning with engine parameters without dials objects", { ## --------------------------------------------------------------------------- rf_mod <- - parsnip::rand_forest(min_n = tune()) %>% - parsnip::set_engine("randomForest", corr.bias = tune()) %>% + parsnip::rand_forest(min_n = tune()) |> + parsnip::set_engine("randomForest", corr.bias = tune()) |> parsnip::set_mode("regression") grid <- @@ -80,14 +80,14 @@ test_that("tuning with engine parameters without dials objects", { ## --------------------------------------------------------------------------- expect_snapshot(error = TRUE, { - rf_tune <- rf_mod %>% tune_grid(mpg ~ ., resamples = rs, grid = 3) + rf_tune <- rf_mod |> tune_grid(mpg ~ ., resamples = rs, grid = 3) }) ## --------------------------------------------------------------------------- expect_no_error( suppressMessages( - rf_tune <- rf_mod %>% tune_grid(mpg ~ ., resamples = rs, grid = grid) + rf_tune <- rf_mod |> tune_grid(mpg ~ ., resamples = rs, grid = grid) ) ) expect_snapshot(error = TRUE, { @@ -98,6 +98,6 @@ test_that("tuning with engine parameters without dials objects", { set.seed(283) expect_snapshot(error = TRUE, { - rf_search <- rf_mod %>% tune_bayes(mpg ~ ., resamples = rs) + rf_search <- rf_mod |> tune_bayes(mpg ~ ., resamples = rs) }) }) diff --git a/tests/testthat/test-estimate.R b/tests/testthat/test-estimate.R index bb0a7699e..ce798807c 100644 --- a/tests/testthat/test-estimate.R +++ b/tests/testthat/test-estimate.R @@ -4,21 +4,21 @@ opt <- getOption("dplyr.summarise.inform", default = "FALSE") options(dplyr.summarise.inform = FALSE) compl <- - unnest(rcv_results, .metrics) %>% - group_by(deg_free, degree, `wt df`, `wt degree`, .config, .metric, .estimator) %>% + unnest(rcv_results, .metrics) |> + group_by(deg_free, degree, `wt df`, `wt degree`, .config, .metric, .estimator) |> summarize( mean = mean(.estimate, na.rm = TRUE), n = sum(!is.na(.estimator)), std_err = sd(.estimate, na.rm = TRUE) / sqrt(n) - ) %>% - ungroup() %>% + ) |> + ungroup() |> arrange(.config) options(dplyr.summarise.inform = opt) test_that("estimate method", { expect_equal( - collect_metrics(rcv_results)[, names(compl)] %>% arrange(.config), + collect_metrics(rcv_results)[, names(compl)] |> arrange(.config), compl ) }) @@ -43,13 +43,13 @@ test_that("estimate method (with apparent resample)", { ) collected_sum <- - collect_metrics(res) %>% + collect_metrics(res) |> select(mean, n, std_err) collected_manual <- - res %>% - dplyr::filter(id != "Apparent") %>% - tidyr::unnest(.metrics) %>% + res |> + dplyr::filter(id != "Apparent") |> + tidyr::unnest(.metrics) |> summarize( mean = mean(.estimate), n = sum(!is.na(.estimator)), diff --git a/tests/testthat/test-eval-time-args.R b/tests/testthat/test-eval-time-args.R index 6df2a4b82..9b1da56ab 100644 --- a/tests/testthat/test-eval-time-args.R +++ b/tests/testthat/test-eval-time-args.R @@ -9,7 +9,7 @@ test_that("eval time inputs are checked for regression models", { # ------------------------------------------------------------------------------ wflow <- workflow(mpg ~ ., linear_reg()) - knn_spec <- nearest_neighbor(neighbors = tune()) %>% set_mode("regression") + knn_spec <- nearest_neighbor(neighbors = tune()) |> set_mode("regression") wflow_tune <- workflow(mpg ~ ., knn_spec) set.seed(1) @@ -68,7 +68,7 @@ test_that("eval time are checked for classification models", { data(two_class_dat, package = "modeldata") wflow <- workflow(Class ~ A + B, logistic_reg()) - knn_spec <- nearest_neighbor(neighbors = tune()) %>% set_mode("classification") + knn_spec <- nearest_neighbor(neighbors = tune()) |> set_mode("classification") wflow_tune <- workflow(Class ~ A + B, knn_spec) set.seed(1) diff --git a/tests/testthat/test-event-level.R b/tests/testthat/test-event-level.R index 2e3b89777..52dcd82ca 100644 --- a/tests/testthat/test-event-level.R +++ b/tests/testthat/test-event-level.R @@ -7,7 +7,7 @@ test_that("`event_level` is passed through in tune_grid()", { data("two_class_dat", package = "modeldata") dat <- two_class_dat[1:50, ] - spec <- parsnip::svm_linear(mode = "classification", cost = tune()) %>% + spec <- parsnip::svm_linear(mode = "classification", cost = tune()) |> parsnip::set_engine("kernlab") control <- control_grid(event_level = "second", save_pred = TRUE) @@ -53,7 +53,7 @@ test_that("`event_level` is passed through in fit_resamples()", { data("two_class_dat", package = "modeldata") dat <- two_class_dat[1:50, ] - spec <- parsnip::svm_linear(mode = "classification", cost = 2) %>% + spec <- parsnip::svm_linear(mode = "classification", cost = 2) |> parsnip::set_engine("kernlab") control <- control_resamples(event_level = "second", save_pred = TRUE) @@ -97,7 +97,7 @@ test_that("`event_level` is passed through in tune_bayes()", { data("two_class_dat", package = "modeldata") dat <- two_class_dat[1:50, ] - spec <- parsnip::svm_linear(mode = "classification", cost = tune()) %>% + spec <- parsnip::svm_linear(mode = "classification", cost = tune()) |> parsnip::set_engine("kernlab") control_grid <- control_grid(save_pred = TRUE) @@ -145,7 +145,7 @@ test_that("`event_level` is passed through in last_fit()", { data("two_class_dat", package = "modeldata") dat <- two_class_dat[1:10, ] - spec <- parsnip::svm_linear(mode = "classification", cost = 0.5) %>% + spec <- parsnip::svm_linear(mode = "classification", cost = 0.5) |> parsnip::set_engine("kernlab") control <- control_last_fit(event_level = "second") diff --git a/tests/testthat/test-extract-helpers.R b/tests/testthat/test-extract-helpers.R index 64dcb6bb4..718eadf46 100644 --- a/tests/testthat/test-extract-helpers.R +++ b/tests/testthat/test-extract-helpers.R @@ -1,9 +1,9 @@ test_that("extract methods for last_fit objects", { - lm_spec <- parsnip::linear_reg() %>% parsnip::set_engine("lm") + lm_spec <- parsnip::linear_reg() |> parsnip::set_engine("lm") lm_prn_fit <- parsnip::fit(lm_spec, mpg ~ ., data = mtcars) lm_wflow <- - workflow() %>% - add_model(lm_spec) %>% + workflow() |> + add_model(lm_spec) |> add_formula(mpg ~ .) lm_res <- last_fit(lm_wflow, split = rsample::initial_split(mtcars)) @@ -16,11 +16,11 @@ test_that("extract methods for last_fit objects", { }) test_that("extract methods for resample_results objects", { - lm_spec <- parsnip::linear_reg() %>% parsnip::set_engine("lm") + lm_spec <- parsnip::linear_reg() |> parsnip::set_engine("lm") lm_rec_wflow <- - workflow() %>% - add_model(lm_spec) %>% - add_recipe(recipes::recipe(mpg ~ ., data = mtcars) %>% + workflow() |> + add_model(lm_spec) |> + add_recipe(recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_normalize(recipes::all_numeric_predictors())) lm_rec_res <- fit_resamples( lm_rec_wflow, diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R index 55595a840..53caab2e0 100644 --- a/tests/testthat/test-extract.R +++ b/tests/testthat/test-extract.R @@ -7,14 +7,14 @@ test_that("tune recipe only", { mt_folds <- rsample::vfold_cv(mtcars, v = 5) extr_1_1 <- function(x) { - extract_recipe(x) %>% tidy(number = 2) + extract_recipe(x) |> tidy(number = 2) } before_kind <- RNGkind()[[1]] expect_no_error( res_1_1 <- - workflow() %>% - add_recipe(helper_objects$rec_tune_1) %>% - add_model(helper_objects$lm_mod) %>% + workflow() |> + add_recipe(helper_objects$rec_tune_1) |> + add_model(helper_objects$lm_mod) |> tune_grid(resamples = mt_folds, control = control_grid(extract = extr_1_1)) ) after_kind <- RNGkind()[[1]] @@ -43,9 +43,9 @@ test_that("tune model only", { expect_no_error( res_2_1 <- - workflow() %>% - add_recipe(helper_objects$rec_no_tune_1) %>% - add_model(helper_objects$svm_mod) %>% + workflow() |> + add_recipe(helper_objects$rec_no_tune_1) |> + add_model(helper_objects$svm_mod) |> tune_grid( resamples = mt_folds, grid = 2, @@ -69,9 +69,9 @@ test_that("tune model only", { # should not fail: expect_no_error( res_2_2 <- - workflow() %>% - add_recipe(helper_objects$rec_tune_1) %>% - add_model(helper_objects$lm_mod) %>% + workflow() |> + add_recipe(helper_objects$rec_tune_1) |> + add_model(helper_objects$lm_mod) |> tune_grid( resamples = mt_folds, grid = 2, @@ -81,7 +81,7 @@ test_that("tune model only", { expect_no_error( extract_2_2 <- - dplyr::bind_rows(res_2_2$.extracts) %>% + dplyr::bind_rows(res_2_2$.extracts) |> tidyr::unnest(cols = c(.extracts)) ) expect_true(all(!extract_2_2$is_null_rec)) @@ -165,13 +165,13 @@ test_that("tune model and recipe", { } wflow_3 <- - workflow() %>% - add_recipe(helper_objects$rec_tune_1) %>% + workflow() |> + add_recipe(helper_objects$rec_tune_1) |> add_model(helper_objects$svm_mod) set.seed(35) grid_3 <- - extract_parameter_set_dials(wflow_3) %>% - update(num_comp = dials::num_comp(c(2, 5))) %>% + extract_parameter_set_dials(wflow_3) |> + update(num_comp = dials::num_comp(c(2, 5))) |> dials::grid_space_filling(size = 4) expect_no_error( @@ -207,14 +207,14 @@ test_that("check .config in extracts", { recipe_only_configs <- full_join( - mt_spln_lm_bo %>% - filter(id == first(id)) %>% - select(.iter, .metrics) %>% - unnest(cols = .metrics) %>% + mt_spln_lm_bo |> + filter(id == first(id)) |> + select(.iter, .metrics) |> + unnest(cols = .metrics) |> filter(.metric == first(.metric)), - mt_spln_lm_bo %>% - filter(id == first(id)) %>% - select(.iter, .extracts) %>% + mt_spln_lm_bo |> + filter(id == first(id)) |> + select(.iter, .extracts) |> unnest(cols = .extracts), by = c(".iter", "deg_free") ) diff --git a/tests/testthat/test-filter-parameters.R b/tests/testthat/test-filter-parameters.R index f497d6a2a..d201e9556 100644 --- a/tests/testthat/test-filter-parameters.R +++ b/tests/testthat/test-filter-parameters.R @@ -26,22 +26,22 @@ test_that("basic functionality", { expect_true(all(purrr::map_lgl(best_grid$.predictions, ~ all(.x$neighbors == best_param$neighbors)))) rec_tune_1 <- - recipes::recipe(mpg ~ ., data = mtcars) %>% - recipes::step_normalize(recipes::all_predictors()) %>% + recipes::recipe(mpg ~ ., data = mtcars) |> + recipes::step_normalize(recipes::all_predictors()) |> recipes::step_pca(recipes::all_predictors(), num_comp = tune()) - lm_mod <- parsnip::linear_reg() %>% parsnip::set_engine("lm") + lm_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") set.seed(363) mt_folds <- rsample::vfold_cv(mtcars, v = 3) extr_rec <- function(x) { - extract_recipe(x) %>% tidy(number = 2) + extract_recipe(x) |> tidy(number = 2) } lm_res <- - workflow() %>% - add_recipe(rec_tune_1) %>% - add_model(lm_mod) %>% + workflow() |> + add_recipe(rec_tune_1) |> + add_model(lm_mod) |> tune_grid( resamples = mt_folds, control = control_grid(extract = extr_rec), diff --git a/tests/testthat/test-finalization.R b/tests/testthat/test-finalization.R index 4df6394ff..a0ccf5f03 100644 --- a/tests/testthat/test-finalization.R +++ b/tests/testthat/test-finalization.R @@ -6,25 +6,25 @@ test_that("cannot finalize with recipe parameters", { rs <- rsample::vfold_cv(mtcars) mod_1 <- - parsnip::rand_forest(mtry = tune(), trees = 20, min_n = tune()) %>% - parsnip::set_engine("randomForest") %>% + parsnip::rand_forest(mtry = tune(), trees = 20, min_n = tune()) |> + parsnip::set_engine("randomForest") |> parsnip::set_mode("regression") rec_1 <- - recipes::recipe(mpg ~ ., data = mtcars) %>% + recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_spline_natural(disp, deg_free = tune()) rec_2 <- - recipes::recipe(mpg ~ ., data = mtcars) %>% + recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_spline_natural(disp, deg_free = 3) expect_snapshot(error = TRUE, { - mod_1 %>% tune_grid(rec_1, resamples = rs, grid = 3) + mod_1 |> tune_grid(rec_1, resamples = rs, grid = 3) }) set.seed(987323) expect_no_error( - suppressMessages(mod_1 %>% tune_grid(rec_2, resamples = rs, grid = 3)) + suppressMessages(mod_1 |> tune_grid(rec_2, resamples = rs, grid = 3)) ) }) @@ -38,19 +38,19 @@ test_that("skip error if grid is supplied", { rs <- rsample::vfold_cv(mtcars) mod_1 <- - parsnip::rand_forest(mtry = tune(), trees = 20, min_n = tune()) %>% - parsnip::set_engine("randomForest") %>% + parsnip::rand_forest(mtry = tune(), trees = 20, min_n = tune()) |> + parsnip::set_engine("randomForest") |> parsnip::set_mode("regression") rec_1 <- - recipes::recipe(mpg ~ ., data = mtcars) %>% + recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_spline_natural(disp, deg_free = tune()) grid <- tibble::tibble(mtry = 1:3, deg_free = c(3, 3, 4), min_n = c(5, 4, 6)) set.seed(987323) expect_no_error( - mod_1 %>% tune_grid(rec_1, resamples = rs, grid = grid) + mod_1 |> tune_grid(rec_1, resamples = rs, grid = grid) ) }) @@ -61,10 +61,10 @@ test_that("finalize recipe step with multiple tune parameters", { data(biomass, package = "modeldata") - model_spec <- parsnip::linear_reg() %>% + model_spec <- parsnip::linear_reg() |> parsnip::set_engine("lm") - rec <- recipes::recipe(HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur, data = biomass) %>% + rec <- recipes::recipe(HHV ~ carbon + hydrogen + oxygen + nitrogen + sulfur, data = biomass) |> recipes::step_spline_b(carbon, hydrogen, deg_free = tune(), degree = tune()) best <- tibble(deg_free = 2, degree = 1, .config = "Preprocessor1_Model1") @@ -83,7 +83,7 @@ test_that("finalize tailors", { library(tailor) adjust_rng <- - tailor() %>% + tailor() |> adjust_numeric_range(lower_limit = tune(), upper_limit = tune()) adj_1 <- finalize_tailor(adjust_rng, tibble(lower_limit = 2)) @@ -114,66 +114,66 @@ test_that("finalize workflows with tailors", { library(purrr) adjust_rng <- - tailor() %>% + tailor() |> adjust_numeric_range(lower_limit = tune(), upper_limit = tune()) wflow <- workflow(y ~ ., linear_reg(), adjust_rng) wflow_1 <- finalize_workflow(wflow, tibble(lower_limit = 2)) expect_equal( - wflow_1 %>% - extract_postprocessor() %>% - pluck("adjustments") %>% - pluck(1) %>% - pluck("arguments") %>% + wflow_1 |> + extract_postprocessor() |> + pluck("adjustments") |> + pluck(1) |> + pluck("arguments") |> pluck("lower_limit"), 2 ) expect_equal( - wflow_1 %>% - extract_postprocessor() %>% - pluck("adjustments") %>% - pluck(1) %>% - pluck("arguments") %>% + wflow_1 |> + extract_postprocessor() |> + pluck("adjustments") |> + pluck(1) |> + pluck("arguments") |> pluck("upper_limit"), tune() ) wflow_2 <- finalize_workflow(wflow, tibble(lower_limit = 2, upper_limit = 3)) expect_equal( - wflow_2 %>% - extract_postprocessor() %>% - pluck("adjustments") %>% - pluck(1) %>% - pluck("arguments") %>% + wflow_2 |> + extract_postprocessor() |> + pluck("adjustments") |> + pluck(1) |> + pluck("arguments") |> pluck("lower_limit"), 2 ) expect_equal( - wflow_2 %>% - extract_postprocessor() %>% - pluck("adjustments") %>% - pluck(1) %>% - pluck("arguments") %>% + wflow_2 |> + extract_postprocessor() |> + pluck("adjustments") |> + pluck(1) |> + pluck("arguments") |> pluck("upper_limit"), 3 ) wflow_3 <- finalize_workflow(wflow, tibble(lower_limit = 2, upper_limit = 3, a = 2)) expect_equal( - wflow_3 %>% - extract_postprocessor() %>% - pluck("adjustments") %>% - pluck(1) %>% - pluck("arguments") %>% + wflow_3 |> + extract_postprocessor() |> + pluck("adjustments") |> + pluck(1) |> + pluck("arguments") |> pluck("lower_limit"), 2 ) expect_equal( - wflow_3 %>% - extract_postprocessor() %>% - pluck("adjustments") %>% - pluck(1) %>% - pluck("arguments") %>% + wflow_3 |> + extract_postprocessor() |> + pluck("adjustments") |> + pluck(1) |> + pluck("arguments") |> pluck("upper_limit"), 3 ) diff --git a/tests/testthat/test-fit_best.R b/tests/testthat/test-fit_best.R index 98a77769c..6c865e7f7 100644 --- a/tests/testthat/test-fit_best.R +++ b/tests/testthat/test-fit_best.R @@ -8,7 +8,7 @@ test_that("fit_best", { library(parsnip) data(meats, package = "modeldata") - meats <- meats %>% select(-water, -fat) + meats <- meats |> select(-water, -fat) set.seed(1) meat_split <- initial_split(meats) @@ -19,10 +19,10 @@ test_that("fit_best", { meat_rs <- vfold_cv(meat_train, v = 3) pca_rec <- - recipe(protein ~ ., data = meat_train) %>% + recipe(protein ~ ., data = meat_train) |> step_pca(all_predictors(), num_comp = tune()) - knn_mod <- nearest_neighbor(neighbors = tune()) %>% set_mode("regression") + knn_mod <- nearest_neighbor(neighbors = tune()) |> set_mode("regression") ctrl <- control_grid(save_workflow = TRUE) @@ -75,7 +75,7 @@ test_that("fit_best() works with validation split: 3-way split", { val_set <- validation_set(initial_val_split) f <- Sale_Price ~ Gr_Liv_Area + Year_Built - knn_mod <- nearest_neighbor(neighbors = tune()) %>% set_mode("regression") + knn_mod <- nearest_neighbor(neighbors = tune()) |> set_mode("regression") wflow <- workflow(f, knn_mod) tune_res <- tune_grid( @@ -83,14 +83,14 @@ test_that("fit_best() works with validation split: 3-way split", { grid = tibble(neighbors = c(1, 5)), resamples = val_set, control = control_grid(save_workflow = TRUE) - ) %>% suppressWarnings() + ) |> suppressWarnings() set.seed(3) fit_on_train <- fit_best(tune_res) pred <- predict(fit_on_train, testing(initial_val_split)) set.seed(3) - exp_fit_on_train <- nearest_neighbor(neighbors = 5) %>% - set_mode("regression") %>% + exp_fit_on_train <- nearest_neighbor(neighbors = 5) |> + set_mode("regression") |> fit(f, training(initial_val_split)) exp_pred <- predict(exp_fit_on_train, testing(initial_val_split)) @@ -108,7 +108,7 @@ test_that("fit_best() works with validation split: 2x 2-way splits", { val_set <- validation_set(split) f <- Sale_Price ~ Gr_Liv_Area + Year_Built - knn_mod <- nearest_neighbor(neighbors = tune()) %>% set_mode("regression") + knn_mod <- nearest_neighbor(neighbors = tune()) |> set_mode("regression") wflow <- workflow(f, knn_mod) tune_res <- tune_grid( @@ -122,8 +122,8 @@ test_that("fit_best() works with validation split: 2x 2-way splits", { pred <- predict(fit_on_train_and_val, testing(split)) set.seed(3) - exp_fit_on_train_and_val <- nearest_neighbor(neighbors = 5) %>% - set_mode("regression") %>% + exp_fit_on_train_and_val <- nearest_neighbor(neighbors = 5) |> + set_mode("regression") |> fit(f, train) exp_pred <- predict(exp_fit_on_train_and_val, testing(split)) @@ -134,7 +134,7 @@ test_that( "fit_best() warns when metric or eval_time are specified in addition to parameters", { skip_if_not_installed("kknn") - knn_mod <- nearest_neighbor(neighbors = tune()) %>% set_mode("regression") + knn_mod <- nearest_neighbor(neighbors = tune()) |> set_mode("regression") res <- tune_grid( workflow(mpg ~ ., knn_mod), diff --git a/tests/testthat/test-grid.R b/tests/testthat/test-grid.R index 89352f0c1..830466a10 100644 --- a/tests/testthat/test-grid.R +++ b/tests/testthat/test-grid.R @@ -4,10 +4,10 @@ test_that("tune recipe only", { helper_objects <- helper_objects_tune() set.seed(4400) - wflow <- workflow() %>% - add_recipe(helper_objects$rec_tune_1) %>% + wflow <- workflow() |> + add_recipe(helper_objects$rec_tune_1) |> add_model(helper_objects$lm_mod) - pset <- extract_parameter_set_dials(wflow) %>% + pset <- extract_parameter_set_dials(wflow) |> update(num_comp = dials::num_comp(c(1, 3))) grid <- dials::grid_regular(pset, levels = 3) folds <- rsample::vfold_cv(mtcars) @@ -41,8 +41,8 @@ test_that("tune model only (with recipe)", { helper_objects <- helper_objects_tune() set.seed(4400) - wflow <- workflow() %>% - add_recipe(helper_objects$rec_no_tune_1) %>% + wflow <- workflow() |> + add_recipe(helper_objects$rec_no_tune_1) |> add_model(helper_objects$svm_mod) pset <- extract_parameter_set_dials(wflow) grid <- dials::grid_regular(pset, levels = 3) @@ -75,8 +75,8 @@ test_that("tune model only (with variables)", { set.seed(4400) - wflow <- workflow() %>% - add_variables(mpg, everything()) %>% + wflow <- workflow() |> + add_variables(mpg, everything()) |> add_model(helper_objects$svm_mod) pset <- extract_parameter_set_dials(wflow) @@ -104,8 +104,8 @@ test_that("tune model only (with recipe, multi-predict)", { helper_objects <- helper_objects_tune() set.seed(4400) - wflow <- workflow() %>% - add_recipe(helper_objects$rec_no_tune_1) %>% + wflow <- workflow() |> + add_recipe(helper_objects$rec_no_tune_1) |> add_model(helper_objects$svm_mod) pset <- extract_parameter_set_dials(wflow) grid <- dials::grid_regular(pset, levels = 3) @@ -354,10 +354,10 @@ test_that("tune model and recipe", { helper_objects <- helper_objects_tune() set.seed(4400) - wflow <- workflow() %>% - add_recipe(helper_objects$rec_tune_1) %>% + wflow <- workflow() |> + add_recipe(helper_objects$rec_tune_1) |> add_model(helper_objects$svm_mod) - pset <- extract_parameter_set_dials(wflow) %>% + pset <- extract_parameter_set_dials(wflow) |> update(num_comp = dials::num_comp(c(1, 3))) grid <- dials::grid_regular(pset, levels = 3) folds <- rsample::vfold_cv(mtcars) @@ -396,10 +396,10 @@ test_that("tune model and recipe (multi-predict)", { helper_objects <- helper_objects_tune() set.seed(4400) - wflow <- workflow() %>% - add_recipe(helper_objects$rec_tune_1) %>% + wflow <- workflow() |> + add_recipe(helper_objects$rec_tune_1) |> add_model(helper_objects$svm_mod) - pset <- extract_parameter_set_dials(wflow) %>% + pset <- extract_parameter_set_dials(wflow) |> update(num_comp = dials::num_comp(c(2, 3))) grid <- dials::grid_regular(pset, levels = c(3, 2)) folds <- rsample::vfold_cv(mtcars) @@ -420,10 +420,10 @@ test_that('tune model and recipe (parallel_over = "everything")', { helper_objects <- helper_objects_tune() set.seed(4400) - wflow <- workflow() %>% - add_recipe(helper_objects$rec_tune_1) %>% + wflow <- workflow() |> + add_recipe(helper_objects$rec_tune_1) |> add_model(helper_objects$svm_mod) - pset <- extract_parameter_set_dials(wflow) %>% + pset <- extract_parameter_set_dials(wflow) |> update(num_comp = dials::num_comp(c(1, 3))) grid <- dials::grid_regular(pset, levels = 3) folds <- rsample::vfold_cv(mtcars) @@ -454,10 +454,10 @@ test_that("tune recipe only - failure in recipe is caught elegantly", { set.seed(7898) data_folds <- rsample::vfold_cv(mtcars, v = 2) - rec <- recipe(mpg ~ ., data = mtcars) %>% + rec <- recipe(mpg ~ ., data = mtcars) |> step_spline_b(disp, deg_free = tune()) - model <- linear_reg(mode = "regression") %>% + model <- linear_reg(mode = "regression") |> set_engine("lm") # NA values not allowed in recipe @@ -506,7 +506,7 @@ test_that("tune model only - failure in recipe is caught elegantly", { data_folds <- rsample::vfold_cv(mtcars, v = 2) # NA values not allowed in recipe - rec <- recipe(mpg ~ ., data = mtcars) %>% + rec <- recipe(mpg ~ ., data = mtcars) |> step_spline_b(disp, deg_free = NA_real_) cars_grid <- tibble(cost = c(0.01, 0.02)) @@ -577,7 +577,7 @@ test_that("tune model and recipe - failure in recipe is caught elegantly", { set.seed(7898) data_folds <- rsample::vfold_cv(mtcars, v = 2) - rec <- recipe(mpg ~ ., data = mtcars) %>% + rec <- recipe(mpg ~ ., data = mtcars) |> step_spline_b(disp, deg_free = tune()) @@ -642,8 +642,8 @@ test_that("ellipses with tune_grid", { helper_objects <- helper_objects_tune() - wflow <- workflow() %>% - add_recipe(helper_objects$rec_tune_1) %>% + wflow <- workflow() |> + add_recipe(helper_objects$rec_tune_1) |> add_model(helper_objects$lm_mod) folds <- rsample::vfold_cv(mtcars) expect_snapshot( @@ -670,8 +670,8 @@ test_that("retain extra attributes", { helper_objects <- helper_objects_tune() set.seed(4400) - wflow <- workflow() %>% - add_recipe(helper_objects$rec_no_tune_1) %>% + wflow <- workflow() |> + add_recipe(helper_objects$rec_no_tune_1) |> add_model(helper_objects$svm_mod) pset <- extract_parameter_set_dials(wflow) grid <- dials::grid_regular(pset, levels = 3) @@ -690,8 +690,8 @@ test_that("retain extra attributes", { expect_true(inherits(att$metrics, "metric_set")) set.seed(4400) - wflow <- workflow() %>% - add_formula(mpg ~ .) %>% + wflow <- workflow() |> + add_formula(mpg ~ .) |> add_model(helper_objects$svm_mod) pset <- extract_parameter_set_dials(wflow) grid <- dials::grid_regular(pset, levels = 3) @@ -718,8 +718,8 @@ test_that("retain extra attributes", { expect_null(attr(res, "workflow")) expect_true(inherits(attr(res2, "workflow"), "workflow")) - wflow2 <- workflow() %>% - add_recipe(recipes::recipe(mpg ~ ., mtcars[rep(1:32, 3000), ])) %>% + wflow2 <- workflow() |> + add_recipe(recipes::recipe(mpg ~ ., mtcars[rep(1:32, 3000), ])) |> add_model(helper_objects$svm_mod) pset2 <- extract_parameter_set_dials(wflow2) grid2 <- dials::grid_regular(pset2, levels = 3) diff --git a/tests/testthat/test-grid_helpers.R b/tests/testthat/test-grid_helpers.R index dff242c97..55b1c26ea 100644 --- a/tests/testthat/test-grid_helpers.R +++ b/tests/testthat/test-grid_helpers.R @@ -4,7 +4,7 @@ test_that("compute_grid_info - recipe only", { library(parsnip) library(dials) - rec <- recipe(mpg ~ ., mtcars) %>% step_spline_natural(deg_free = tune()) + rec <- recipe(mpg ~ ., mtcars) |> step_spline_natural(deg_free = tune()) wflow <- workflow() wflow <- add_model(wflow, boost_tree(mode = "regression")) @@ -97,7 +97,7 @@ test_that("compute_grid_info - recipe and model (no submodels)", { library(recipes) library(dials) - rec <- recipe(mpg ~ ., mtcars) %>% step_spline_natural(deg_free = tune()) + rec <- recipe(mpg ~ ., mtcars) |> step_spline_natural(deg_free = tune()) spec <- boost_tree(mode = "regression", learn_rate = tune()) wflow <- workflow() @@ -131,7 +131,7 @@ test_that("compute_grid_info - recipe and model (with submodels)", { library(recipes) library(dials) - rec <- recipe(mpg ~ ., mtcars) %>% step_spline_natural(deg_free = tune()) + rec <- recipe(mpg ~ ., mtcars) |> step_spline_natural(deg_free = tune()) spec <- boost_tree(mode = "regression", trees = tune()) wflow <- workflow() @@ -179,7 +179,7 @@ test_that("compute_grid_info - recipe and model (with and without submodels)", { library(recipes) library(dials) - rec <- recipe(mpg ~ ., mtcars) %>% step_spline_natural(deg_free = tune()) + rec <- recipe(mpg ~ ., mtcars) |> step_spline_natural(deg_free = tune()) spec <- boost_tree(mode = "regression", trees = tune(), loss_reduction = tune()) wflow <- workflow() @@ -190,7 +190,7 @@ test_that("compute_grid_info - recipe and model (with and without submodels)", { set.seed(1) param_set <- extract_parameter_set_dials(wflow) grid <- - bind_rows(grid_regular(param_set), grid_space_filling(param_set)) %>% + bind_rows(grid_regular(param_set), grid_space_filling(param_set)) |> arrange(deg_free, loss_reduction, trees) res <- compute_grid_info(wflow, grid) @@ -222,10 +222,10 @@ test_that("compute_grid_info - recipe and model (with and without submodels)", { ) ) expect_equal( - res %>% - mutate(num_models = purrr::map_int(.iter_config, length)) %>% + res |> + mutate(num_models = purrr::map_int(.iter_config, length)) |> summarize(n = sum(num_models), .by = c(deg_free)), - grid %>% count(deg_free) + grid |> count(deg_free) ) expect_named( res, @@ -243,8 +243,8 @@ test_that("compute_grid_info - model (with and without submodels)", { library(dials) rec <- recipe(mpg ~ ., mtcars) - spec <- mars(num_terms = tune(), prod_degree = tune(), prune_method = tune()) %>% - set_mode("classification") %>% + spec <- mars(num_terms = tune(), prod_degree = tune(), prune_method = tune()) |> + set_mode("classification") |> set_engine("earth") wflow <- workflow() @@ -253,7 +253,7 @@ test_that("compute_grid_info - model (with and without submodels)", { set.seed(123) params_grid <- grid_space_filling( - num_terms() %>% range_set(c(1L, 12L)), + num_terms() |> range_set(c(1L, 12L)), prod_degree(), prune_method(values = c("backward", "none", "forward")), size = 7, @@ -305,11 +305,11 @@ test_that("compute_grid_info - recipe and model (no submodels but has inner grid helper_objects <- helper_objects_tune() - wflow <- workflow() %>% - add_recipe(helper_objects$rec_tune_1) %>% + wflow <- workflow() |> + add_recipe(helper_objects$rec_tune_1) |> add_model(helper_objects$svm_mod) - pset <- extract_parameter_set_dials(wflow) %>% + pset <- extract_parameter_set_dials(wflow) |> update(num_comp = dials::num_comp(c(1, 3))) grid <- dials::grid_regular(pset, levels = 3) diff --git a/tests/testthat/test-int_pctl.R b/tests/testthat/test-int_pctl.R index 512fccb0f..4c4fbba19 100644 --- a/tests/testthat/test-int_pctl.R +++ b/tests/testthat/test-int_pctl.R @@ -9,7 +9,7 @@ test_that("percentile intervals - resamples only", { set.seed(13) sac_rs <- vfold_cv(Sacramento) lm_res <- - linear_reg() %>% + linear_reg() |> fit_resamples( log10(price) ~ beds + baths + sqft + type + latitude + longitude, resamples = sac_rs, @@ -50,7 +50,7 @@ test_that("percentile intervals - last fit", { sac_split <- initial_split(Sacramento) lm_res <- - linear_reg() %>% + linear_reg() |> last_fit( log10(price) ~ beds + baths + sqft + type + latitude + longitude, metrics = metric_set(mae), @@ -88,9 +88,9 @@ test_that("percentile intervals - grid + bayes tuning", { cls_rs <- vfold_cv(two_class_dat) c5_res <- - decision_tree(min_n = tune()) %>% - set_engine("C5.0") %>% - set_mode("classification") %>% + decision_tree(min_n = tune()) |> + set_engine("C5.0") |> + set_mode("classification") |> tune_grid( Class ~., resamples = cls_rs, @@ -116,9 +116,9 @@ test_that("percentile intervals - grid + bayes tuning", { set.seed(92) c5_bo_res <- - decision_tree(min_n = tune()) %>% - set_engine("C5.0") %>% - set_mode("classification") %>% + decision_tree(min_n = tune()) |> + set_engine("C5.0") |> + set_mode("classification") |> tune_bayes( Class ~., resamples = cls_rs, @@ -148,9 +148,9 @@ test_that("percentile intervals - grid + bayes tuning", { # ------------------------------------------------------------------------------ c5_mixed_res <- - decision_tree(min_n = tune()) %>% - set_engine("C5.0") %>% - set_mode("classification") %>% + decision_tree(min_n = tune()) |> + set_engine("C5.0") |> + set_mode("classification") |> tune_grid( Class ~., resamples = cls_rs, @@ -190,9 +190,9 @@ test_that("percentile intervals - grid tuning with validation set", { cls_rs <- validation_set(cls_split) c5_res <- - decision_tree(min_n = tune()) %>% - set_engine("C5.0") %>% - set_mode("classification") %>% + decision_tree(min_n = tune()) |> + set_engine("C5.0") |> + set_mode("classification") |> tune_grid( Class ~., resamples = cls_rs, diff --git a/tests/testthat/test-last-fit.R b/tests/testthat/test-last-fit.R index f9b29e8ea..f52bf95e1 100644 --- a/tests/testthat/test-last-fit.R +++ b/tests/testthat/test-last-fit.R @@ -5,10 +5,10 @@ test_that("formula method", { f <- mpg ~ cyl + poly(disp, 2) + hp + drat + wt + qsec + vs + am + gear + carb lm_fit <- lm(f, data = rsample::training(split)) test_pred <- predict(lm_fit, rsample::testing(split)) - rmse_test <- yardstick::rsq_vec(rsample::testing(split) %>% pull(mpg), test_pred) + rmse_test <- yardstick::rsq_vec(rsample::testing(split) |> pull(mpg), test_pred) - res <- parsnip::linear_reg() %>% - parsnip::set_engine("lm") %>% + res <- parsnip::linear_reg() |> + parsnip::set_engine("lm") |> last_fit(f, split) expect_equal(res, .Last.tune.result) @@ -37,12 +37,12 @@ test_that("recipe method", { f <- mpg ~ cyl + poly(disp, 2) + hp + drat + wt + qsec + vs + am + gear + carb lm_fit <- lm(f, data = rsample::training(split)) test_pred <- predict(lm_fit, rsample::testing(split)) - rmse_test <- yardstick::rsq_vec(rsample::testing(split) %>% pull(mpg), test_pred) + rmse_test <- yardstick::rsq_vec(rsample::testing(split) |> pull(mpg), test_pred) - rec <- recipes::recipe(mpg ~ ., data = mtcars) %>% + rec <- recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_poly(disp) - res <- parsnip::linear_reg() %>% - parsnip::set_engine("lm") %>% + res <- parsnip::linear_reg() |> + parsnip::set_engine("lm") |> last_fit(rec, split) expect_equal( @@ -62,7 +62,7 @@ test_that("recipe method", { test_that("model_fit method", { library(parsnip) - lm_fit <- linear_reg() %>% fit(mpg ~ ., data = mtcars) + lm_fit <- linear_reg() |> fit(mpg ~ ., data = mtcars) expect_snapshot(last_fit(lm_fit), error = TRUE) }) @@ -70,7 +70,7 @@ test_that("model_fit method", { test_that("workflow method", { library(parsnip) - lm_fit <- workflows::workflow(mpg ~ ., linear_reg()) %>% fit(data = mtcars) + lm_fit <- workflows::workflow(mpg ~ ., linear_reg()) |> fit(data = mtcars) expect_snapshot(last_fit(lm_fit), error = TRUE) }) @@ -79,8 +79,8 @@ test_that("collect metrics of last fit", { set.seed(23598723) split <- rsample::initial_split(mtcars) f <- mpg ~ cyl + poly(disp, 2) + hp + drat + wt + qsec + vs + am + gear + carb - res <- parsnip::linear_reg() %>% - parsnip::set_engine("lm") %>% + res <- parsnip::linear_reg() |> + parsnip::set_engine("lm") |> last_fit(f, split) met <- collect_metrics(res) expect_true(inherits(met, "tbl_df")) @@ -96,7 +96,7 @@ test_that("ellipses with last_fit", { f <- mpg ~ cyl + poly(disp, 2) + hp + drat + wt + qsec + vs + am + gear + carb expect_snapshot( - linear_reg() %>% set_engine("lm") %>% last_fit(f, split, something = "wrong") + linear_reg() |> set_engine("lm") |> last_fit(f, split, something = "wrong") ) }) @@ -108,8 +108,8 @@ test_that("argument order gives errors for recipe/formula", { f <- mpg ~ cyl + poly(disp, 2) + hp + drat + wt + qsec + vs + am + gear + carb - rec <- recipes::recipe(mpg ~ ., data = mtcars) %>% recipes::step_poly(disp) - lin_mod <- parsnip::linear_reg() %>% + rec <- recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_poly(disp) + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") expect_snapshot(error = TRUE, { @@ -123,12 +123,12 @@ test_that("argument order gives errors for recipe/formula", { test_that("same results of last_fit() and fit() (#300)", { skip_if_not_installed("randomForest") - rf <- parsnip::rand_forest(mtry = 2, trees = 5) %>% - parsnip::set_engine("randomForest") %>% + rf <- parsnip::rand_forest(mtry = 2, trees = 5) |> + parsnip::set_engine("randomForest") |> parsnip::set_mode("regression") - wflow <- workflows::workflow() %>% - workflows::add_model(rf) %>% + wflow <- workflows::workflow() |> + workflows::add_model(rf) |> workflows::add_formula(mpg ~ .) set.seed(23598723) @@ -153,8 +153,8 @@ test_that("`last_fit()` when objects need tuning", { options(width = 200, pillar.advice = FALSE, pillar.min_title_chars = Inf) - rec <- recipe(mpg ~ ., data = mtcars) %>% step_spline_natural(disp, deg_free = tune()) - spec_1 <- linear_reg(penalty = tune()) %>% set_engine("glmnet") + rec <- recipe(mpg ~ ., data = mtcars) |> step_spline_natural(disp, deg_free = tune()) + spec_1 <- linear_reg(penalty = tune()) |> set_engine("glmnet") spec_2 <- linear_reg() wflow_1 <- workflow(rec, spec_1) wflow_2 <- workflow(mpg ~ ., spec_1) @@ -176,10 +176,10 @@ test_that("last_fit() excludes validation set for initial_validation_split objec f <- Sale_Price ~ Gr_Liv_Area + Year_Built lm_fit <- lm(f, data = rsample::training(split)) test_pred <- predict(lm_fit, rsample::testing(split)) - rmse_test <- yardstick::rsq_vec(rsample::testing(split) %>% pull(Sale_Price), test_pred) + rmse_test <- yardstick::rsq_vec(rsample::testing(split) |> pull(Sale_Price), test_pred) - res <- parsnip::linear_reg() %>% - parsnip::set_engine("lm") %>% + res <- parsnip::linear_reg() |> + parsnip::set_engine("lm") |> last_fit(f, split) expect_equal(res, .Last.tune.result) @@ -209,10 +209,10 @@ test_that("last_fit() can include validation set for initial_validation_split ob train_val <- rbind(rsample::training(split), rsample::validation(split)) lm_fit <- lm(f, data = train_val) test_pred <- predict(lm_fit, rsample::testing(split)) - rmse_test <- yardstick::rsq_vec(rsample::testing(split) %>% pull(Sale_Price), test_pred) + rmse_test <- yardstick::rsq_vec(rsample::testing(split) |> pull(Sale_Price), test_pred) - res <- parsnip::linear_reg() %>% - parsnip::set_engine("lm") %>% + res <- parsnip::linear_reg() |> + parsnip::set_engine("lm") |> last_fit(f, split, add_validation_set = TRUE) expect_equal(res, .Last.tune.result) @@ -248,9 +248,9 @@ test_that("can use `last_fit()` with a workflow - postprocessor (requires traini workflows::workflow( y ~ x, parsnip::linear_reg() - ) %>% + ) |> workflows::add_tailor( - tailor::tailor() %>% tailor::adjust_numeric_calibration("linear") + tailor::tailor() |> tailor::adjust_numeric_calibration("linear") ) set.seed(1) @@ -293,9 +293,9 @@ test_that("can use `last_fit()` with a workflow - postprocessor (does not requir workflows::workflow( y ~ x, parsnip::linear_reg() - ) %>% + ) |> workflows::add_tailor( - tailor::tailor() %>% tailor::adjust_numeric_range(lower_limit = 1) + tailor::tailor() |> tailor::adjust_numeric_range(lower_limit = 1) ) set.seed(1) diff --git a/tests/testthat/test-logging.R b/tests/testthat/test-logging.R index b3f12014f..5d89383ab 100644 --- a/tests/testthat/test-logging.R +++ b/tests/testthat/test-logging.R @@ -152,11 +152,11 @@ test_that("logging search info", { expect_silent(check_and_log_flow(ctrl_t, tb_1)) expect_snapshot( error = TRUE, - check_and_log_flow(ctrl_t, tb_1 %>% mutate(.mean = .mean * NA)) + check_and_log_flow(ctrl_t, tb_1 |> mutate(.mean = .mean * NA)) ) expect_snapshot( error = TRUE, - check_and_log_flow(ctrl_t, tb_1 %>% mutate(.mean = .mean * NA) %>% slice(1)) + check_and_log_flow(ctrl_t, tb_1 |> mutate(.mean = .mean * NA) |> slice(1)) ) }) diff --git a/tests/testthat/test-merge.R b/tests/testthat/test-merge.R index 1f62fdeb1..eb81654ee 100644 --- a/tests/testthat/test-merge.R +++ b/tests/testthat/test-merge.R @@ -4,14 +4,14 @@ test_that("recipe merges", { data("Chicago", package = "modeldata") spline_rec <- - recipes::recipe(ridership ~ ., data = head(Chicago)) %>% - recipes::step_date(date) %>% - recipes::step_holiday(date) %>% - recipes::step_rm(date, dplyr::ends_with("away")) %>% - recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) %>% - recipes::step_other(recipes::all_nominal(), threshold = tune()) %>% - recipes::step_dummy(recipes::all_nominal()) %>% - recipes::step_normalize(recipes::all_numeric_predictors()) %>% + recipes::recipe(ridership ~ ., data = head(Chicago)) |> + recipes::step_date(date) |> + recipes::step_holiday(date) |> + recipes::step_rm(date, dplyr::ends_with("away")) |> + recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) |> + recipes::step_other(recipes::all_nominal(), threshold = tune()) |> + recipes::step_dummy(recipes::all_nominal()) |> + recipes::step_normalize(recipes::all_numeric_predictors()) |> recipes::step_spline_b(recipes::all_predictors(), deg_free = tune(), degree = tune()) spline_grid <- tibble::tribble( @@ -54,14 +54,14 @@ test_that("partially recipe merge", { data("Chicago", package = "modeldata") spline_rec <- - recipes::recipe(ridership ~ ., data = head(Chicago)) %>% - recipes::step_date(date) %>% - recipes::step_holiday(date) %>% - recipes::step_rm(date, dplyr::ends_with("away")) %>% - recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) %>% - recipes::step_other(recipes::all_nominal(), threshold = tune()) %>% - recipes::step_dummy(recipes::all_nominal()) %>% - recipes::step_normalize(recipes::all_numeric_predictors()) %>% + recipes::recipe(ridership ~ ., data = head(Chicago)) |> + recipes::step_date(date) |> + recipes::step_holiday(date) |> + recipes::step_rm(date, dplyr::ends_with("away")) |> + recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) |> + recipes::step_other(recipes::all_nominal(), threshold = tune()) |> + recipes::step_dummy(recipes::all_nominal()) |> + recipes::step_normalize(recipes::all_numeric_predictors()) |> recipes::step_spline_b(recipes::all_predictors(), deg_free = tune(), degree = tune()) spline_grid <- tibble::tribble( @@ -104,14 +104,14 @@ test_that("umerged recipe merge", { data("Chicago", package = "modeldata") spline_rec <- - recipes::recipe(ridership ~ ., data = head(Chicago)) %>% - recipes::step_date(date) %>% - recipes::step_holiday(date) %>% - recipes::step_rm(date, dplyr::ends_with("away")) %>% - recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) %>% - recipes::step_other(recipes::all_nominal(), threshold = tune()) %>% - recipes::step_dummy(recipes::all_nominal()) %>% - recipes::step_normalize(recipes::all_numeric_predictors()) %>% + recipes::recipe(ridership ~ ., data = head(Chicago)) |> + recipes::step_date(date) |> + recipes::step_holiday(date) |> + recipes::step_rm(date, dplyr::ends_with("away")) |> + recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) |> + recipes::step_other(recipes::all_nominal(), threshold = tune()) |> + recipes::step_dummy(recipes::all_nominal()) |> + recipes::step_normalize(recipes::all_numeric_predictors()) |> recipes::step_spline_b(recipes::all_predictors(), deg_free = tune(), degree = tune()) bst_grid <- tibble::tibble("funky name \n" = 1:4, rules = rep(c(TRUE, FALSE), each = 2)) @@ -142,7 +142,7 @@ test_that("umerged recipe merge", { test_that("model spec merges", { library(parsnip) bst_model <- - parsnip::boost_tree(mode = "classification", trees = tune("funky name \n")) %>% + parsnip::boost_tree(mode = "classification", trees = tune("funky name \n")) |> parsnip::set_engine("C5.0", rules = tune(), noGlobalPruning = TRUE) bst_grid <- tibble::tibble("funky name \n" = 1:4, rules = rep(c(TRUE, FALSE), each = 2)) @@ -163,7 +163,7 @@ test_that("model spec merges", { # ensure that `grid` can handle list-columns bst_model_obj <- - boost_tree(mode = "classification") %>% + boost_tree(mode = "classification") |> set_args(objective = tune()) bst_grid_obj <- tibble::tibble(objective = list("hey", "there")) @@ -182,7 +182,7 @@ test_that("model spec merges", { test_that("partially model spec merge", { bst_model <- - parsnip::boost_tree(mode = "classification", trees = tune("funky name \n")) %>% + parsnip::boost_tree(mode = "classification", trees = tune("funky name \n")) |> parsnip::set_engine("C5.0", rules = tune(), noGlobalPruning = TRUE) bst_grid <- tibble::tibble("funky name \n" = 1:4, rules = rep(c(TRUE, FALSE), each = 2)) @@ -203,7 +203,7 @@ test_that("partially model spec merge", { test_that("umerged model spec merge", { bst_model <- - parsnip::boost_tree(mode = "classification", trees = tune("funky name \n")) %>% + parsnip::boost_tree(mode = "classification", trees = tune("funky name \n")) |> parsnip::set_engine("C5.0", rules = tune(), noGlobalPruning = TRUE) bst_grid <- tibble::tibble("funky name \n" = 1:4, rules = rep(c(TRUE, FALSE), each = 2)) diff --git a/tests/testthat/test-metric-args.R b/tests/testthat/test-metric-args.R index bf633699b..12e2dc5af 100644 --- a/tests/testthat/test-metric-args.R +++ b/tests/testthat/test-metric-args.R @@ -8,7 +8,7 @@ test_that("metric inputs are checked for regression models", { library(rsample) wflow <- workflow(y ~ X1 + X2, linear_reg()) - knn_spec <- nearest_neighbor(neighbors = tune()) %>% set_mode("regression") + knn_spec <- nearest_neighbor(neighbors = tune()) |> set_mode("regression") wflow_tune <- workflow(y ~ X1 + X2, knn_spec) set.seed(1) @@ -68,7 +68,7 @@ test_that("metric inputs are checked for classification models", { data(two_class_dat, package = "modeldata") wflow <- workflow(Class ~ A + B, logistic_reg()) - knn_spec <- nearest_neighbor(neighbors = tune()) %>% set_mode("classification") + knn_spec <- nearest_neighbor(neighbors = tune()) |> set_mode("classification") wflow_tune <- workflow(Class ~ A + B, knn_spec) set.seed(1) diff --git a/tests/testthat/test-metric-single-selection.R b/tests/testthat/test-metric-single-selection.R index 024b0fd82..5666fedca 100644 --- a/tests/testthat/test-metric-single-selection.R +++ b/tests/testthat/test-metric-single-selection.R @@ -34,20 +34,20 @@ test_that("identify survival metrics", { library(yardstick) expect_false( - metric_set(rmse) %>% - tibble::as_tibble() %>% + metric_set(rmse) |> + tibble::as_tibble() |> tune:::contains_survival_metric() ) expect_true( - metric_set(brier_survival_integrated) %>% - tibble::as_tibble() %>% + metric_set(brier_survival_integrated) |> + tibble::as_tibble() |> tune:::contains_survival_metric() ) expect_true( - metric_set(brier_survival, concordance_survival) %>% - tibble::as_tibble() %>% + metric_set(brier_survival, concordance_survival) |> + tibble::as_tibble() |> tune:::contains_survival_metric() ) diff --git a/tests/testthat/test-min-grid.R b/tests/testthat/test-min-grid.R index 73d44cb02..8faac56f5 100644 --- a/tests/testthat/test-min-grid.R +++ b/tests/testthat/test-min-grid.R @@ -1,7 +1,7 @@ # ------------------------------------------------------------------------------ test_that("boosted tree grid reduction - xgboost", { - mod <- parsnip::boost_tree() %>% + mod <- parsnip::boost_tree() |> parsnip::set_engine("xgboost") # A typical grid @@ -53,7 +53,7 @@ test_that("boosted tree grid reduction - xgboost", { } # different id names - mod_1 <- parsnip::boost_tree(trees = tune("Amos")) %>% + mod_1 <- parsnip::boost_tree(trees = tune("Amos")) |> parsnip::set_engine("xgboost") reg_grid <- expand.grid(Amos = 1:3, min_n = 1:2) reg_grid_smol <- min_grid(mod_1, reg_grid) @@ -70,7 +70,7 @@ test_that("boosted tree grid reduction - xgboost", { expect_equal(all_sub_smol$Amos, 3) expect_equal(all_sub_smol$.submodels[[1]], list(Amos = 1:2)) - mod_2 <- parsnip::boost_tree(trees = tune("Ade Tukunbo")) %>% + mod_2 <- parsnip::boost_tree(trees = tune("Ade Tukunbo")) |> parsnip::set_engine("xgboost") reg_grid <- expand.grid(`Ade Tukunbo` = 1:3, min_n = 1:2, ` \t123` = 10:11) reg_grid_smol <- min_grid(mod_2, reg_grid) @@ -86,7 +86,7 @@ test_that("boosted tree grid reduction - xgboost", { # ------------------------------------------------------------------------------ test_that("boosted tree grid reduction - C5.0", { - mod <- parsnip::boost_tree() %>% parsnip::set_engine("C5.0") + mod <- parsnip::boost_tree() |> parsnip::set_engine("C5.0") # A typical grid reg_grid <- expand.grid(trees = 1:3, min_n = 1:2) @@ -137,7 +137,7 @@ test_that("boosted tree grid reduction - C5.0", { } # different id names - mod_1 <- parsnip::boost_tree(trees = tune("Marco")) %>% + mod_1 <- parsnip::boost_tree(trees = tune("Marco")) |> parsnip::set_engine("C5.0") reg_grid <- expand.grid(Marco = 1:3, min_n = 1:2) reg_grid_smol <- min_grid(mod_1, reg_grid) @@ -154,7 +154,7 @@ test_that("boosted tree grid reduction - C5.0", { expect_equal(all_sub_smol$Marco, 3) expect_equal(all_sub_smol$.submodels[[1]], list(Marco = 1:2)) - mod_2 <- parsnip::boost_tree(trees = tune("Anderson Dawes")) %>% + mod_2 <- parsnip::boost_tree(trees = tune("Anderson Dawes")) |> parsnip::set_engine("C5.0") reg_grid <- expand.grid(`Anderson Dawes` = 1:3, min_n = 1:2, ` \t123` = 10:11) reg_grid_smol <- min_grid(mod_2, reg_grid) @@ -175,7 +175,7 @@ test_that("linear regression grid reduction - glmnet", { # glmnet depends on >= 3.6.0 so we only test locally skip_if_not_installed("glmnet") - mod <- parsnip::linear_reg() %>% parsnip::set_engine("glmnet") + mod <- parsnip::linear_reg() |> parsnip::set_engine("glmnet") # A typical grid reg_grid <- expand.grid(penalty = 1:3, mixture = (1:5) / 5) @@ -232,7 +232,7 @@ test_that("linear regression grid reduction - glmnet", { } # different id names - mod_1 <- parsnip::linear_reg(penalty = tune("Shaddid")) %>% + mod_1 <- parsnip::linear_reg(penalty = tune("Shaddid")) |> parsnip::set_engine("glmnet") reg_grid <- expand.grid(Shaddid = 1:3, mixture = 1:2) reg_grid_smol <- min_grid(mod_1, reg_grid) @@ -249,7 +249,7 @@ test_that("linear regression grid reduction - glmnet", { expect_equal(all_sub_smol$Shaddid, 3) expect_equal(all_sub_smol$.submodels[[1]], list(Shaddid = 1:2)) - mod_2 <- parsnip::linear_reg(penalty = tune("Josephus Miller")) %>% + mod_2 <- parsnip::linear_reg(penalty = tune("Josephus Miller")) |> parsnip::set_engine("glmnet") reg_grid <- expand.grid(`Josephus Miller` = 1:3, mixture = 1:2, ` \t123` = 10:11) reg_grid_smol <- min_grid(mod_2, reg_grid) @@ -269,7 +269,7 @@ test_that("logistic regression grid reduction - glmnet", { # glmnet depends on >= 3.6.0 so we only test locally skip_if_not_installed("glmnet") - mod <- parsnip::logistic_reg() %>% parsnip::set_engine("glmnet") + mod <- parsnip::logistic_reg() |> parsnip::set_engine("glmnet") # A typical grid reg_grid <- expand.grid(penalty = 1:3, mixture = (1:5) / 5) @@ -327,7 +327,7 @@ test_that("logistic regression grid reduction - glmnet", { # different id names - mod_1 <- parsnip::logistic_reg(penalty = tune("Prax")) %>% + mod_1 <- parsnip::logistic_reg(penalty = tune("Prax")) |> parsnip::set_engine("glmnet") reg_grid <- expand.grid(Prax = 1:3, mixture = 1:2) reg_grid_smol <- min_grid(mod_1, reg_grid) @@ -344,7 +344,7 @@ test_that("logistic regression grid reduction - glmnet", { expect_equal(all_sub_smol$Prax, 3) expect_equal(all_sub_smol$.submodels[[1]], list(Prax = 1:2)) - mod_2 <- parsnip::logistic_reg(penalty = tune("Samara Rosenberg")) %>% + mod_2 <- parsnip::logistic_reg(penalty = tune("Samara Rosenberg")) |> parsnip::set_engine("glmnet") reg_grid <- expand.grid(`Samara Rosenberg` = 1:3, mixture = 1:2, ` \t123` = 10:11) reg_grid_smol <- min_grid(mod_2, reg_grid) @@ -360,7 +360,7 @@ test_that("logistic regression grid reduction - glmnet", { # more of a negative control test test_that("logistic regression grid reduction - spark", { reg_grid <- expand.grid(penalty = 1:3, mixture = (1:5) / 5) - reg_grid_smol <- min_grid(parsnip::logistic_reg() %>% + reg_grid_smol <- min_grid(parsnip::logistic_reg() |> parsnip::set_engine("spark"), reg_grid) expect_equal(reg_grid_smol$penalty, reg_grid$penalty) @@ -373,7 +373,7 @@ test_that("logistic regression grid reduction - spark", { # ------------------------------------------------------------------------------ test_that("MARS grid reduction - earth", { - mod <- parsnip::mars() %>% parsnip::set_engine("earth") + mod <- parsnip::mars() |> parsnip::set_engine("earth") # A typical grid reg_grid <- expand.grid(num_terms = 1:3, prod_degree = 1:2) @@ -425,7 +425,7 @@ test_that("MARS grid reduction - earth", { # different id names - mod_1 <- parsnip::mars(num_terms = tune("Filip")) %>% + mod_1 <- parsnip::mars(num_terms = tune("Filip")) |> parsnip::set_engine("earth") reg_grid <- expand.grid(Filip = 1:3, prod_degree = 1:2) reg_grid_smol <- min_grid(mod_1, reg_grid) @@ -442,7 +442,7 @@ test_that("MARS grid reduction - earth", { expect_equal(all_sub_smol$Filip, 3) expect_equal(all_sub_smol$.submodels[[1]], list(Filip = 1:2)) - mod_2 <- parsnip::mars(num_terms = tune("Elvi Okoye")) %>% + mod_2 <- parsnip::mars(num_terms = tune("Elvi Okoye")) |> parsnip::set_engine("earth") reg_grid <- expand.grid(`Elvi Okoye` = 1:3, prod_degree = 1:2, ` \t123` = 10:11) reg_grid_smol <- min_grid(mod_2, reg_grid) @@ -462,7 +462,7 @@ test_that("multinomial regression grid reduction - glmnet", { # glmnet depends on >= 3.6.0 so we only test locally skip_if_not_installed("glmnet") - mod <- parsnip::multinom_reg() %>% parsnip::set_engine("glmnet") + mod <- parsnip::multinom_reg() |> parsnip::set_engine("glmnet") # A typical grid reg_grid <- expand.grid(penalty = 1:3, mixture = (1:5) / 5) @@ -519,7 +519,7 @@ test_that("multinomial regression grid reduction - glmnet", { } # different id names - mod_1 <- parsnip::multinom_reg(penalty = tune("Cortazar")) %>% + mod_1 <- parsnip::multinom_reg(penalty = tune("Cortazar")) |> parsnip::set_engine("glmnet") reg_grid <- expand.grid(Cortazar = 1:3, mixture = 1:2) reg_grid_smol <- min_grid(mod_1, reg_grid) @@ -536,7 +536,7 @@ test_that("multinomial regression grid reduction - glmnet", { expect_equal(all_sub_smol$Cortazar, 3) expect_equal(all_sub_smol$.submodels[[1]], list(Cortazar = 1:2)) - mod_2 <- parsnip::multinom_reg(penalty = tune("Shed Garvey")) %>% + mod_2 <- parsnip::multinom_reg(penalty = tune("Shed Garvey")) |> parsnip::set_engine("glmnet") reg_grid <- expand.grid(`Shed Garvey` = 1:3, mixture = 1:2, ` \t123` = 10:11) reg_grid_smol <- min_grid(mod_2, reg_grid) @@ -554,7 +554,7 @@ test_that("multinomial regression grid reduction - glmnet", { test_that("nearest neighbors grid reduction - kknn", { - mod <- parsnip::nearest_neighbor() %>% parsnip::set_engine("kknn") + mod <- parsnip::nearest_neighbor() |> parsnip::set_engine("kknn") # A typical grid reg_grid <- expand.grid(neighbors = 1:3, dist_power = 1:2) @@ -607,7 +607,7 @@ test_that("nearest neighbors grid reduction - kknn", { # different id names - mod_1 <- parsnip::nearest_neighbor(neighbors = tune("Nami")) %>% + mod_1 <- parsnip::nearest_neighbor(neighbors = tune("Nami")) |> parsnip::set_engine("kknn") reg_grid <- expand.grid(Nami = 1:3, dist_power = 1:2) reg_grid_smol <- min_grid(mod_1, reg_grid) @@ -624,7 +624,7 @@ test_that("nearest neighbors grid reduction - kknn", { expect_equal(all_sub_smol$Nami, 3) expect_equal(all_sub_smol$.submodels[[1]], list(Nami = 1:2)) - mod_2 <- parsnip::nearest_neighbor(neighbors = tune("Michio Pa")) %>% + mod_2 <- parsnip::nearest_neighbor(neighbors = tune("Michio Pa")) |> parsnip::set_engine("kknn") reg_grid <- expand.grid(`Michio Pa` = 1:3, dist_power = 1:2, ` \t123` = 10:11) reg_grid_smol <- min_grid(mod_2, reg_grid) diff --git a/tests/testthat/test-misc.R b/tests/testthat/test-misc.R index 6de3e8af0..ccc578dbe 100644 --- a/tests/testthat/test-misc.R +++ b/tests/testthat/test-misc.R @@ -5,20 +5,20 @@ test_that("determine foreach operator", { data("Chicago", package = "modeldata") spline_rec <- - recipes::recipe(ridership ~ ., data = head(Chicago)) %>% - recipes::step_date(date) %>% - recipes::step_holiday(date) %>% - recipes::step_rm(date, dplyr::ends_with("away")) %>% - recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) %>% - recipes::step_other(recipes::all_nominal(), threshold = tune()) %>% - recipes::step_dummy(recipes::all_nominal()) %>% - recipes::step_normalize(recipes::all_numeric_predictors()) %>% + recipes::recipe(ridership ~ ., data = head(Chicago)) |> + recipes::step_date(date) |> + recipes::step_holiday(date) |> + recipes::step_rm(date, dplyr::ends_with("away")) |> + recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) |> + recipes::step_other(recipes::all_nominal(), threshold = tune()) |> + recipes::step_dummy(recipes::all_nominal()) |> + recipes::step_normalize(recipes::all_numeric_predictors()) |> recipes::step_spline_b(recipes::all_predictors(), deg_free = tune(), degree = tune()) - glmn <- parsnip::linear_reg(penalty = tune(), mixture = tune()) %>% + glmn <- parsnip::linear_reg(penalty = tune(), mixture = tune()) |> parsnip::set_engine("glmnet") chi_wflow <- - workflows::workflow() %>% - workflows::add_recipe(spline_rec) %>% + workflows::workflow() |> + workflows::add_recipe(spline_rec) |> workflows::add_model(glmn) expect_equal(tune:::get_operator(object = chi_wflow)[[1]], foreach::`%do%`) @@ -45,9 +45,9 @@ test_that("in-line formulas on outcome", { # see issues 121 w1 <- - workflow() %>% - add_formula(log(mpg) ~ .) %>% - add_model(parsnip::linear_reg() %>% parsnip::set_engine("lm")) + workflow() |> + add_formula(log(mpg) ~ .) |> + add_model(parsnip::linear_reg() |> parsnip::set_engine("lm")) expect_no_error( f1 <- fit_resamples(w1, resamples = rsample::vfold_cv(mtcars)) @@ -55,9 +55,9 @@ test_that("in-line formulas on outcome", { expect_true(inherits(f1, "resample_results")) w2 <- - workflow() %>% - add_recipe(recipes::recipe(mpg ~ ., data = mtcars) %>% recipes::step_log(mpg)) %>% - add_model(parsnip::linear_reg() %>% parsnip::set_engine("lm")) + workflow() |> + add_recipe(recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_log(mpg)) |> + add_model(parsnip::linear_reg() |> parsnip::set_engine("lm")) expect_no_error( f2 <- fit_resamples(w2, resamples = rsample::vfold_cv(mtcars)) diff --git a/tests/testthat/test-notes.R b/tests/testthat/test-notes.R index ce475dd17..49615c0b7 100644 --- a/tests/testthat/test-notes.R +++ b/tests/testthat/test-notes.R @@ -12,39 +12,39 @@ test_that("showing notes", { data(Chicago, package = "modeldata") base_wflow <- - workflow() %>% + workflow() |> add_model(linear_reg()) role_rec <- - recipe(ridership ~ ., data = Chicago) %>% - step_date(date, id = "step_date") %>% - update_role(date, new_role = "date") %>% + recipe(ridership ~ ., data = Chicago) |> + step_date(date, id = "step_date") |> + update_role(date, new_role = "date") |> update_role_requirements("date", bake = FALSE) role_bp_wflow <- - base_wflow %>% + base_wflow |> add_recipe(role_rec) set.seed(1) rs <- vfold_cv(Chicago) skip_if(packageVersion("dplyr") < "1.1.1") - expect_snapshot(res_roles <- role_bp_wflow %>% fit_resamples(rs)) + expect_snapshot(res_roles <- role_bp_wflow |> fit_resamples(rs)) expect_snapshot(show_notes(res_roles)) # example with warnings simple_rec <- - recipe(ridership ~ ., data = Chicago) %>% - step_holiday(date) %>% - step_date(date, keep_original_cols = FALSE) %>% + recipe(ridership ~ ., data = Chicago) |> + step_holiday(date) |> + step_date(date, keep_original_cols = FALSE) |> step_dummy(all_nominal_predictors(), one_hot = TRUE) simple_wflow <- - base_wflow %>% + base_wflow |> add_recipe(simple_rec) - expect_snapshot(res_simple <- simple_wflow %>% fit_resamples(rs)) + expect_snapshot(res_simple <- simple_wflow |> fit_resamples(rs)) expect_snapshot(show_notes(res_simple)) # nothing to show @@ -53,10 +53,10 @@ test_that("showing notes", { recipe(ridership ~ Austin + Belmont, data = Chicago) clean_wflow <- - base_wflow %>% + base_wflow |> add_recipe(clean_rec) - res_clean <- clean_wflow %>% fit_resamples(rs) + res_clean <- clean_wflow |> fit_resamples(rs) expect_snapshot(show_notes(.Last.tune.result)) # Get cli lines right @@ -65,7 +65,7 @@ test_that("showing notes", { rs <- rsample::vfold_cv(dat) expect_snapshot( fit_lr <- - parsnip::logistic_reg() %>% + parsnip::logistic_reg() |> fit_resamples(class ~ ., rs) ) expect_snapshot(show_notes(fit_lr)) diff --git a/tests/testthat/test-outcome-names.R b/tests/testthat/test-outcome-names.R index d1c66f36c..6b4c88e2a 100644 --- a/tests/testthat/test-outcome-names.R +++ b/tests/testthat/test-outcome-names.R @@ -30,7 +30,7 @@ test_that("recipes", { expect_equal(outcome_names(rec_3), character(0)) expect_equal(outcome_names(recipes::prep(rec_3)), character(0)) - rec_4 <- recipes::recipe(mpg ~ ., data = mtcars) %>% recipes::step_rm(mpg) + rec_4 <- recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_rm(mpg) expect_equal(outcome_names(rec_4), "mpg") expect_equal(outcome_names(recipes::prep(rec_4)), character(0)) @@ -38,18 +38,18 @@ test_that("recipes", { expect_equal(outcome_names(rec_5), character()) expect_equal(outcome_names(recipes::prep(rec_5)), character(0)) - rec_6 <- recipes::recipe(mtcars) %>% + rec_6 <- recipes::recipe(mtcars) |> recipes::update_role(mpg, new_role = "outcome") expect_equal(outcome_names(rec_6), "mpg") expect_equal(outcome_names(recipes::prep(rec_6)), "mpg") - rec_7 <- recipes::recipe(mtcars) %>% + rec_7 <- recipes::recipe(mtcars) |> recipes::update_role(mpg, disp, new_role = "outcome") expect_equal(outcome_names(rec_7), c("mpg", "disp")) expect_equal(outcome_names(recipes::prep(rec_7)), c("mpg", "disp")) - rec_8 <- recipes::recipe(mtcars) %>% - recipes::update_role(mpg, disp, new_role = "outcome") %>% + rec_8 <- recipes::recipe(mtcars) |> + recipes::update_role(mpg, disp, new_role = "outcome") |> recipes::step_rm(mpg) expect_equal(outcome_names(rec_8), c("mpg", "disp")) expect_equal(outcome_names(recipes::prep(rec_8)), "disp") @@ -61,20 +61,20 @@ test_that("recipes", { test_that("workflows + recipes", { rec_1 <- recipes::recipe(mpg ~ ., data = mtcars) rec_2 <- recipes::recipe(mpg + wt ~ ., data = mtcars) - rec_3 <- recipes::recipe(mtcars) %>% + rec_3 <- recipes::recipe(mtcars) |> recipes::update_role(mpg, new_role = "outcome") - lm_mod <- parsnip::linear_reg() %>% parsnip::set_engine("lm") - wflow <- workflow() %>% add_model(lm_mod) + lm_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") + wflow <- workflow() |> add_model(lm_mod) - wflow_1 <- wflow %>% add_recipe(rec_1) + wflow_1 <- wflow |> add_recipe(rec_1) expect_equal(outcome_names(wflow_1), "mpg") expect_equal(outcome_names(parsnip::fit(wflow_1, mtcars)), "mpg") - wflow_2 <- wflow %>% add_recipe(rec_2) + wflow_2 <- wflow |> add_recipe(rec_2) expect_equal(outcome_names(wflow_2), c("mpg", "wt")) expect_equal(outcome_names(parsnip::fit(wflow_2, mtcars)), c("mpg", "wt")) - wflow_3 <- wflow %>% add_recipe(rec_3) + wflow_3 <- wflow |> add_recipe(rec_3) expect_equal(outcome_names(wflow_3), "mpg") expect_equal(outcome_names(parsnip::fit(wflow_3, mtcars)), "mpg") }) @@ -83,14 +83,14 @@ test_that("workflows + recipes", { ## ----------------------------------------------------------------------------- test_that("workflows + formulas", { - lm_mod <- parsnip::linear_reg() %>% parsnip::set_engine("lm") - wflow <- workflow() %>% add_model(lm_mod) + lm_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") + wflow <- workflow() |> add_model(lm_mod) - wflow_1 <- wflow %>% add_formula(mpg ~ .) + wflow_1 <- wflow |> add_formula(mpg ~ .) expect_equal(outcome_names(wflow_1), "mpg") expect_equal(outcome_names(parsnip::fit(wflow_1, mtcars)), "mpg") - wflow_2 <- wflow %>% add_formula(mpg + wt ~ .) + wflow_2 <- wflow |> add_formula(mpg + wt ~ .) expect_equal(outcome_names(wflow_2), c("mpg", "wt")) expect_equal(outcome_names(parsnip::fit(wflow_2, mtcars)), c("mpg", "wt")) }) @@ -102,11 +102,11 @@ test_that("tune_results objects", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") - res <- lin_mod %>% - fit_resamples(mpg ~ ., folds) %>% + res <- lin_mod |> + fit_resamples(mpg ~ ., folds) |> outcome_names() expect_equal(res, "mpg") }) diff --git a/tests/testthat/test-param_set.R b/tests/testthat/test-param_set.R index 2bfdc91fb..eb76172a5 100644 --- a/tests/testthat/test-param_set.R +++ b/tests/testthat/test-param_set.R @@ -24,9 +24,9 @@ test_that("parameters.recipe() still works after deprecation", { data("Chicago", package = "modeldata") spline_rec <- - recipes::recipe(ridership ~ ., data = head(Chicago)) %>% - recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) %>% - recipes::step_other(recipes::all_nominal(), threshold = tune()) %>% + recipes::recipe(ridership ~ ., data = head(Chicago)) |> + recipes::step_impute_knn(recipes::all_predictors(), neighbors = tune("imputation")) |> + recipes::step_other(recipes::all_nominal(), threshold = tune()) |> recipes::step_spline_b(recipes::all_predictors(), deg_free = tune(), degree = tune()) spline_info <- dials::parameters(spline_rec) @@ -41,7 +41,7 @@ test_that("parameters.model_spec() still works after deprecation", { skip_if_not_installed("parsnip") bst_model <- - parsnip::boost_tree(mode = "classification", trees = tune("funky name \n")) %>% + parsnip::boost_tree(mode = "classification", trees = tune("funky name \n")) |> parsnip::set_engine("C5.0", rules = tune(), noGlobalPruning = TRUE) c5_info <- dials::parameters(bst_model) diff --git a/tests/testthat/test-predictions.R b/tests/testthat/test-predictions.R index 718515790..17687c91f 100644 --- a/tests/testthat/test-predictions.R +++ b/tests/testthat/test-predictions.R @@ -1,7 +1,7 @@ test_that("recipe only", { load(test_path("data", "test_objects.RData")) - grid <- collect_metrics(mt_spln_lm_grid) %>% - dplyr::select(deg_free) %>% + grid <- collect_metrics(mt_spln_lm_grid) |> + dplyr::select(deg_free) |> dplyr::distinct() purrr::map2( @@ -12,11 +12,11 @@ test_that("recipe only", { ) # initial values for Bayes opt - init <- mt_spln_lm_bo %>% dplyr::filter(.iter == 0) + init <- mt_spln_lm_bo |> dplyr::filter(.iter == 0) init_grid <- - collect_metrics(mt_spln_lm_bo) %>% - dplyr::filter(.iter == 0) %>% - dplyr::select(deg_free) %>% + collect_metrics(mt_spln_lm_bo) |> + dplyr::filter(.iter == 0) |> + dplyr::select(deg_free) |> dplyr::distinct() purrr::map2( @@ -27,8 +27,8 @@ test_that("recipe only", { ) # Now search iterations with a dummy grid - bo <- mt_spln_lm_bo %>% dplyr::filter(.iter > 0) - bo_grid <- init_grid %>% dplyr::slice(1) + bo <- mt_spln_lm_bo |> dplyr::filter(.iter > 0) + bo_grid <- init_grid |> dplyr::slice(1) purrr::map2( bo$splits, @@ -43,8 +43,8 @@ test_that("recipe only", { test_that("model only", { load(test_path("data", "test_objects.RData")) grid <- - collect_metrics(mt_knn_grid) %>% - dplyr::select(neighbors) %>% + collect_metrics(mt_knn_grid) |> + dplyr::select(neighbors) |> dplyr::distinct() purrr::map2( @@ -55,11 +55,11 @@ test_that("model only", { ) # initial values for Bayes opt - init <- mt_knn_bo %>% dplyr::filter(.iter == 0) + init <- mt_knn_bo |> dplyr::filter(.iter == 0) init_grid <- - collect_metrics(mt_knn_bo) %>% - dplyr::filter(.iter == 0) %>% - dplyr::select(neighbors) %>% + collect_metrics(mt_knn_bo) |> + dplyr::filter(.iter == 0) |> + dplyr::select(neighbors) |> distinct() purrr::map2( @@ -70,8 +70,8 @@ test_that("model only", { ) # Now search iterations with a dummy grid - bo <- mt_knn_bo %>% dplyr::filter(.iter > 0) - bo_grid <- init_grid %>% dplyr::slice(1) + bo <- mt_knn_bo |> dplyr::filter(.iter > 0) + bo_grid <- init_grid |> dplyr::slice(1) purrr::map2( bo$splits, @@ -87,8 +87,8 @@ test_that("model only", { test_that("model and recipe", { load(test_path("data", "test_objects.RData")) grid <- - collect_metrics(mt_spln_knn_grid) %>% - dplyr::select(deg_free, neighbors) %>% + collect_metrics(mt_spln_knn_grid) |> + dplyr::select(deg_free, neighbors) |> dplyr::distinct() purrr::map2( @@ -99,11 +99,11 @@ test_that("model and recipe", { ) # initial values for Bayes opt - init <- mt_spln_knn_bo %>% dplyr::filter(.iter == 0) + init <- mt_spln_knn_bo |> dplyr::filter(.iter == 0) init_grid <- - collect_metrics(mt_spln_knn_bo) %>% - dplyr::filter(.iter == 0) %>% - dplyr::select(deg_free, neighbors) %>% + collect_metrics(mt_spln_knn_bo) |> + dplyr::filter(.iter == 0) |> + dplyr::select(deg_free, neighbors) |> dplyr::distinct() purrr::map2( @@ -114,8 +114,8 @@ test_that("model and recipe", { ) # Now search iterations with a dummy grid - bo <- mt_spln_knn_bo %>% dplyr::filter(.iter > 0) - bo_grid <- init_grid %>% dplyr::slice(1) + bo <- mt_spln_knn_bo |> dplyr::filter(.iter > 0) + bo_grid <- init_grid |> dplyr::slice(1) purrr::map2( bo$splits, diff --git a/tests/testthat/test-pretty.R b/tests/testthat/test-pretty.R index efc9f2245..59b9ab0f0 100644 --- a/tests/testthat/test-pretty.R +++ b/tests/testthat/test-pretty.R @@ -1,14 +1,14 @@ test_that("pretty tune objects", { expect_equal( - readRDS(test_path("data", "knn_results.rds")) %>% pretty(), + readRDS(test_path("data", "knn_results.rds")) |> pretty(), "10-fold cross-validation repeated 5 times" ) expect_equal( - ames_grid_search %>% pretty(), + ames_grid_search |> pretty(), "10-fold cross-validation using stratification" ) expect_equal( - ames_iter_search %>% pretty(), + ames_iter_search |> pretty(), "10-fold cross-validation using stratification" ) }) diff --git a/tests/testthat/test-resample.R b/tests/testthat/test-resample.R index f3222ebb2..2ed4ed1b5 100644 --- a/tests/testthat/test-resample.R +++ b/tests/testthat/test-resample.R @@ -4,10 +4,10 @@ test_that("`fit_resamples()` returns a `resample_result` object", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") - result <- lin_mod %>% + result <- lin_mod |> fit_resamples(mpg ~ ., folds) expect_s3_class(result, "resample_results") @@ -22,11 +22,11 @@ test_that("can use `fit_resamples()` with a formula", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") expect_warning( - result <- lin_mod %>% + result <- lin_mod |> fit_resamples(mpg ~ ., folds), NA ) @@ -40,11 +40,11 @@ test_that("can use `fit_resamples()` with a recipe", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - rec <- recipes::recipe(mpg ~ ., data = mtcars) %>% - recipes::step_spline_natural(disp) %>% + rec <- recipes::recipe(mpg ~ ., data = mtcars) |> + recipes::step_spline_natural(disp) |> recipes::step_spline_natural(wt) - lin_mod <- linear_reg() %>% + lin_mod <- linear_reg() |> set_engine("lm") # Ensure the recipes are prepped and returned @@ -66,15 +66,15 @@ test_that("can use `fit_resamples()` with a workflow - recipe", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - rec <- recipes::recipe(mpg ~ ., data = mtcars) %>% - recipes::step_spline_natural(disp) %>% + rec <- recipes::recipe(mpg ~ ., data = mtcars) |> + recipes::step_spline_natural(disp) |> recipes::step_spline_natural(wt) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") - workflow <- workflow() %>% - add_recipe(rec) %>% + workflow <- workflow() |> + add_recipe(rec) |> add_model(lin_mod) expect <- fit_resamples(lin_mod, rec, folds) @@ -88,11 +88,11 @@ test_that("can use `fit_resamples()` with a workflow - variables", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") - workflow <- workflow() %>% - add_variables(mpg, c(cyl, disp)) %>% + workflow <- workflow() |> + add_variables(mpg, c(cyl, disp)) |> add_model(lin_mod) expect <- fit_resamples(lin_mod, mpg ~ cyl + disp, folds) @@ -106,11 +106,11 @@ test_that("can use `fit_resamples()` with a workflow - formula", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") - workflow <- workflow() %>% - add_formula(mpg ~ cyl + disp) %>% + workflow <- workflow() |> + add_formula(mpg ~ cyl + disp) |> add_model(lin_mod) expect <- fit_resamples(lin_mod, mpg ~ cyl + disp, folds) @@ -124,11 +124,11 @@ test_that("extracted workflow is finalized", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") - workflow <- workflow() %>% - add_variables(mpg, c(cyl, disp)) %>% + workflow <- workflow() |> + add_variables(mpg, c(cyl, disp)) |> add_model(lin_mod) control <- control_resamples(extract = identity) @@ -154,9 +154,9 @@ test_that("can use `fit_resamples()` with a workflow - postprocessor (requires t workflows::workflow( y ~ x, parsnip::linear_reg() - ) %>% + ) |> workflows::add_tailor( - tailor::tailor() %>% tailor::adjust_numeric_calibration("linear") + tailor::tailor() |> tailor::adjust_numeric_calibration("linear") ) set.seed(1) @@ -168,13 +168,13 @@ test_that("can use `fit_resamples()` with a workflow - postprocessor (requires t ) tune_preds <- - collect_predictions(tune_res) %>% + collect_predictions(tune_res) |> dplyr::filter(id == "Fold1") tune_wflow <- - collect_extracts(tune_res) %>% - pull(.extracts) %>% - `[[`(1) + collect_extracts(tune_res) |> + pull(.extracts) |> + purrr::pluck(1) # mock `tune::tune_grid_loop_iter`'s RNG scheme set.seed(1) @@ -215,9 +215,9 @@ test_that("can use `fit_resamples()` with a workflow - postprocessor (no trainin workflows::workflow( y ~ x, parsnip::linear_reg() - ) %>% + ) |> workflows::add_tailor( - tailor::tailor() %>% tailor::adjust_numeric_range(lower_limit = 1) + tailor::tailor() |> tailor::adjust_numeric_range(lower_limit = 1) ) set.seed(1) @@ -229,13 +229,13 @@ test_that("can use `fit_resamples()` with a workflow - postprocessor (no trainin ) tune_preds <- - collect_predictions(tune_res) %>% + collect_predictions(tune_res) |> dplyr::filter(id == "Fold1") tune_wflow <- - collect_extracts(tune_res) %>% - pull(.extracts) %>% - `[[`(1) + collect_extracts(tune_res) |> + pull(.extracts) |> + purrr::pluck(1) # mock `tune::tune_grid_loop_iter`'s RNG scheme set.seed(1) @@ -259,10 +259,10 @@ test_that("failure in recipe is caught elegantly", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - rec <- recipes::recipe(mpg ~ ., data = mtcars) %>% + rec <- recipes::recipe(mpg ~ ., data = mtcars) |> recipes::step_spline_natural(disp, deg_free = NA_real_) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") control <- control_resamples(extract = function(x) x, save_pred = TRUE) @@ -289,11 +289,11 @@ test_that("failure in variables tidyselect specification is caught elegantly", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") - workflow <- workflow() %>% - add_model(lin_mod) %>% + workflow <- workflow() |> + add_model(lin_mod) |> add_variables(mpg, foobar) control <- control_resamples(extract = function(x) x, save_pred = TRUE) @@ -322,7 +322,7 @@ test_that("classification models generate correct error message", { rec <- recipes::recipe(vs ~ ., data = mtcars) - log_mod <- parsnip::logistic_reg() %>% + log_mod <- parsnip::logistic_reg() |> parsnip::set_engine("glm") control <- control_resamples(extract = function(x) x, save_pred = TRUE) @@ -352,7 +352,7 @@ test_that("`tune_grid()` falls back to `fit_resamples()` - formula", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") expect <- fit_resamples(lin_mod, mpg ~ ., folds) @@ -368,11 +368,11 @@ test_that("`tune_grid()` falls back to `fit_resamples()` - workflow variables", set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") - wf <- workflow() %>% - add_model(lin_mod) %>% + wf <- workflow() |> + add_model(lin_mod) |> add_variables(mpg, c(cyl, disp)) expect <- fit_resamples(wf, folds) @@ -388,14 +388,14 @@ test_that("`tune_grid()` ignores `grid` if there are no tuning parameters", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") - expect <- lin_mod %>% + expect <- lin_mod |> fit_resamples(mpg ~ ., folds) expect_snapshot( - result <- lin_mod %>% tune_grid(mpg ~ ., grid = data.frame(x = 1), folds) + result <- lin_mod |> tune_grid(mpg ~ ., grid = data.frame(x = 1), folds) ) expect_equal(collect_metrics(expect), collect_metrics(result)) @@ -408,10 +408,10 @@ test_that("cannot autoplot `fit_resamples()` results", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") - result <- lin_mod %>% + result <- lin_mod |> fit_resamples(mpg ~ ., folds) expect_snapshot(error = TRUE, { @@ -422,11 +422,11 @@ test_that("cannot autoplot `fit_resamples()` results", { test_that("ellipses with fit_resamples", { folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") expect_snapshot( - lin_mod %>% fit_resamples(mpg ~ ., folds, something = "wrong") + lin_mod |> fit_resamples(mpg ~ ., folds, something = "wrong") ) }) @@ -436,11 +436,11 @@ test_that("argument order gives errors for recipe/formula", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - rec <- recipes::recipe(mpg ~ ., data = mtcars) %>% - recipes::step_spline_natural(disp) %>% + rec <- recipes::recipe(mpg ~ ., data = mtcars) |> + recipes::step_spline_natural(disp) |> recipes::step_spline_natural(wt) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") expect_snapshot(error = TRUE, { @@ -459,10 +459,10 @@ test_that("retain extra attributes", { set.seed(6735) folds <- rsample::vfold_cv(mtcars, v = 2) - lin_mod <- parsnip::linear_reg() %>% + lin_mod <- parsnip::linear_reg() |> parsnip::set_engine("lm") - res <- lin_mod %>% + res <- lin_mod |> fit_resamples(mpg ~ ., folds) att <- attributes(res) @@ -476,7 +476,7 @@ test_that("retain extra attributes", { expect_true(inherits(att$parameters, "parameters")) expect_true(inherits(att$metrics, "metric_set")) - res2 <- lin_mod %>% + res2 <- lin_mod |> fit_resamples( mpg ~ ., folds, @@ -497,8 +497,8 @@ test_that("retain extra attributes", { test_that("`fit_resamples()` when objects need tuning", { - rec <- recipe(mpg ~ ., data = mtcars) %>% step_spline_natural(disp, deg_free = tune()) - spec_1 <- linear_reg(penalty = tune()) %>% set_engine("glmnet") + rec <- recipe(mpg ~ ., data = mtcars) |> step_spline_natural(disp, deg_free = tune()) + spec_1 <- linear_reg(penalty = tune()) |> set_engine("glmnet") spec_2 <- linear_reg() wflow_1 <- workflow(rec, spec_1) wflow_2 <- workflow(mpg ~ ., spec_1) diff --git a/tests/testthat/test-schedule.R b/tests/testthat/test-schedule.R index 5eda57f33..e716c666d 100644 --- a/tests/testthat/test-schedule.R +++ b/tests/testthat/test-schedule.R @@ -94,7 +94,7 @@ test_that("`schedule_predict_stage_i()` works with: with submodel, with post-pro grid_predict_stage <- list( tibble::tibble(trees = 1L, lower_limit = 1L), tibble::tibble(trees = c(2L, 2L), lower_limit = 1:2) - ) %>% + ) |> purrr::list_rbind() schedule <- schedule_predict_stage_i(grid_predict_stage, param_info) @@ -166,7 +166,7 @@ test_that("`schedule_model_stage_i()` works with only submodel: no non-submodel, list(c("trees", "post_stage")) ) expect_identical( - schedule$predict_stage[[1]] %>% pull("trees"), + schedule$predict_stage[[1]] |> pull("trees"), 1:2 ) }) @@ -190,7 +190,7 @@ test_that("`schedule_model_stage_i()` works with only post: no non-submodel, no list(c("post_stage")) ) expect_identical( - schedule$predict_stage[[1]] %>% pull("post_stage"), + schedule$predict_stage[[1]] |> pull("post_stage"), list(tibble::tibble(lower_limit = 1:2)) ) }) @@ -205,7 +205,7 @@ test_that("`schedule_model_stage_i()` works with both model types only: with non tibble::tibble(trees = 1:2, min_n = 1L), tibble::tibble(trees = 2L, min_n = 2L), tibble::tibble(trees = 2:3, min_n = 3L) - ) %>% + ) |> purrr::list_rbind() schedule <- schedule_model_stage_i(grid_model_stage, param_info, wflow) @@ -215,7 +215,7 @@ test_that("`schedule_model_stage_i()` works with both model types only: with non expect_identical(schedule$trees, c(2L, 2L, 3L)) expect_identical(schedule$min_n, 1:3) expect_identical( - purrr::map_chr(schedule$predict_stage, ~ class(.)[1]) %>% unique(), + purrr::map_chr(schedule$predict_stage, ~ class(.)[1]) |> unique(), "tbl_df" ) expect_identical( @@ -227,26 +227,26 @@ test_that("`schedule_model_stage_i()` works with both model types only: with non ) ) expect_identical( - schedule %>% - filter(min_n == 1) %>% - pull("predict_stage") %>% - purrr::pluck(1) %>% + schedule |> + filter(min_n == 1) |> + pull("predict_stage") |> + purrr::pluck(1) |> pull("trees"), 1:2 ) expect_identical( - schedule %>% - filter(min_n == 2) %>% - pull("predict_stage") %>% - purrr::pluck(1) %>% + schedule |> + filter(min_n == 2) |> + pull("predict_stage") |> + purrr::pluck(1) |> pull("trees"), 2L ) expect_identical( - schedule %>% - filter(min_n == 3) %>% - pull("predict_stage") %>% - purrr::pluck(1) %>% + schedule |> + filter(min_n == 3) |> + pull("predict_stage") |> + purrr::pluck(1) |> pull("trees"), 2:3 ) @@ -262,7 +262,7 @@ test_that("`schedule_model_stage_i()` works without submodel: with non-submodel, tibble::tibble(min_n = 1L, lower_limit = 1:2), tibble::tibble(min_n = 2L, lower_limit = 1:3), tibble::tibble(min_n = 3L, lower_limit = 3L) - ) %>% + ) |> purrr::list_rbind() schedule <- schedule_model_stage_i(grid_model_stage, param_info, wflow) @@ -270,42 +270,42 @@ test_that("`schedule_model_stage_i()` works without submodel: with non-submodel, expect_identical(nrow(schedule), 3L) expect_identical(schedule$min_n, 1:3) expect_identical( - purrr::map_chr(schedule$predict_stage, ~ class(.)[1]) %>% unique(), + purrr::map_chr(schedule$predict_stage, ~ class(.)[1]) |> unique(), "tbl_df" ) expect_identical( - purrr::map(schedule$predict_stage, names) %>% - purrr::list_c() %>% + purrr::map(schedule$predict_stage, names) |> + purrr::list_c() |> unique(), "post_stage" ) expect_identical( - schedule %>% - filter(min_n == 1) %>% - pull("predict_stage") %>% - purrr::pluck(1) %>% - pull("post_stage") %>% - purrr::pluck(1) %>% + schedule |> + filter(min_n == 1) |> + pull("predict_stage") |> + purrr::pluck(1) |> + pull("post_stage") |> + purrr::pluck(1) |> pull("lower_limit"), 1:2 ) expect_identical( - schedule %>% - filter(min_n == 2) %>% - pull("predict_stage") %>% - purrr::pluck(1) %>% - pull("post_stage") %>% - purrr::pluck(1) %>% + schedule |> + filter(min_n == 2) |> + pull("predict_stage") |> + purrr::pluck(1) |> + pull("post_stage") |> + purrr::pluck(1) |> pull("lower_limit"), 1:3 ) expect_identical( - schedule %>% - filter(min_n == 3) %>% - pull("predict_stage") %>% - purrr::pluck(1) %>% - pull("post_stage") %>% - purrr::pluck(1) %>% + schedule |> + filter(min_n == 3) |> + pull("predict_stage") |> + purrr::pluck(1) |> + pull("post_stage") |> + purrr::pluck(1) |> pull("lower_limit"), 3L ) @@ -330,7 +330,7 @@ test_that("`schedule_model_stage_i()` works everything: with non-submodel, with ), # another row to be combined under min_n = 1 tibble::tibble(min_n = 1L, trees = 3L, lower_limit = 4L) - ) %>% + ) |> purrr::list_rbind() schedule <- schedule_model_stage_i(grid_model_stage, param_info, wflow) @@ -338,60 +338,60 @@ test_that("`schedule_model_stage_i()` works everything: with non-submodel, with expect_identical(nrow(schedule), 2L) expect_identical(schedule$min_n, 1:2) expect_identical( - purrr::map_chr(schedule$predict_stage, ~ class(.)[1]) %>% unique(), + purrr::map_chr(schedule$predict_stage, ~ class(.)[1]) |> unique(), "tbl_df" ) expect_identical( - purrr::map(schedule$predict_stage, names) %>% - purrr::list_c() %>% + purrr::map(schedule$predict_stage, names) |> + purrr::list_c() |> unique(), c("trees", "post_stage") ) expect_identical( - schedule %>% - filter(min_n == 1) %>% + schedule |> + filter(min_n == 1) |> select(-predict_stage), tibble::tibble(trees = 3L, min_n = 1L) ) expect_identical( - schedule %>% - filter(min_n == 1) %>% - pull(predict_stage) %>% - purrr::pluck(1) %>% + schedule |> + filter(min_n == 1) |> + pull(predict_stage) |> + purrr::pluck(1) |> pull(trees), 1:3 ) expect_identical( - schedule %>% - filter(min_n == 1) %>% - pull("predict_stage") %>% - purrr::pluck(1) %>% - pull("post_stage") %>% - purrr::list_rbind() %>% + schedule |> + filter(min_n == 1) |> + pull("predict_stage") |> + purrr::pluck(1) |> + pull("post_stage") |> + purrr::list_rbind() |> pull("lower_limit"), c(1:2, 1:2, 4L) ) expect_identical( - schedule %>% - filter(min_n == 2) %>% + schedule |> + filter(min_n == 2) |> select(-predict_stage), tibble::tibble(trees = 2L, min_n = 2L) ) expect_identical( - schedule %>% - filter(min_n == 2) %>% - pull(predict_stage) %>% - purrr::pluck(1) %>% + schedule |> + filter(min_n == 2) |> + pull(predict_stage) |> + purrr::pluck(1) |> pull(trees), 1:2 ) expect_identical( - schedule %>% - filter(min_n == 2) %>% - pull("predict_stage") %>% - purrr::pluck(1) %>% - pull("post_stage") %>% - purrr::list_rbind() %>% + schedule |> + filter(min_n == 2) |> + pull("predict_stage") |> + purrr::pluck(1) |> + pull("post_stage") |> + purrr::list_rbind() |> pull("lower_limit"), c(1L, 1:3) ) @@ -425,7 +425,7 @@ test_that("`schedule_stages()` works with preprocessing", { expect_named(schedule, c("threshold", "disp_df", "model_stage")) expect_identical(nrow(schedule), 4L) expect_identical( - schedule %>% dplyr::select(-model_stage), + schedule |> dplyr::select(-model_stage), grid ) }) @@ -491,8 +491,8 @@ test_that("grid processing schedule - recipe only", { wflow_pre_only <- workflow(rec_tune, parsnip::logistic_reg()) grid_pre_only <- - extract_parameter_set_dials(wflow_pre_only) %>% - dials::grid_regular(levels = 3) %>% + extract_parameter_set_dials(wflow_pre_only) |> + dials::grid_regular(levels = 3) |> arrange(threshold, disp_df) sched_pre_only <- schedule_grid(grid_pre_only, wflow_pre_only) @@ -502,9 +502,9 @@ test_that("grid processing schedule - recipe only", { # All of the other nested tibbles should be empty expect_equal( - sched_pre_only %>% - tidyr::unnest(model_stage) %>% - tidyr::unnest(predict_stage) %>% + sched_pre_only |> + tidyr::unnest(model_stage) |> + tidyr::unnest(predict_stage) |> tidyr::unnest(post_stage), grid_pre_only ) @@ -519,7 +519,7 @@ test_that("grid processing schedule - recipe only", { test_that("grid processing schedule - model only, no submodels", { wflow_rf_only <- workflow(outcome ~ ., mod_tune_no_submodel) grid_rf_only <- - extract_parameter_set_dials(wflow_rf_only) %>% + extract_parameter_set_dials(wflow_rf_only) |> dials::grid_regular(levels = 3) sched_rf_only <- schedule_grid(grid_rf_only, wflow_rf_only) @@ -532,8 +532,8 @@ test_that("grid processing schedule - model only, no submodels", { # No real need for the loop here expect_named(sched_rf_only$model_stage[[i]], c("min_n", "predict_stage")) expect_equal( - sched_rf_only$model_stage[[i]] %>% - tidyr::unnest(predict_stage) %>% + sched_rf_only$model_stage[[i]] |> + tidyr::unnest(predict_stage) |> tidyr::unnest(post_stage), grid_rf_only ) @@ -548,11 +548,11 @@ test_that("grid processing schedule - model only, no submodels", { test_that("grid processing schedule - model only, submodels, regular grid", { wflow_bst <- workflow(outcome ~ ., mod_tune) - grid_bst <- extract_parameter_set_dials(wflow_bst) %>% + grid_bst <- extract_parameter_set_dials(wflow_bst) |> dials::grid_regular(levels = 3) - min_n_only <- grid_bst %>% dplyr::distinct(min_n) %>% dplyr::arrange(min_n) - trees_only <- grid_bst %>% dplyr::distinct(trees) %>% dplyr::arrange(trees) + min_n_only <- grid_bst |> dplyr::distinct(min_n) |> dplyr::arrange(min_n) + trees_only <- grid_bst |> dplyr::distinct(trees) |> dplyr::arrange(trees) # regular grid sched_bst <- schedule_grid(grid_bst, wflow_bst) @@ -565,7 +565,7 @@ test_that("grid processing schedule - model only, submodels, regular grid", { expect_named(sched_bst$model_stage[[i]], c("trees", "min_n", "predict_stage")) expect_equal( - sched_bst$model_stage[[i]] %>% + sched_bst$model_stage[[i]] |> dplyr::select(-trees, -predict_stage), min_n_only ) @@ -575,17 +575,17 @@ test_that("grid processing schedule - model only, submodels, regular grid", { sched_bst$model_stage[[i]]$predict_stage[[j]], c("trees", "post_stage")) expect_equal( - sched_bst$model_stage[[i]]$predict_stage[[j]] %>% + sched_bst$model_stage[[i]]$predict_stage[[j]] |> dplyr::select(trees), trees_only ) } expect_equal( - sched_bst$model_stage[[i]] %>% - dplyr::select(-trees) %>% - tidyr::unnest(predict_stage) %>% - tidyr::unnest(post_stage) %>% + sched_bst$model_stage[[i]] |> + dplyr::select(-trees) |> + tidyr::unnest(predict_stage) |> + tidyr::unnest(post_stage) |> dplyr::select(trees, min_n), grid_bst ) @@ -599,7 +599,7 @@ test_that("grid processing schedule - model only, submodels, regular grid", { test_that("grid processing schedule - model only, submodels, SFD grid", { wflow_bst <- workflow(outcome ~ ., mod_tune) - grid_sfd_bst <- extract_parameter_set_dials(wflow_bst) %>% + grid_sfd_bst <- extract_parameter_set_dials(wflow_bst) |> dials::grid_space_filling(size = 5, type = "uniform") sched_sfd_bst <- schedule_grid(grid_sfd_bst, wflow_bst) @@ -611,24 +611,24 @@ test_that("grid processing schedule - model only, submodels, SFD grid", { expect_named(sched_sfd_bst$model_stage[[1]], c("trees", "min_n", "predict_stage")) expect_equal( - sched_sfd_bst$model_stage[[1]] %>% - dplyr::select(-predict_stage) %>% - dplyr::select(trees, min_n) %>% + sched_sfd_bst$model_stage[[1]] |> + dplyr::select(-predict_stage) |> + dplyr::select(trees, min_n) |> dplyr::arrange(trees, min_n), - grid_sfd_bst %>% - dplyr::select(trees, min_n) %>% + grid_sfd_bst |> + dplyr::select(trees, min_n) |> dplyr::arrange(trees, min_n) ) expect_equal( - sched_sfd_bst$model_stage[[1]] %>% - dplyr::select(-trees) %>% - tidyr::unnest(predict_stage) %>% - tidyr::unnest(post_stage) %>% - dplyr::select(trees, min_n) %>% + sched_sfd_bst$model_stage[[1]] |> + dplyr::select(-trees) |> + tidyr::unnest(predict_stage) |> + tidyr::unnest(post_stage) |> + dplyr::select(trees, min_n) |> dplyr::arrange(trees, min_n), - grid_sfd_bst %>% - dplyr::select(trees, min_n) %>% + grid_sfd_bst |> + dplyr::select(trees, min_n) |> dplyr::arrange(trees, min_n) ) @@ -655,8 +655,8 @@ test_that("grid processing schedule - model only, submodels, irregular design", expect_named(sched_odd_bst$model_stage[[1]], c("trees", "min_n", "predict_stage")) expect_equal( - sched_odd_bst$model_stage[[1]] %>% - dplyr::select(-predict_stage) %>% + sched_odd_bst$model_stage[[1]] |> + dplyr::select(-predict_stage) |> dplyr::select(trees, min_n), tibble::tibble(trees = c(2, 1, 2, 1, 2), min_n = c(1, 2, 3, 4, 5)) ) @@ -683,7 +683,7 @@ test_that("grid processing schedule - model only, submodels, 1 point design", { wflow_bst <- workflow(outcome ~ ., mod_tune) set.seed(1) - grid_1_pt <- extract_parameter_set_dials(wflow_bst) %>% + grid_1_pt <- extract_parameter_set_dials(wflow_bst) |> dials::grid_random(size = 1) sched_1_pt <- schedule_grid(grid_1_pt, wflow_bst) @@ -730,8 +730,8 @@ test_that("grid processing schedule - postprocessing only", { skip_if_not_installed("probably") wflow_thrsh <- workflow(outcome ~ ., parsnip::logistic_reg(), tlr_tune) - grid_thrsh <- extract_parameter_set_dials(wflow_thrsh) %>% - update(lower_limit = dials::lower_limit(c(0, 1))) %>% + grid_thrsh <- extract_parameter_set_dials(wflow_thrsh) |> + update(lower_limit = dials::lower_limit(c(0, 1))) |> dials::grid_regular(levels = 3) sched_thrsh <- schedule_grid(grid_thrsh, wflow_thrsh) @@ -765,23 +765,23 @@ test_that("grid processing schedule - recipe + postprocessing, regular grid", { wflow_pre_post <- workflow(rec_tune, parsnip::logistic_reg(), tlr_tune) grid_pre_post <- - extract_parameter_set_dials(wflow_pre_post) %>% - update(lower_limit = dials::lower_limit(c(0, 1))) %>% + extract_parameter_set_dials(wflow_pre_post) |> + update(lower_limit = dials::lower_limit(c(0, 1))) |> dials::grid_regular(levels = 3) grid_pre <- - grid_pre_post %>% + grid_pre_post |> distinct(threshold, disp_df) grid_post <- - grid_pre_post %>% - distinct(lower_limit) %>% + grid_pre_post |> + distinct(lower_limit) |> arrange(lower_limit) sched_pre_post <- schedule_grid(grid_pre_post, wflow_pre_post) expect_named(sched_pre_post, c("threshold", "disp_df", "model_stage")) expect_equal( - sched_pre_post %>% select(-model_stage) %>% tibble::as_tibble(), + sched_pre_post |> select(-model_stage) |> tibble::as_tibble(), grid_pre ) @@ -796,7 +796,7 @@ test_that("grid processing schedule - recipe + postprocessing, regular grid", { c("post_stage") ) expect_identical( - sched_pre_post$model_stage[[i]]$predict_stage[[1]]$post_stage[[1]] %>% + sched_pre_post$model_stage[[i]]$predict_stage[[1]]$post_stage[[1]] |> arrange(lower_limit), grid_post ) @@ -814,18 +814,18 @@ test_that("grid processing schedule - recipe + postprocessing, irregular grid", wflow_pre_post <- workflow(rec_tune, parsnip::logistic_reg(), tlr_tune) grid_pre_post <- - extract_parameter_set_dials(wflow_pre_post) %>% - update(lower_limit = dials::lower_limit(c(0, 1))) %>% - dials::grid_regular() %>% + extract_parameter_set_dials(wflow_pre_post) |> + update(lower_limit = dials::lower_limit(c(0, 1))) |> + dials::grid_regular() |> dplyr::slice(-c(1, 14)) grid_pre <- - grid_pre_post %>% + grid_pre_post |> distinct(threshold, disp_df) grids_post <- - grid_pre_post %>% - dplyr::group_nest(threshold, disp_df) %>% + grid_pre_post |> + dplyr::group_nest(threshold, disp_df) |> mutate(data = purrr::map(data, ~ arrange(.x, lower_limit))) @@ -833,7 +833,7 @@ test_that("grid processing schedule - recipe + postprocessing, irregular grid", expect_named(sched_pre_post, c("threshold", "disp_df", "model_stage")) expect_equal( - sched_pre_post %>% select(-model_stage) %>% tibble::as_tibble(), + sched_pre_post |> select(-model_stage) |> tibble::as_tibble(), grid_pre ) @@ -849,19 +849,19 @@ test_that("grid processing schedule - recipe + postprocessing, irregular grid", ) pre_grid_i <- - sched_pre_post %>% - slice(i) %>% + sched_pre_post |> + slice(i) |> select(threshold, disp_df) post_grid_i <- - pre_grid_i %>% - inner_join(grids_post, by = dplyr::join_by(threshold, disp_df)) %>% - purrr::pluck("data") %>% - purrr::pluck(1) %>% + pre_grid_i |> + inner_join(grids_post, by = dplyr::join_by(threshold, disp_df)) |> + purrr::pluck("data") |> + purrr::pluck(1) |> arrange(lower_limit) expect_identical( - sched_pre_post$model_stage[[i]]$predict_stage[[1]]$post_stage[[1]] %>% + sched_pre_post$model_stage[[i]]$predict_stage[[1]]$post_stage[[1]] |> arrange(lower_limit), post_grid_i ) @@ -878,16 +878,16 @@ test_that("grid processing schedule - recipe + model, no submodels, regular grid wflow_pre_model <- workflow(rec_tune, mod_tune_no_submodel) grid_pre_model <- - extract_parameter_set_dials(wflow_pre_model) %>% + extract_parameter_set_dials(wflow_pre_model) |> dials::grid_regular() grid_pre <- - grid_pre_model %>% + grid_pre_model |> distinct(threshold, disp_df) grid_model <- - grid_pre_model %>% - distinct(min_n) %>% + grid_pre_model |> + distinct(min_n) |> arrange(min_n) @@ -895,14 +895,14 @@ test_that("grid processing schedule - recipe + model, no submodels, regular grid expect_named(sched_pre_model, c("threshold", "disp_df", "model_stage")) expect_equal( - sched_pre_model %>% select(-model_stage) %>% tibble::as_tibble(), + sched_pre_model |> select(-model_stage) |> tibble::as_tibble(), grid_pre ) for (i in seq_along(sched_pre_model$model_stage)) { expect_named(sched_pre_model$model_stage[[i]], c("min_n", "predict_stage")) expect_equal( - sched_pre_model$model_stage[[i]] %>% select(min_n) %>% arrange(min_n), + sched_pre_model$model_stage[[i]] |> select(min_n) |> arrange(min_n), grid_model ) } @@ -934,22 +934,22 @@ test_that("grid processing schedule - recipe + model, submodels, irregular grid" wflow_pre_model <- workflow(rec_tune, mod_tune) grid_pre_model <- - extract_parameter_set_dials(wflow_pre_model) %>% - dials::grid_regular() %>% + extract_parameter_set_dials(wflow_pre_model) |> + dials::grid_regular() |> # This will make the submodel parameter (trees) unbalanced for some # combination of parameters of the other parameters. slice(-c(1, 2, 11)) grid_pre <- - grid_pre_model %>% + grid_pre_model |> distinct(threshold, disp_df) grid_model <- - grid_pre_model %>% - dplyr::group_nest(threshold, disp_df) %>% + grid_pre_model |> + dplyr::group_nest(threshold, disp_df) |> mutate( - data = purrr::map(data, ~ .x %>% dplyr::summarize(trees = max(trees), .by = c(min_n))), - data = purrr::map(data, ~ .x %>% arrange(min_n)) + data = purrr::map(data, ~ .x |> dplyr::summarize(trees = max(trees), .by = c(min_n))), + data = purrr::map(data, ~ .x |> arrange(min_n)) ) @@ -957,7 +957,7 @@ test_that("grid processing schedule - recipe + model, submodels, irregular grid" expect_named(sched_pre_model, c("threshold", "disp_df", "model_stage")) expect_equal( - sched_pre_model %>% select(-model_stage) %>% tibble::as_tibble(), + sched_pre_model |> select(-model_stage) |> tibble::as_tibble(), grid_pre ) @@ -965,7 +965,7 @@ test_that("grid processing schedule - recipe + model, submodels, irregular grid" model_i <- sched_pre_model$model_stage[[i]] expect_named(model_i, c("trees", "min_n", "predict_stage")) expect_equal( - model_i %>% select(min_n, trees) %>% arrange(min_n), + model_i |> select(min_n, trees) |> arrange(min_n), grid_model$data[[i]] ) @@ -977,24 +977,24 @@ test_that("grid processing schedule - recipe + model, submodels, irregular grid" # Get the settings that have already be resolved: other_ij <- - model_i %>% - select(-predict_stage, -trees) %>% - slice(j) %>% + model_i |> + select(-predict_stage, -trees) |> + slice(j) |> vctrs::vec_cbind( - sched_pre_model %>% - select(threshold, disp_df) %>% + sched_pre_model |> + select(threshold, disp_df) |> slice(i) ) # What are the matching values from the grid? trees_ij <- - grid_pre_model %>% - inner_join(other_ij, by = c("min_n", "threshold", "disp_df")) %>% + grid_pre_model |> + inner_join(other_ij, by = c("min_n", "threshold", "disp_df")) |> select(trees) expect_equal( - predict_j %>% select(trees) %>% arrange(trees), - trees_ij %>% arrange(trees) + predict_j |> select(trees) |> arrange(trees), + trees_ij |> arrange(trees) ) } @@ -1012,27 +1012,27 @@ test_that("grid processing schedule - recipe + model + tailor, submodels, irregu wflow_pre_model_post <- workflow(rec_tune, mod_tune, tlr_tune) grid_pre_model_post <- - extract_parameter_set_dials(wflow_pre_model_post) %>% - update(lower_limit = dials::lower_limit(c(0, 1))) %>% - dials::grid_regular() %>% + extract_parameter_set_dials(wflow_pre_model_post) |> + update(lower_limit = dials::lower_limit(c(0, 1))) |> + dials::grid_regular() |> # This will make the submodel parameter (trees) unbalanced for some # combination of parameters of the other parameters. slice(seq(1, 240, by = 7)) grid_pre <- - grid_pre_model_post %>% + grid_pre_model_post |> distinct(threshold, disp_df) grid_model <- - grid_pre_model_post %>% - select(-lower_limit) %>% - dplyr::group_nest(threshold, disp_df) %>% + grid_pre_model_post |> + select(-lower_limit) |> + dplyr::group_nest(threshold, disp_df) |> mutate( data = purrr::map( data, - ~ .x %>% dplyr::summarize(trees = max(trees), .by = c(min_n)) + ~ .x |> dplyr::summarize(trees = max(trees), .by = c(min_n)) ), - data = purrr::map(data, ~ .x %>% arrange(min_n)) + data = purrr::map(data, ~ .x |> arrange(min_n)) ) @@ -1044,7 +1044,7 @@ test_that("grid processing schedule - recipe + model + tailor, submodels, irregu expect_named(sched_pre_model_post, c("threshold", "disp_df", "model_stage")) expect_equal( - sched_pre_model_post %>% select(-model_stage) %>% tibble::as_tibble(), + sched_pre_model_post |> select(-model_stage) |> tibble::as_tibble(), grid_pre ) @@ -1053,23 +1053,23 @@ test_that("grid processing schedule - recipe + model + tailor, submodels, irregu # Get the current set of preproc parameters to remove other_i <- - sched_pre_model_post[i,] %>% + sched_pre_model_post[i,] |> dplyr::select(-model_stage) # We expect to evaluate these specific models for this set of preprocessors exp_i <- - grid_pre_model_post %>% - inner_join(other_i, by = c("threshold", "disp_df")) %>% - arrange(trees, min_n, lower_limit) %>% + grid_pre_model_post |> + inner_join(other_i, by = c("threshold", "disp_df")) |> + arrange(trees, min_n, lower_limit) |> select(trees, min_n, lower_limit) # What we will evaluate: subgrid_i <- - model_i %>% - select(-trees) %>% - unnest(predict_stage) %>% - unnest(post_stage) %>% - arrange(trees, min_n, lower_limit) %>% + model_i |> + select(-trees) |> + unnest(predict_stage) |> + unnest(post_stage) |> + arrange(trees, min_n, lower_limit) |> select(trees, min_n, lower_limit) expect_equal(subgrid_i, exp_i) @@ -1084,20 +1084,20 @@ test_that("grid processing schedule - recipe + model + tailor, submodels, irregu exp_post_grid <- # Condition on the current set of non-submodel or post param to see # what we should be evaluating: - model_ij %>% - dplyr::select(-trees) %>% - vctrs::vec_cbind(other_i) %>% + model_ij |> + dplyr::select(-trees) |> + vctrs::vec_cbind(other_i) |> dplyr::inner_join( grid_pre_model_post, by = c("threshold", "disp_df", "min_n") - ) %>% - dplyr::select(trees, lower_limit) %>% + ) |> + dplyr::select(trees, lower_limit) |> dplyr::arrange(trees, lower_limit) # Which as scheduled to be evaluated: subgrid_ij <- - predict_j %>% - unnest(post_stage) %>% + predict_j |> + unnest(post_stage) |> dplyr::arrange(trees, lower_limit) expect_equal(subgrid_ij, exp_post_grid) diff --git a/tests/testthat/test-select_best.R b/tests/testthat/test-select_best.R index 40d4f82ec..a69d3e16a 100644 --- a/tests/testthat/test-select_best.R +++ b/tests/testthat/test-select_best.R @@ -24,11 +24,11 @@ test_that("select_best()", { ) expect_equal( - select_best(rcv_results, metric = "rmse") %>% select(-.config), + select_best(rcv_results, metric = "rmse") |> select(-.config), best_rmse ) expect_equal( - select_best(rcv_results, metric = "rsq") %>% select(-.config), + select_best(rcv_results, metric = "rsq") |> select(-.config), best_rsq ) @@ -54,22 +54,22 @@ test_that("show_best()", { rcv_results <- readRDS(test_path("data", "rcv_results.rds")) rcv_rmse <- - rcv_results %>% - collect_metrics() %>% - dplyr::filter(.metric == "rmse") %>% + rcv_results |> + collect_metrics() |> + dplyr::filter(.metric == "rmse") |> dplyr::arrange(mean) expect_equal( show_best(rcv_results, metric = "rmse", n = 1), - rcv_rmse %>% slice(1) + rcv_rmse |> slice(1) ) expect_equal( show_best(rcv_results, metric = "rmse", n = nrow(rcv_rmse) + 1), rcv_rmse ) expect_equal( - show_best(rcv_results, metric = "rmse", n = 1) %>% names(), - rcv_rmse %>% names() + show_best(rcv_results, metric = "rmse", n = 1) |> names(), + rcv_rmse |> names() ) expect_snapshot({ best_default_metric <- show_best(rcv_results) @@ -210,53 +210,53 @@ test_that("select_by_* can handle metrics with direction == 'zero'", { metrics = yardstick::metric_set(yardstick::mpe, yardstick::msd) ) - tune_res_metrics <- tune_res %>% collect_metrics() + tune_res_metrics <- tune_res |> collect_metrics() expect_equal( select_best(tune_res, metric = "msd")$.config, - tune_res_metrics %>% - filter(.metric == "msd") %>% - arrange(abs(mean)) %>% - slice(1) %>% - select(.config) %>% + tune_res_metrics |> + filter(.metric == "msd") |> + arrange(abs(mean)) |> + slice(1) |> + select(.config) |> pull() ) expect_equal( select_best(tune_res, metric = "mpe")$.config, - tune_res_metrics %>% - filter(.metric == "mpe") %>% - arrange(abs(mean)) %>% - slice(1) %>% - select(.config) %>% + tune_res_metrics |> + filter(.metric == "mpe") |> + arrange(abs(mean)) |> + slice(1) |> + select(.config) |> pull() ) expect_equal( show_best(tune_res, metric = "msd", n = 5)$.config, - tune_res_metrics %>% - filter(.metric == "msd") %>% - arrange(abs(mean)) %>% - slice(1:5) %>% - select(.config) %>% + tune_res_metrics |> + filter(.metric == "msd") |> + arrange(abs(mean)) |> + slice(1:5) |> + select(.config) |> pull() ) expect_equal( show_best(tune_res, metric = "mpe", n = 5)$.config, - tune_res_metrics %>% - filter(.metric == "mpe") %>% - arrange(abs(mean)) %>% - slice(1:5) %>% - select(.config) %>% + tune_res_metrics |> + filter(.metric == "mpe") |> + arrange(abs(mean)) |> + slice(1:5) |> + select(.config) |> pull() ) # one std error, msd ---------- best <- - tune_res_metrics %>% - filter(.metric == "msd") %>% - arrange(min(abs(mean))) %>% + tune_res_metrics |> + filter(.metric == "msd") |> + arrange(min(abs(mean))) |> slice(1) bound_lower <- -abs(best$mean) - abs(best$std_err) @@ -264,10 +264,10 @@ test_that("select_by_* can handle metrics with direction == 'zero'", { expect_equal(bound_lower, -bound_upper) simplest_within_bound <- - tune_res_metrics %>% - filter(.metric == "msd") %>% - filter(abs(mean) < bound_upper) %>% - arrange(desc(neighbors)) %>% + tune_res_metrics |> + filter(.metric == "msd") |> + filter(abs(mean) < bound_upper) |> + arrange(desc(neighbors)) |> slice(1) expect_equal( @@ -277,9 +277,9 @@ test_that("select_by_* can handle metrics with direction == 'zero'", { # one std error, mpe ---------- best <- - tune_res_metrics %>% - filter(.metric == "mpe") %>% - arrange(min(abs(mean))) %>% + tune_res_metrics |> + filter(.metric == "mpe") |> + arrange(min(abs(mean))) |> slice(1) bound_lower <- -abs(best$mean) - abs(best$std_err) @@ -287,10 +287,10 @@ test_that("select_by_* can handle metrics with direction == 'zero'", { expect_equal(bound_lower, -bound_upper) simplest_within_bound <- - tune_res_metrics %>% - filter(.metric == "mpe") %>% - filter(abs(mean) < bound_upper) %>% - arrange(desc(neighbors)) %>% + tune_res_metrics |> + filter(.metric == "mpe") |> + filter(abs(mean) < bound_upper) |> + arrange(desc(neighbors)) |> slice(1) expect_equal( @@ -300,45 +300,45 @@ test_that("select_by_* can handle metrics with direction == 'zero'", { # pct loss, msd ---------- best <- - tune_res_metrics %>% - filter(.metric == "msd") %>% - arrange(abs(mean)) %>% + tune_res_metrics |> + filter(.metric == "msd") |> + arrange(abs(mean)) |> slice(1) expect_equal( select_by_pct_loss(tune_res, metric = "msd", limit = 10, desc(neighbors))$.config, - tune_res_metrics %>% - filter(.metric == "msd") %>% - dplyr::rowwise() %>% - mutate(loss = abs((abs(mean) - abs(best$mean)) / best$mean) * 100) %>% - ungroup() %>% - arrange(desc(neighbors)) %>% - slice(1:which(.config == best$.config)) %>% - filter(loss < 10) %>% - slice(1) %>% - select(.config) %>% + tune_res_metrics |> + filter(.metric == "msd") |> + dplyr::rowwise() |> + mutate(loss = abs((abs(mean) - abs(best$mean)) / best$mean) * 100) |> + ungroup() |> + arrange(desc(neighbors)) |> + slice(1:which(.config == best$.config)) |> + filter(loss < 10) |> + slice(1) |> + select(.config) |> pull() ) # pct loss, mpe ---------- best <- - tune_res_metrics %>% - filter(.metric == "mpe") %>% - arrange(abs(mean)) %>% + tune_res_metrics |> + filter(.metric == "mpe") |> + arrange(abs(mean)) |> slice(1) expect_equal( select_by_pct_loss(tune_res, metric = "mpe", limit = 10, desc(neighbors))$.config, - tune_res_metrics %>% - filter(.metric == "mpe") %>% - dplyr::rowwise() %>% - mutate(loss = abs((abs(mean) - abs(best$mean)) / best$mean) * 100) %>% - ungroup() %>% - arrange(desc(neighbors)) %>% - slice(1:which(.config == best$.config)) %>% - filter(loss < 10) %>% - slice(1) %>% - select(.config) %>% + tune_res_metrics |> + filter(.metric == "mpe") |> + dplyr::rowwise() |> + mutate(loss = abs((abs(mean) - abs(best$mean)) / best$mean) * 100) |> + ungroup() |> + arrange(desc(neighbors)) |> + slice(1:which(.config == best$.config)) |> + filter(loss < 10) |> + slice(1) |> + select(.config) |> pull() ) }) diff --git a/vignettes/extras/optimizations.Rmd b/vignettes/extras/optimizations.Rmd index ac6cb24b5..352a41b21 100644 --- a/vignettes/extras/optimizations.Rmd +++ b/vignettes/extras/optimizations.Rmd @@ -28,8 +28,8 @@ methods("multi_predict") # There are arguments for the parameter(s) that can create multiple predictions. # For xgboost, `trees` are cheap to evaluate: -parsnip:::multi_predict._xgb.Booster %>% - formals() %>% +parsnip:::multi_predict._xgb.Booster |> + formals() |> names() ``` @@ -44,13 +44,13 @@ For example, suppose that [Isomap multi-dimensional scaling](https://en.wikipedi ```{r isomap, results="hide"} data(Chicago) iso_rec <- - recipe(ridership ~ ., data = Chicago) %>% - step_dummy(all_nominal()) %>% + recipe(ridership ~ ., data = Chicago) |> + step_dummy(all_nominal()) |> step_isomap(all_predictors(), num_terms = tune()) knn_mod <- - nearest_neighbor(neighbors = tune(), weight_func = tune()) %>% - set_engine("kknn") %>% + nearest_neighbor(neighbors = tune(), weight_func = tune()) |> + set_engine("kknn") |> set_mode("regression") ``` @@ -58,8 +58,8 @@ With the following grid: ```{r} grid <- - parameters(num_terms(c(1, 9)), neighbors(), weight_func()) %>% - grid_regular(levels = c(5, 10, 7)) %>% + parameters(num_terms(c(1, 9)), neighbors(), weight_func()) |> + grid_regular(levels = c(5, 10, 7)) |> arrange(num_terms, neighbors, weight_func) grid ``` @@ -126,11 +126,11 @@ Some helpful advice to avoid errors in parallel processing is to not use variabl ```{r par-tip, eval = FALSE} num_pcs <- 3 -recipe(mpg ~ ., data = mtcars) %>% +recipe(mpg ~ ., data = mtcars) |> # Bad since num_pcs might not be found by a worker process step_pca(all_predictors(), num_comp = num_pcs) -recipe(mpg ~ ., data = mtcars) %>% +recipe(mpg ~ ., data = mtcars) |> # Good since the value is injected into the object step_pca(all_predictors(), num_comp = !!num_pcs) ``` diff --git a/vignettes/tune.Rmd b/vignettes/tune.Rmd index 9ded2ed49..76b137e8d 100644 --- a/vignettes/tune.Rmd +++ b/vignettes/tune.Rmd @@ -43,8 +43,8 @@ library(tidymodels) data(ames) set.seed(4595) -data_split <- ames %>% - mutate(Sale_Price = log10(Sale_Price)) %>% +data_split <- ames |> + mutate(Sale_Price = log10(Sale_Price)) |> initial_split(strata = Sale_Price) ames_train <- training(data_split) ames_test <- testing(data_split) @@ -53,10 +53,10 @@ ames_test <- testing(data_split) For simplicity, the sale price of a house will be modeled as a function of its geo-location. These predictors appear to have nonlinear relationships with the outcome: ```{r geo-plots, fig.alt = "A ggplot2 scatterplot. x axes plot the latitude and longitude, in side-by-side facets, and the log sale price is on the y axis. The clouds of points follow highly non-linear trends, traced by a blue trend line."} -ames_train %>% - dplyr::select(Sale_Price, Longitude, Latitude) %>% +ames_train |> + dplyr::select(Sale_Price, Longitude, Latitude) |> tidyr::pivot_longer(cols = c(Longitude, Latitude), - names_to = "predictor", values_to = "value") %>% + names_to = "predictor", values_to = "value") |> ggplot(aes(x = value, Sale_Price)) + geom_point(alpha = .2) + geom_smooth(se = FALSE) + @@ -69,8 +69,8 @@ We can tag these parameters for optimization using the `tune()` function: ```{r tag-rec} ames_rec <- - recipe(Sale_Price ~ Gr_Liv_Area + Longitude + Latitude, data = ames_train) %>% - step_log(Gr_Liv_Area, base = 10) %>% + recipe(Sale_Price ~ Gr_Liv_Area + Longitude + Latitude, data = ames_train) |> + step_log(Gr_Liv_Area, base = 10) |> step_spline_natural(Longitude, Latitude, deg_free = tune()) ``` @@ -84,9 +84,9 @@ To accomplish this, individual `step_spline_natural()` terms can be added to the ```{r tag-rec-d} ames_rec <- - recipe(Sale_Price ~ Gr_Liv_Area + Longitude + Latitude, data = ames_train) %>% - step_log(Gr_Liv_Area, base = 10) %>% - step_spline_natural(Longitude, deg_free = tune("long df")) %>% + recipe(Sale_Price ~ Gr_Liv_Area + Longitude + Latitude, data = ames_train) |> + step_log(Gr_Liv_Area, base = 10) |> + step_spline_natural(Longitude, deg_free = tune("long df")) |> step_spline_natural(Latitude, deg_free = tune("lat df")) ``` @@ -112,8 +112,8 @@ The parameter objects can be easily changed using the `update()` function: ```{r updated} ames_param <- - ames_rec %>% - extract_parameter_set_dials() %>% + ames_rec |> + extract_parameter_set_dials() |> update( `long df` = spline_degree(), `lat df` = spline_degree() @@ -153,7 +153,7 @@ There are two other ingredients that are required before tuning. First is a model specification. Using functions in parsnip, a basic linear model can be used: ```{r mod} -lm_mod <- linear_reg() %>% set_engine("lm") +lm_mod <- linear_reg() |> set_engine("lm") ``` No tuning parameters here. @@ -196,8 +196,8 @@ The values in the `mean` column are the averages of the `r nrow(cv_splits)` resa ```{r best-rmse} rmse_vals <- - estimates %>% - dplyr::filter(.metric == "rmse") %>% + estimates |> + dplyr::filter(.metric == "rmse") |> arrange(mean) rmse_vals ``` @@ -213,10 +213,10 @@ Interestingly, latitude does _not_ do well with degrees of freedom less than 8. Let's plot these spline functions over the data for both good and bad values of `deg_free`: ```{r final-vals, fig.alt = "A scatterplot much like the first one, except that a smoother, red line, representing a spline term with fewer degrees of freedom, is also plotted. The red line is much smoother but accounts for the less of the variation shown."} -ames_train %>% - dplyr::select(Sale_Price, Longitude, Latitude) %>% +ames_train |> + dplyr::select(Sale_Price, Longitude, Latitude) |> tidyr::pivot_longer(cols = c(Longitude, Latitude), - names_to = "predictor", values_to = "value") %>% + names_to = "predictor", values_to = "value") |> ggplot(aes(x = value, Sale_Price)) + geom_point(alpha = .2) + geom_smooth(se = FALSE, method = lm, formula = y ~ splines::ns(x, df = 3), col = "red") + @@ -236,8 +236,8 @@ Instead of a linear regression, a nonlinear model might provide good performance ```{r knn} # requires the kknn package knn_mod <- - nearest_neighbor(neighbors = tune(), weight_func = tune()) %>% - set_engine("kknn") %>% + nearest_neighbor(neighbors = tune(), weight_func = tune()) |> + set_engine("kknn") |> set_mode("regression") ``` @@ -246,8 +246,8 @@ The easiest approach to optimize the pre-processing and model parameters is to b ```{r knn-wflow} library(workflows) knn_wflow <- - workflow() %>% - add_model(knn_mod) %>% + workflow() |> + add_model(knn_mod) |> add_recipe(ames_rec) ``` @@ -255,8 +255,8 @@ From this, the parameter set can be used to modify the range and values of param ```{r knn-set} knn_param <- - knn_wflow %>% - extract_parameter_set_dials() %>% + knn_wflow |> + extract_parameter_set_dials() |> update( `long df` = spline_degree(c(2, 18)), `lat df` = spline_degree(c(2, 18)), @@ -295,8 +295,8 @@ autoplot(knn_search, type = "performance", metric = "rmse") The best results here were: ```{r bo-best} -collect_metrics(knn_search) %>% - dplyr::filter(.metric == "rmse") %>% +collect_metrics(knn_search) |> + dplyr::filter(.metric == "rmse") |> arrange(mean) ```