diff --git a/DESCRIPTION b/DESCRIPTION index 2ae69d6b1..d6fd03340 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: parsnip Title: A Common API to Modeling and Analysis Functions -Version: 1.2.1.9002 +Version: 1.2.1.9003 Authors@R: c( person("Max", "Kuhn", , "max@posit.co", role = c("aut", "cre")), person("Davis", "Vaughan", , "davis@posit.co", role = "aut"), @@ -25,7 +25,7 @@ Imports: ggplot2, globals, glue, - hardhat (>= 1.4.0), + hardhat (>= 1.4.0.9002), lifecycle, magrittr, pillar, @@ -40,8 +40,8 @@ Imports: vctrs (>= 0.6.0), withr Suggests: - C50, bench, + C50, covr, dials (>= 1.1.0), earth, @@ -69,16 +69,17 @@ Suggests: xgboost (>= 1.5.0.1) VignetteBuilder: knitr +Remotes: + r-lib/sparsevctrs, + tidymodels/hardhat ByteCompile: true Config/Needs/website: C50, dbarts, earth, glmnet, keras, kernlab, kknn, - LiblineaR, mgcv, nnet, parsnip, randomForest, ranger, rpart, rstanarm, - tidymodels/tidymodels, tidyverse/tidytemplate, rstudio/reticulate, + LiblineaR, mgcv, nnet, parsnip, quantreg, randomForest, ranger, rpart, + rstanarm, tidymodels/tidymodels, tidyverse/tidytemplate, rstudio/reticulate, xgboost Config/rcmdcheck/ignore-inconsequential-notes: true Config/testthat/edition: 3 Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -Remotes: - r-lib/sparsevctrs RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index d48f33586..64a8f5365 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -264,6 +264,7 @@ export(make_classes) export(make_engine_list) export(make_seealso_list) export(mars) +export(matrix_to_quantile_pred) export(max_mtry_formula) export(maybe_data_frame) export(maybe_matrix) diff --git a/NEWS.md b/NEWS.md index 8c470547f..ab97048ca 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,25 +1,41 @@ # parsnip (development version) +## New Features + +* A new model mode (`"quantile regression"`) was added. Including: + * A `linear_reg()` engine for `"quantreg"`. + * Predictions are encoded via a custom vector type. See [hardhat::quantile_pred()]. + * Predicted quantile levels are designated when the new mode is specified. See `?set_mode`. + * `fit_xy()` can now take dgCMatrix input for `x` argument (#1121). * `fit_xy()` can now take sparse tibbles as data values (#1165). * `predict()` can now take dgCMatrix and sparse tibble input for `new_data` argument, and error informatively when model doesn't support it (#1167). -* Transitioned package errors and warnings to use cli (#1147 and #1148 by - @shum461, #1153 by @RobLBaker and @wright13, #1154 by @JamesHWade, #1160, - #1161, #1081). +* New `extract_fit_time()` method has been added that returns the time it took to train the model (#853). + +## Other Changes + +* Transitioned package errors and warnings to use cli (#1147 and #1148 by @shum461, #1153 by @RobLBaker and @wright13, #1154 by @JamesHWade, #1160, #1161, #1081). * `fit_xy()` currently raises an error for `gen_additive_mod()` model specifications as the default engine (`"mgcv"`) specifies smoothing terms in model formulas. However, some engines specify smooths via additional arguments, in which case the restriction on `fit_xy()` is excessive. parsnip will now only raise an error when fitting a `gen_additive_mod()` with `fit_xy()` when using the `"mgcv"` engine (#775). * Aligned `null_model()` with other model types; the model type now has an engine argument that defaults to `"parsnip"` and is checked with the same machinery that checks other model types in the package (#1083). -* New `extract_fit_time()` method has been added that returns the time it took to train the model (#853). +## Bug Fixes * Ensure that `knit_engine_docs()` has the required packages installed (#1156). * Fixed bug where some models fit using `fit_xy()` couldn't predict (#1166). +## Breaking Change + +* For quantile prediction, the `quantile` argument to `predict()` has been deprecate in facor of `quantile_levels`. This does not affect models with mode `"quantile regression"`. + +* The quantile regression prediction type was disabled for the deprecated `surv_reg()` model. + + # parsnip 1.2.1 * Added a missing `tidy()` method for survival analysis glmnet models (#1086). diff --git a/R/aaa-import-standalone-types-check.R b/R/aaa-import-standalone-types-check.R index 6782d69b1..22ea57ba8 100644 --- a/R/aaa-import-standalone-types-check.R +++ b/R/aaa-import-standalone-types-check.R @@ -1,7 +1,3 @@ -# Standalone file: do not edit by hand -# Source: -# ---------------------------------------------------------------------- -# # --- # repo: r-lib/rlang # file: standalone-types-check.R @@ -13,6 +9,9 @@ # # ## Changelog # +# 2024-08-15: +# - `check_character()` gains an `allow_na` argument (@martaalcalde, #1724) +# # 2023-03-13: # - Improved error messages of number checkers (@teunbrand) # - Added `allow_infinite` argument to `check_number_whole()` (@mgirlich). @@ -461,15 +460,28 @@ check_formula <- function(x, # Vectors ----------------------------------------------------------------- +# TODO: Figure out what to do with logical `NA` and `allow_na = TRUE` + check_character <- function(x, ..., + allow_na = TRUE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { + if (!missing(x)) { if (is_character(x)) { + if (!allow_na && any(is.na(x))) { + abort( + sprintf("`%s` can't contain NA values.", arg), + arg = arg, + call = call + ) + } + return(invisible(NULL)) } + if (allow_null && is_null(x)) { return(invisible(NULL)) } @@ -479,7 +491,6 @@ check_character <- function(x, x, "a character vector", ..., - allow_na = FALSE, allow_null = allow_null, arg = arg, call = call diff --git a/R/aaa_models.R b/R/aaa_models.R index 1ebe8b4ef..e75080d15 100644 --- a/R/aaa_models.R +++ b/R/aaa_models.R @@ -1,6 +1,6 @@ # Initialize model environments -all_modes <- c("classification", "regression", "censored regression") +all_modes <- c("classification", "regression", "censored regression", "quantile regression") # ------------------------------------------------------------------------------ @@ -195,8 +195,8 @@ stop_missing_engine <- function(cls, call) { } check_mode_for_new_engine <- function(cls, eng, mode, call = caller_env()) { - all_modes <- get_from_env(paste0(cls, "_modes")) - if (!(mode %in% all_modes)) { + model_modes <- get_from_env(paste0(cls, "_modes")) + if (!(mode %in% model_modes)) { cli::cli_abort( "{.val {mode}} is not a known mode for model {.fn {cls}}.", call = call diff --git a/R/aaa_quantiles.R b/R/aaa_quantiles.R new file mode 100644 index 000000000..45a1942ed --- /dev/null +++ b/R/aaa_quantiles.R @@ -0,0 +1,17 @@ +#' Reformat quantile predictions +#' +#' @param x A matrix of predictions with rows as samples and columns as quantile +#' levels. +#' @param object A parsnip `model_fit` object from a quantile regression model. +#' @keywords internal +#' @export +matrix_to_quantile_pred <- function(x, object) { + if (!is.matrix(x)) { + x <- as.matrix(x) + } + rownames(x) <- NULL + n_pred_quantiles <- ncol(x) + quantile_levels <- object$spec$quantile_levels + + tibble::new_tibble(x = list(.pred_quantile = hardhat::quantile_pred(x, quantile_levels))) +} diff --git a/R/arguments.R b/R/arguments.R index f3d3198e4..156916391 100644 --- a/R/arguments.R +++ b/R/arguments.R @@ -49,6 +49,8 @@ check_eng_args <- function(args, obj, core_args) { #' set_args(mtry = 3, importance = TRUE) %>% #' set_mode("regression") #' +#' linear_reg() %>% +#' set_mode("quantile regression", quantile_levels = c(0.2, 0.5, 0.8)) #' @export set_args <- function(object, ...) { UseMethod("set_args") @@ -89,12 +91,18 @@ set_args.default <- function(object,...) { #' @rdname set_args #' @export -set_mode <- function(object, mode) { +set_mode <- function(object, mode, ...) { UseMethod("set_mode") } +#' @rdname set_args +#' @param quantile_levels A vector of values between zero and one (only for the +#' `"quantile regression"` mode); otherwise, it is `NULL`. The model uses these +#' values to appropriately train quantile regression models to make predictions +#' for these values (e.g., `quantile_levels = 0.5` is the median). #' @export -set_mode.model_spec <- function(object, mode) { +set_mode.model_spec <- function(object, mode, quantile_levels = NULL, ...) { + check_dots_empty() cls <- class(object)[1] if (rlang::is_missing(mode)) { spec_modes <- rlang::env_get(get_model_env(), paste0(cls, "_modes")) @@ -111,11 +119,21 @@ set_mode.model_spec <- function(object, mode) { object$mode <- mode object$user_specified_mode <- TRUE + if (mode == "quantile regression") { + hardhat::check_quantile_levels(quantile_levels) + } else { + if (!is.null(quantile_levels)) { + cli::cli_warn("{.arg quantile_levels} is only used when the mode is + {.val quantile regression}.") + } + } + + object$quantile_levels <- quantile_levels object } #' @export -set_mode.default <- function(object, mode) { +set_mode.default <- function(object, mode, ...) { error_set_object(object, func = "set_mode") invisible(FALSE) diff --git a/R/fit.R b/R/fit.R index 6f67f2cae..1b45a4519 100644 --- a/R/fit.R +++ b/R/fit.R @@ -176,6 +176,10 @@ fit.model_spec <- eval_env$formula <- formula eval_env$weights <- wts + if (!is.null(object$quantile_levels)) { + eval_env$quantile_levels <- object$quantile_levels + } + data <- materialize_sparse_tibble(data, object, "data") fit_interface <- @@ -187,7 +191,6 @@ fit.model_spec <- with a spark data object." ) - # populate `method` with the details for this model type object <- add_methods(object, engine = object$engine) @@ -295,6 +298,10 @@ fit_xy.model_spec <- eval_env$y_var <- y_var eval_env$weights <- weights_to_numeric(case_weights, object) + if (!is.null(object$quantile_levels)) { + eval_env$quantile_levels <- object$quantile_levels + } + # TODO case weights: pass in eval_env not individual elements fit_interface <- check_xy_interface(eval_env$x, eval_env$y, cl, object) diff --git a/R/install_packages.R b/R/install_packages.R index fdd682634..89f409ad6 100644 --- a/R/install_packages.R +++ b/R/install_packages.R @@ -26,8 +26,8 @@ install_engine_packages <- function(extension = TRUE, extras = TRUE, } if (extras) { - rmd_pkgs <- c("tidymodels", "broom.mixed", "glmnet", "Cubist", "xrf", "ape", - "rmarkdown") + rmd_pkgs <- c("ape", "broom.mixed", "Cubist", "glmnet", "quantreg", + "rmarkdown", "tidymodels", "xrf") engine_packages <- unique(c(engine_packages, rmd_pkgs)) } diff --git a/R/linear_reg_data.R b/R/linear_reg_data.R index bdf6a3753..c24f07c9e 100644 --- a/R/linear_reg_data.R +++ b/R/linear_reg_data.R @@ -1,6 +1,7 @@ set_new_model("linear_reg") set_model_mode("linear_reg", "regression") +set_model_mode("linear_reg", "quantile regression") # ------------------------------------------------------------------------------ @@ -582,3 +583,48 @@ set_pred( ) ) +# ------------------------------------------------------------------------------ + +set_model_engine(model = "linear_reg", mode = "quantile regression", eng = "quantreg") +set_dependency(model = "linear_reg", eng = "quantreg", pkg = "quantreg", mode = "quantile regression") + +set_fit( + model = "linear_reg", + eng = "quantreg", + mode = "quantile regression", + value = list( + interface = "formula", + protect = c("formula", "data", "weights"), + func = c(pkg = "quantreg", fun = "rq"), + defaults = list(tau = expr(quantile_levels)) + ) +) + +set_encoding( + model = "linear_reg", + eng = "quantreg", + mode = "quantile regression", + options = list( + predictor_indicators = "traditional", + compute_intercept = TRUE, + remove_intercept = TRUE, + allow_sparse_x = FALSE + ) +) + +set_pred( + model = "linear_reg", + eng = "quantreg", + mode = "quantile regression", + type = "quantile", + value = list( + pre = NULL, + post = matrix_to_quantile_pred, + func = c(fun = "predict"), + args = + list( + object = expr(object$fit), + newdata = expr(new_data) + ) + ) +) diff --git a/R/linear_reg_quantreg.R b/R/linear_reg_quantreg.R new file mode 100644 index 000000000..d8c0824c9 --- /dev/null +++ b/R/linear_reg_quantreg.R @@ -0,0 +1,11 @@ +#' Linear quantile regression via the quantreg package +#' +#' [quantreg::rq()] optimizes quantile loss to fit models with numeric outcomes. +#' +#' @includeRmd man/rmd/linear_reg_quantreg.md details +#' +#' @name details_linear_reg_quantreg +#' @keywords internal +NULL + +# See inst/README-DOCS.md for a description of how these files are processed diff --git a/R/predict.R b/R/predict.R index 16a8cb536..b3b451517 100644 --- a/R/predict.R +++ b/R/predict.R @@ -201,12 +201,14 @@ check_pred_type <- function(object, type, ..., call = rlang::caller_env()) { regression = "numeric", classification = "class", "censored regression" = "time", + "quantile regression" = "quantile", cli::cli_abort( - "{.arg type} should be 'regression', 'censored regression', or 'classification'.", + "{.arg type} should be one of {.or {.val {all_modes}}}.", call = call ) ) } + if (!(type %in% pred_types)) cli::cli_abort( "{.arg type} should be one of {.or {.arg {pred_types}}}.", @@ -373,7 +375,7 @@ check_pred_type_dots <- function(object, type, ..., call = rlang::caller_env()) # ---------------------------------------------------------------------------- - other_args <- c("interval", "level", "std_error", "quantile", + other_args <- c("interval", "level", "std_error", "quantile_levels", "time", "eval_time", "increasing") eval_time_types <- c("survival", "hazard") diff --git a/R/predict_quantile.R b/R/predict_quantile.R index b6b576316..6a8b5060b 100644 --- a/R/predict_quantile.R +++ b/R/predict_quantile.R @@ -1,48 +1,76 @@ #' @keywords internal #' @rdname other_predict -#' @param quantile A vector of numbers between 0 and 1 for the quantile being -#' predicted. +#' @param quantile,quantile_levels A vector of values between 0 and 1 for the +#' quantile to be predicted. If the model has a `"quantile regression"` mode, +#' this value should be `NULL`. For other modes, the default is `(1:9)/10`. +#' Note that, as of version 1.3.0 of parsnip, the `quantile` is deprecated. Use +#' `quantile_levels` instead. #' @inheritParams predict.model_fit #' @method predict_quantile model_fit #' @export predict_quantile.model_fit #' @export predict_quantile.model_fit <- function(object, new_data, - quantile = (1:9)/10, + quantile_levels = NULL, + quantile = deprecated(), interval = "none", level = 0.95, ...) { + check_dots_empty() + check_spec_pred_type(object, "quantile") - check_spec_pred_type(object, "quantile") - - if (inherits(object$fit, "try-error")) { - cli::cli_warn("Model fit failed; cannot make predictions.") - return(NULL) - } + if (lifecycle::is_present(quantile)) { + lifecycle::deprecate_warn( + "1.3.0", + "predict_quantile(quantile)", + "predict_quantile(quantile_levels)" + ) + quantile_levels <- quantile + } - new_data <- prepare_data(object, new_data) - # preprocess data - if (!is.null(object$spec$method$pred$quantile$pre)) - new_data <- object$spec$method$pred$quantile$pre(new_data, object) + if (inherits(object$fit, "try-error")) { + cli::cli_warn("Model fit failed; cannot make predictions.") + return(NULL) + } + if (object$spec$mode == "quantile regression") { + if (!is.null(quantile_levels)) { + cli::cli_abort("When the mode is {.val quantile regression}, + {.arg quantile_levels} are specified by {.fn set_mode}.") + } + } else { + if (is.null(quantile_levels)) { + quantile_levels <- (1:9)/10 + } + hardhat::check_quantile_levels(quantile_levels) # Pass some extra arguments to be used in post-processor - object$spec$method$pred$quantile$args$p <- quantile - pred_call <- make_pred_call(object$spec$method$pred$quantile) + object$spec$quantile_levels <- quantile_levels + } - res <- eval_tidy(pred_call) + new_data <- prepare_data(object, new_data) - # post-process the predictions - if(!is.null(object$spec$method$pred$quantile$post)) { - res <- object$spec$method$pred$quantile$post(res, object) - } + # preprocess data + if (!is.null(object$spec$method$pred$quantile$pre)) { + new_data <- object$spec$method$pred$quantile$pre(new_data, object) + } - res + pred_call <- make_pred_call(object$spec$method$pred$quantile) + + res <- eval_tidy(pred_call) + + # post-process the predictions + if(!is.null(object$spec$method$pred$quantile$post)) { + res <- object$spec$method$pred$quantile$post(res, object) } + res +} + # @export # @keywords internal # @rdname other_predict # @inheritParams predict.model_fit -predict_quantile <- function (object, ...) +predict_quantile <- function (object, ...) { UseMethod("predict_quantile") +} diff --git a/R/print.R b/R/print.R index 1e6e9fe05..d03dcfcdb 100644 --- a/R/print.R +++ b/R/print.R @@ -23,6 +23,10 @@ print_model_spec <- function(x, cls = class(x)[1], desc = get_model_desc(cls), . print(show_call(x)) } + if (x$mode == "quantile regression") { + cli::cli_inform("Quantile levels: {x$quantile_levels}.") + } + invisible(x) } diff --git a/R/surv_reg_data.R b/R/surv_reg_data.R index 9313ede22..a37dc50bd 100644 --- a/R/surv_reg_data.R +++ b/R/surv_reg_data.R @@ -59,25 +59,6 @@ set_pred( ) ) -set_pred( - model = "surv_reg", - eng = "flexsurv", - mode = "regression", - type = "quantile", - value = list( - pre = NULL, - post = flexsurv_quant, - func = c(fun = "summary"), - args = - list( - object = expr(object$fit), - newdata = expr(new_data), - type = "quantile", - quantiles = expr(quantile) - ) - ) -) - # ------------------------------------------------------------------------------ set_model_engine("surv_reg", mode = "regression", eng = "survival") @@ -133,22 +114,3 @@ set_pred( ) ) ) - -set_pred( - model = "surv_reg", - eng = "survival", - mode = "regression", - type = "quantile", - value = list( - pre = NULL, - post = survreg_quant, - func = c(fun = "predict"), - args = - list( - object = expr(object$fit), - newdata = expr(new_data), - type = "quantile", - p = expr(quantile) - ) - ) -) diff --git a/_pkgdown.yml b/_pkgdown.yml index e6868a7a3..c79ecca06 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -78,6 +78,7 @@ reference: - fit_xy - control_parsnip - glance.model_fit + - matrix_to_quantile_pred - model_fit - model_formula - model_spec diff --git a/inst/models.tsv b/inst/models.tsv index e77757b0b..1ef2a8505 100644 --- a/inst/models.tsv +++ b/inst/models.tsv @@ -55,6 +55,7 @@ "linear_reg" "regression" "lm" NA "linear_reg" "regression" "lme" "multilevelmod" "linear_reg" "regression" "lmer" "multilevelmod" +"linear_reg" "quantile regression" "quantreg" NA "linear_reg" "regression" "spark" NA "linear_reg" "regression" "stan" NA "linear_reg" "regression" "stan_glmer" "multilevelmod" diff --git a/man/details_linear_reg_quantreg.Rd b/man/details_linear_reg_quantreg.Rd new file mode 100644 index 000000000..1256bada8 --- /dev/null +++ b/man/details_linear_reg_quantreg.Rd @@ -0,0 +1,173 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/linear_reg_quantreg.R +\name{details_linear_reg_quantreg} +\alias{details_linear_reg_quantreg} +\title{Linear quantile regression via the quantreg package} +\description{ +\code{\link[quantreg:rq]{quantreg::rq()}} optimizes quantile loss to fit models with numeric outcomes. +} +\details{ +For this engine, there is a single mode: quantile regression + +This model has the same structure as the model fit by \code{lm()}, but +instead of optimizing the sum of squared errors, it optimizes “quantile +loss” in order to produce better estimates of the predictive +distribution. +\subsection{Tuning Parameters}{ + +This engine has no tuning parameters. +} + +\subsection{Translation from parsnip to the original package}{ + +This model only works with the \code{"quantile regression"} model and +requires users to specify which areas of the distribution to predict via +the \code{quantile_levels} argument. For example: + +\if{html}{\out{
}}\preformatted{linear_reg() \%>\% + set_engine("quantreg") \%>\% + set_mode("quantile regression", quantile_levels = (1:3) / 4) \%>\% + translate() +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## Linear Regression Model Specification (quantile regression) +## +## Computational engine: quantreg +## +## Model fit template: +## quantreg::rq(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## tau = quantile_levels) + +## Quantile levels: 0.25, 0.5, and 0.75. +}\if{html}{\out{
}} +} + +\subsection{Output format}{ + +When multiple quantile levels are predicted, there are multiple +predicted values for each row of new data. The \code{predict()} method for +this mode produces a column named \code{.pred_quantile} that has a special +class of \code{"quantile_pred"}, and it contains the predictions for each +row. + +For example: + +\if{html}{\out{
}}\preformatted{library(modeldata) +rlang::check_installed("quantreg") + +n <- nrow(Chicago) +Chicago <- Chicago \%>\% select(ridership, Clark_Lake) + +Chicago_train <- Chicago[1:(n - 7), ] +Chicago_test <- Chicago[(n - 6):n, ] + +qr_fit <- + linear_reg() \%>\% + set_engine("quantreg") \%>\% + set_mode("quantile regression", quantile_levels = (1:3) / 4) \%>\% + fit(ridership ~ Clark_Lake, data = Chicago_train) +qr_fit +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## parsnip model object +## +## Call: +## quantreg::rq(formula = ridership ~ Clark_Lake, tau = quantile_levels, +## data = data) +## +## Coefficients: +## tau= 0.25 tau= 0.50 tau= 0.75 +## (Intercept) -0.2064189 0.2051549 0.8112286 +## Clark_Lake 0.9820582 0.9862306 0.9777820 +## +## Degrees of freedom: 5691 total; 5689 residual +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{qr_pred <- predict(qr_fit, Chicago_test) +qr_pred +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 7 x 1 +## .pred_quantile +## +## 1 [21.1] +## 2 [21.4] +## 3 [21.7] +## 4 [21.4] +## 5 [19.5] +## 6 [6.88] +## # i 1 more row +}\if{html}{\out{
}} + +We can unnest these values and/or convert them to a rectangular format: + +\if{html}{\out{
}}\preformatted{as_tibble(qr_pred$.pred_quantile) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## # A tibble: 21 x 3 +## .pred_quantile .quantile_levels .row +## +## 1 20.6 0.25 1 +## 2 21.1 0.5 1 +## 3 21.5 0.75 1 +## 4 20.9 0.25 2 +## 5 21.4 0.5 2 +## 6 21.8 0.75 2 +## # i 15 more rows +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{as.matrix(qr_pred$.pred_quantile) +}\if{html}{\out{
}} + +\if{html}{\out{
}}\preformatted{## [,1] [,2] [,3] +## [1,] 20.590627 21.090561 21.517717 +## [2,] 20.863639 21.364733 21.789541 +## [3,] 21.190665 21.693148 22.115142 +## [4,] 20.879352 21.380513 21.805185 +## [5,] 19.047814 19.541193 19.981622 +## [6,] 6.435241 6.875033 7.423968 +## [7,] 6.062058 6.500265 7.052411 +}\if{html}{\out{
}} +} + +\subsection{Preprocessing requirements}{ + +Factor/categorical predictors need to be converted to numeric values +(e.g., dummy or indicator variables) for this engine. When using the +formula method via \code{\link[=fit.model_spec]{fit()}}, parsnip will +convert factor columns to indicators. +} + +\subsection{Case weights}{ + +This model can utilize case weights during model fitting. To use them, +see the documentation in \link{case_weights} and the examples +on \code{tidymodels.org}. + +The \code{fit()} and \code{fit_xy()} arguments have arguments called +\code{case_weights} that expect vectors of case weights. +} + +\subsection{Saving fitted model objects}{ + +This model object contains data that are not required to make +predictions. When saving the model for the purpose of prediction, the +size of the saved object might be substantially reduced by using +functions from the \href{https://butcher.tidymodels.org}{butcher} package. +} + +\subsection{Examples}{ + +The “Fitting and Predicting with parsnip” article contains +\href{https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-quantreg}{examples} +for \code{linear_reg()} with the \code{"quantreg"} engine. +} + +\subsection{References}{ +\itemize{ +\item Waldmann, E. (2018). Quantile regression: a short story on how and +why. \emph{Statistical Modelling}, 18(3-4), 203-218. +} +} +} +\keyword{internal} diff --git a/man/matrix_to_quantile_pred.Rd b/man/matrix_to_quantile_pred.Rd new file mode 100644 index 000000000..2723cd43e --- /dev/null +++ b/man/matrix_to_quantile_pred.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/aaa_quantiles.R +\name{matrix_to_quantile_pred} +\alias{matrix_to_quantile_pred} +\title{Reformat quantile predictions} +\usage{ +matrix_to_quantile_pred(x, object) +} +\arguments{ +\item{x}{A matrix of predictions with rows as samples and columns as quantile +levels.} + +\item{object}{A parsnip \code{model_fit} object from a quantile regression model.} +} +\description{ +Reformat quantile predictions +} +\keyword{internal} diff --git a/man/other_predict.Rd b/man/other_predict.Rd index efe0fbdb0..67b9de179 100644 --- a/man/other_predict.Rd +++ b/man/other_predict.Rd @@ -49,7 +49,8 @@ predict_numeric(object, ...) \method{predict_quantile}{model_fit}( object, new_data, - quantile = (1:9)/10, + quantile_levels = NULL, + quantile = deprecated(), interval = "none", level = 0.95, ... @@ -103,8 +104,11 @@ interval estimates.} \item{std_error}{A single logical for whether the standard error should be returned (assuming that the model can compute it).} -\item{quantile}{A vector of numbers between 0 and 1 for the quantile being -predicted.} +\item{quantile, quantile_levels}{A vector of values between 0 and 1 for the +quantile to be predicted. If the model has a \code{"quantile regression"} mode, +this value should be \code{NULL}. For other modes, the default is \code{(1:9)/10}. +Note that, as of version 1.3.0 of parsnip, the \code{quantile} is deprecated. Use +\code{quantile_levels} instead.} } \description{ These are internal functions not meant to be directly called by the user. diff --git a/man/rmd/linear_reg_lm.Rmd b/man/rmd/linear_reg_lm.Rmd index f07068d18..a904b724d 100644 --- a/man/rmd/linear_reg_lm.Rmd +++ b/man/rmd/linear_reg_lm.Rmd @@ -27,7 +27,7 @@ linear_reg() %>% _However_, the documentation in [stats::lm()] assumes that is specific type of case weights are being used: "Non-NULL weights can be used to indicate that different observations have different variances (with the values in weights being inversely proportional to the variances); or equivalently, when the elements of weights are positive integers `w_i`, that each response `y_i` is the mean of `w_i` unit-weight observations (including the case that there are w_i observations equal to `y_i` and the data have been summarized). However, in the latter case, notice that within-group variation is not used. Therefore, the sigma estimate and residual degrees of freedom may be suboptimal; in the case of replication weights, **even wrong**. Hence, standard errors and analysis of variance tables should be treated with care" (emphasis added) -Depending on your application, the degrees of freedown for the model (and other statistics) might be incorrect. +Depending on your application, the degrees of freedom for the model (and other statistics) might be incorrect. ## Saving fitted model objects diff --git a/man/rmd/linear_reg_quantreg.Rmd b/man/rmd/linear_reg_quantreg.Rmd new file mode 100644 index 000000000..a3cc386e7 --- /dev/null +++ b/man/rmd/linear_reg_quantreg.Rmd @@ -0,0 +1,79 @@ +```{r, child = "aaa.Rmd", include = FALSE} +``` + +`r descr_models("linear_reg", "quantreg")` + +This model has the same structure as the model fit by `lm()`, but instead of optimizing the sum of squared errors, it optimizes "quantile loss" in order to produce better estimates of the predictive distribution. + +## Tuning Parameters + +This engine has no tuning parameters. + +## Translation from parsnip to the original package + +This model only works with the `"quantile regression"` model and requires users to specify which areas of the distribution to predict via the `quantile_levels` argument. For example: + +```{r quantreg-reg} +linear_reg() %>% + set_engine("quantreg") %>% + set_mode("quantile regression", quantile_levels = (1:3) / 4) %>% + translate() +``` + +## Output format + +When multiple quantile levels are predicted, there are multiple predicted values for each row of new data. The `predict()` method for this mode produces a column named `.pred_quantile` that has a special class of `"quantile_pred"`, and it contains the predictions for each row. + +For example: + +```{r example} +library(modeldata) +rlang::check_installed("quantreg") + +n <- nrow(Chicago) +Chicago <- Chicago %>% select(ridership, Clark_Lake) + +Chicago_train <- Chicago[1:(n - 7), ] +Chicago_test <- Chicago[(n - 6):n, ] + +qr_fit <- + linear_reg() %>% + set_engine("quantreg") %>% + set_mode("quantile regression", quantile_levels = (1:3) / 4) %>% + fit(ridership ~ Clark_Lake, data = Chicago_train) +qr_fit + +qr_pred <- predict(qr_fit, Chicago_test) +qr_pred +``` + +We can unnest these values and/or convert them to a rectangular format: + +```{r example-format} +as_tibble(qr_pred$.pred_quantile) + +as.matrix(qr_pred$.pred_quantile) +``` + +## Preprocessing requirements + +```{r child = "template-makes-dummies.Rmd"} +``` + +## Case weights + +```{r child = "template-uses-case-weights.Rmd"} +``` + +## Saving fitted model objects + +```{r child = "template-butcher.Rmd"} +``` + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-quantreg) for `linear_reg()` with the `"quantreg"` engine. + +## References + + - Waldmann, E. (2018). Quantile regression: a short story on how and why. _Statistical Modelling_, 18(3-4), 203-218. diff --git a/man/rmd/linear_reg_quantreg.md b/man/rmd/linear_reg_quantreg.md new file mode 100644 index 000000000..61b7e4e06 --- /dev/null +++ b/man/rmd/linear_reg_quantreg.md @@ -0,0 +1,154 @@ + + + +For this engine, there is a single mode: quantile regression + +This model has the same structure as the model fit by `lm()`, but instead of optimizing the sum of squared errors, it optimizes "quantile loss" in order to produce better estimates of the predictive distribution. + +## Tuning Parameters + +This engine has no tuning parameters. + +## Translation from parsnip to the original package + +This model only works with the `"quantile regression"` model and requires users to specify which areas of the distribution to predict via the `quantile_levels` argument. For example: + + +``` r +linear_reg() %>% + set_engine("quantreg") %>% + set_mode("quantile regression", quantile_levels = (1:3) / 4) %>% + translate() +``` + +``` +## Linear Regression Model Specification (quantile regression) +## +## Computational engine: quantreg +## +## Model fit template: +## quantreg::rq(formula = missing_arg(), data = missing_arg(), weights = missing_arg(), +## tau = quantile_levels) +``` + +``` +## Quantile levels: 0.25, 0.5, and 0.75. +``` + +## Output format + +When multiple quantile levels are predicted, there are multiple predicted values for each row of new data. The `predict()` method for this mode produces a column named `.pred_quantile` that has a special class of `"quantile_pred"`, and it contains the predictions for each row. + +For example: + + +``` r +library(modeldata) +rlang::check_installed("quantreg") + +n <- nrow(Chicago) +Chicago <- Chicago %>% select(ridership, Clark_Lake) + +Chicago_train <- Chicago[1:(n - 7), ] +Chicago_test <- Chicago[(n - 6):n, ] + +qr_fit <- + linear_reg() %>% + set_engine("quantreg") %>% + set_mode("quantile regression", quantile_levels = (1:3) / 4) %>% + fit(ridership ~ Clark_Lake, data = Chicago_train) +qr_fit +``` + +``` +## parsnip model object +## +## Call: +## quantreg::rq(formula = ridership ~ Clark_Lake, tau = quantile_levels, +## data = data) +## +## Coefficients: +## tau= 0.25 tau= 0.50 tau= 0.75 +## (Intercept) -0.2064189 0.2051549 0.8112286 +## Clark_Lake 0.9820582 0.9862306 0.9777820 +## +## Degrees of freedom: 5691 total; 5689 residual +``` + +``` r +qr_pred <- predict(qr_fit, Chicago_test) +qr_pred +``` + +``` +## # A tibble: 7 x 1 +## .pred_quantile +## +## 1 [21.1] +## 2 [21.4] +## 3 [21.7] +## 4 [21.4] +## 5 [19.5] +## 6 [6.88] +## # i 1 more row +``` + +We can unnest these values and/or convert them to a rectangular format: + + +``` r +as_tibble(qr_pred$.pred_quantile) +``` + +``` +## # A tibble: 21 x 3 +## .pred_quantile .quantile_levels .row +## +## 1 20.6 0.25 1 +## 2 21.1 0.5 1 +## 3 21.5 0.75 1 +## 4 20.9 0.25 2 +## 5 21.4 0.5 2 +## 6 21.8 0.75 2 +## # i 15 more rows +``` + +``` r +as.matrix(qr_pred$.pred_quantile) +``` + +``` +## [,1] [,2] [,3] +## [1,] 20.590627 21.090561 21.517717 +## [2,] 20.863639 21.364733 21.789541 +## [3,] 21.190665 21.693148 22.115142 +## [4,] 20.879352 21.380513 21.805185 +## [5,] 19.047814 19.541193 19.981622 +## [6,] 6.435241 6.875033 7.423968 +## [7,] 6.062058 6.500265 7.052411 +``` + +## Preprocessing requirements + + +Factor/categorical predictors need to be converted to numeric values (e.g., dummy or indicator variables) for this engine. When using the formula method via \\code{\\link[=fit.model_spec]{fit()}}, parsnip will convert factor columns to indicators. + +## Case weights + + +This model can utilize case weights during model fitting. To use them, see the documentation in [case_weights] and the examples on `tidymodels.org`. + +The `fit()` and `fit_xy()` arguments have arguments called `case_weights` that expect vectors of case weights. + +## Saving fitted model objects + + +This model object contains data that are not required to make predictions. When saving the model for the purpose of prediction, the size of the saved object might be substantially reduced by using functions from the [butcher](https://butcher.tidymodels.org) package. + +## Examples + +The "Fitting and Predicting with parsnip" article contains [examples](https://parsnip.tidymodels.org/articles/articles/Examples.html#linear-reg-quantreg) for `linear_reg()` with the `"quantreg"` engine. + +## References + + - Waldmann, E. (2018). Quantile regression: a short story on how and why. _Statistical Modelling_, 18(3-4), 203-218. diff --git a/man/set_args.Rd b/man/set_args.Rd index b8e4620c5..8c4f8f320 100644 --- a/man/set_args.Rd +++ b/man/set_args.Rd @@ -3,11 +3,14 @@ \name{set_args} \alias{set_args} \alias{set_mode} +\alias{set_mode.model_spec} \title{Change elements of a model specification} \usage{ set_args(object, ...) -set_mode(object, mode) +set_mode(object, mode, ...) + +\method{set_mode}{model_spec}(object, mode, quantile_levels = NULL, ...) } \arguments{ \item{object}{A \link[=model_spec]{model specification}.} @@ -16,6 +19,11 @@ set_mode(object, mode) \item{mode}{A character string for the model type (e.g. "classification" or "regression")} + +\item{quantile_levels}{A vector of values between zero and one (only for the +\code{"quantile regression"} mode); otherwise, it is \code{NULL}. The model uses these +values to appropriately train quantile regression models to make predictions +for these values (e.g., \code{quantile_levels = 0.5} is the median).} } \value{ An updated model object. @@ -34,5 +42,8 @@ rand_forest() rand_forest() \%>\% set_args(mtry = 3, importance = TRUE) \%>\% set_mode("regression") + +linear_reg() \%>\% + set_mode("quantile regression", quantile_levels = c(0.2, 0.5, 0.8)) \dontshow{\}) # examplesIf} } diff --git a/tests/testthat/_snaps/args_and_modes.md b/tests/testthat/_snaps/args_and_modes.md index 43c4ffb55..af1ee702b 100644 --- a/tests/testthat/_snaps/args_and_modes.md +++ b/tests/testthat/_snaps/args_and_modes.md @@ -92,7 +92,7 @@ linear_reg() %>% set_mode() Condition Error in `set_mode()`: - ! Available modes for model type linear_reg are: "unknown" and "regression". + ! Available modes for model type linear_reg are: "unknown", "regression", and "quantile regression". --- @@ -109,7 +109,7 @@ linear_reg() %>% set_engine() Condition Error in `set_engine()`: - ! Missing engine. Possible mode/engine combinations are: regression {lm, glm, glmnet, stan, spark, keras, brulee}. + ! Missing engine. Possible mode/engine combinations are: quantile regression {quantreg} and regression {lm, glm, glmnet, stan, spark, keras, brulee}. --- diff --git a/tests/testthat/_snaps/augment.md b/tests/testthat/_snaps/augment.md index 1e73b011e..2432f55f4 100644 --- a/tests/testthat/_snaps/augment.md +++ b/tests/testthat/_snaps/augment.md @@ -5,5 +5,30 @@ Condition Error in `augment()`: ! Unknown mode "depeche". - i Model mode should be one of "classification", "regression", or "censored regression". + i Model mode should be one of "classification", "regression", "censored regression", or "quantile regression". + +# quantile regression models + + Code + linear_reg() %>% set_mode("quantile regression", quantile_levels = probs_1) + Output + Linear Regression Model Specification (quantile regression) + + Computational engine: lm + + Message + Quantile levels: 0.2, 0.4, 0.6, 0.8, and 1. + +--- + + Code + linear_reg() %>% set_mode("regression", quantile_levels = probs_1) + Condition + Warning: + `quantile_levels` is only used when the mode is "quantile regression". + Output + Linear Regression Model Specification (regression) + + Computational engine: lm + diff --git a/tests/testthat/_snaps/linear_reg_quantreg.md b/tests/testthat/_snaps/linear_reg_quantreg.md new file mode 100644 index 000000000..cba265991 --- /dev/null +++ b/tests/testthat/_snaps/linear_reg_quantreg.md @@ -0,0 +1,9 @@ +# linear quantile regression via quantreg - multiple quantiles + + Code + ten_quant_pred <- predict(ten_quant, new_data = sac_test, quantile_levels = (0: + 9) / 9) + Condition + Error in `predict_quantile()`: + ! When the mode is "quantile regression", `quantile_levels` are specified by `set_mode()`. + diff --git a/tests/testthat/_snaps/registration.md b/tests/testthat/_snaps/registration.md index 2ad3f75a3..4774183ac 100644 --- a/tests/testthat/_snaps/registration.md +++ b/tests/testthat/_snaps/registration.md @@ -22,6 +22,14 @@ Error in `set_new_model()`: ! `model` must be a single string, not a character vector. +# existing modes + + Code + get_from_env("modes") + Output + [1] "classification" "regression" "censored regression" + [4] "quantile regression" "unknown" + # adding a new mode Code diff --git a/tests/testthat/helper-objects.R b/tests/testthat/helper-objects.R index b3cbd8276..67549015b 100644 --- a/tests/testthat/helper-objects.R +++ b/tests/testthat/helper-objects.R @@ -25,6 +25,19 @@ is_tf_ok <- function() { res } +# ------------------------------------------------------------------------------ +# for quantile regression tests + +data("Sacramento") + +Sacramento_small <- + modeldata::Sacramento %>% + dplyr::mutate(price = log10(price)) %>% + dplyr::select(price, beds, baths, sqft, latitude, longitude) + +sac_train <- Sacramento_small[-(1:5), ] +sac_test <- Sacramento_small[ 1:5 , ] + # ------------------------------------------------------------------------------ # For sparse tibble testing @@ -61,3 +74,4 @@ sparse_hotel_rates <- function(tibble = FALSE) { res } + diff --git a/tests/testthat/test-augment.R b/tests/testthat/test-augment.R index 9c4c93d58..19c069764 100644 --- a/tests/testthat/test-augment.R +++ b/tests/testthat/test-augment.R @@ -93,3 +93,15 @@ test_that('augment for model without class probabilities', { expect_equal(nrow(augment(cls_form, head(two_class_dat))), 6) }) + + +test_that('quantile regression models', { + probs_1 <- (1:5)/5 + + expect_snapshot( + linear_reg() %>% set_mode("quantile regression", quantile_levels = probs_1) + ) + expect_snapshot( + linear_reg() %>% set_mode("regression", quantile_levels = probs_1) + ) +}) diff --git a/tests/testthat/test-linear_reg_quantreg.R b/tests/testthat/test-linear_reg_quantreg.R new file mode 100644 index 000000000..0785fe7b5 --- /dev/null +++ b/tests/testthat/test-linear_reg_quantreg.R @@ -0,0 +1,115 @@ +test_that('linear quantile regression via quantreg - single quantile', { + skip_if_not_installed("quantreg") + skip_if_not_installed("hardhat", minimum_version = "1.4.0.9002") + + # data in `helper-objects.R` + + one_quant <- + linear_reg() %>% + set_engine("quantreg") %>% + set_mode("quantile regression", quantile_levels = .5) %>% + fit(price ~ ., data = sac_train) + + expect_s3_class(one_quant, c("_rq", "model_fit")) + + ### + + one_quant_pred <- predict(one_quant, new_data = sac_test) + expect_true(nrow(one_quant_pred) == nrow(sac_test)) + expect_named(one_quant_pred, ".pred_quantile") + expect_true(is.list(one_quant_pred[[1]])) + expect_s3_class( + one_quant_pred$.pred_quantile[1], + c("quantile_pred", "vctrs_vctr", "list") + ) + expect_identical(class(one_quant_pred$.pred_quantile[[1]]), "numeric") + expect_true(length(one_quant_pred$.pred_quantile[[1]]) == 1L) + expect_identical(attr(one_quant_pred$.pred_quantile, "quantile_levels"), .5) + + one_quant_df <- as_tibble(one_quant_pred$.pred_quantile) + expect_s3_class(one_quant_df, c("tbl_df", "tbl", "data.frame")) + expect_named(one_quant_df, c(".pred_quantile", ".quantile_levels", ".row")) + expect_true(nrow(one_quant_df) == nrow(sac_test) * 1) + + ### + + one_quant_one_row <- predict(one_quant, new_data = sac_test[1,]) + expect_true(nrow(one_quant_one_row) == 1L) + expect_named(one_quant_one_row, ".pred_quantile") + expect_true(is.list(one_quant_one_row[[1]])) + expect_s3_class( + one_quant_one_row$.pred_quantile[1], + c("quantile_pred", "vctrs_vctr", "list") + ) + expect_identical(class(one_quant_one_row$.pred_quantile[[1]]), "numeric") + expect_true(length(one_quant_one_row$.pred_quantile[[1]]) == 1L) + expect_identical(attr(one_quant_pred$.pred_quantile, "quantile_levels"), .5) + + one_quant_one_row_df <- as_tibble(one_quant_one_row$.pred_quantile) + expect_s3_class(one_quant_one_row_df, c("tbl_df", "tbl", "data.frame")) + expect_named(one_quant_one_row_df, c(".pred_quantile", ".quantile_levels", ".row")) + expect_true(nrow(one_quant_one_row_df) == nrow(sac_test[1,]) * 1) +}) + +test_that('linear quantile regression via quantreg - multiple quantiles', { + skip_if_not_installed("quantreg") + + # data in `helper-objects.R` + + ten_quant <- + linear_reg() %>% + set_engine("quantreg") %>% + set_mode("quantile regression", quantile_levels = (0:9)/9) %>% + fit(price ~ ., data = sac_train) + + expect_s3_class(ten_quant, c("_rq", "model_fit")) + + ### + + ten_quant_pred <- predict(ten_quant, new_data = sac_test) + expect_true(nrow(ten_quant_pred) == nrow(sac_test)) + expect_named(ten_quant_pred, ".pred_quantile") + expect_true(is.list(ten_quant_pred[[1]])) + expect_s3_class( + ten_quant_pred$.pred_quantile[1], + c("quantile_pred", "vctrs_vctr", "list") + ) + expect_identical(class(ten_quant_pred$.pred_quantile[[1]]), "numeric") + expect_true(length(ten_quant_pred$.pred_quantile[[1]]) == 10L) + expect_identical(attr(ten_quant_pred$.pred_quantile, "quantile_levels"), (0:9)/9) + + ten_quant_df <- as_tibble(ten_quant_pred$.pred_quantile) + expect_s3_class(ten_quant_df, c("tbl_df", "tbl", "data.frame")) + expect_named(ten_quant_df, c(".pred_quantile", ".quantile_levels", ".row")) + expect_true(nrow(ten_quant_df) == nrow(sac_test) * 10) + + expect_snapshot( + ten_quant_pred <- predict(ten_quant, new_data = sac_test, quantile_levels = (0:9)/9), + error = TRUE + ) + + ### + + ten_quant_one_row <- predict(ten_quant, new_data = sac_test[1,]) + expect_true(nrow(ten_quant_one_row) == 1L) + expect_named(ten_quant_one_row, ".pred_quantile") + expect_true(is.list(ten_quant_one_row[[1]])) + expect_s3_class( + ten_quant_one_row$.pred_quantile[1], + c("quantile_pred", "vctrs_vctr", "list") + ) + expect_identical(class(ten_quant_one_row$.pred_quantile[[1]]), "numeric") + expect_true(length(ten_quant_one_row$.pred_quantile[[1]]) == 10L) + expect_identical( + attr(ten_quant_one_row$.pred_quantile, "quantile_levels"), + (0:9)/9 + ) + + ten_quant_one_row_df <- as_tibble(ten_quant_one_row$.pred_quantile) + expect_s3_class(ten_quant_one_row_df, c("tbl_df", "tbl", "data.frame")) + expect_named(ten_quant_one_row_df, c(".pred_quantile", ".quantile_levels", ".row")) + expect_true(nrow(ten_quant_one_row_df) == nrow(sac_test[1,]) * 10) +}) + + + diff --git a/tests/testthat/test-registration.R b/tests/testthat/test-registration.R index d9ce3e65c..3da900a16 100644 --- a/tests/testthat/test-registration.R +++ b/tests/testthat/test-registration.R @@ -47,6 +47,11 @@ expect_snapshot(error = TRUE, set_new_model(letters[1:2])) # ------------------------------------------------------------------------------ +test_that('existing modes', { + expect_snapshot(get_from_env("modes")) +}) + + test_that('adding a new mode', { set_model_mode("sponge", "classification") diff --git a/tests/testthat/test-surv_reg_survreg.R b/tests/testthat/test-surv_reg_survreg.R index 3cbcad6a9..bc66c8d87 100644 --- a/tests/testthat/test-surv_reg_survreg.R +++ b/tests/testthat/test-surv_reg_survreg.R @@ -10,8 +10,7 @@ complete_form <- survival::Surv(time) ~ group # ------------------------------------------------------------------------------ test_that('survival execution', { - skip_on_travis() - + skip_if_not_installed("survival") rlang::local_options(lifecycle_verbosity = "quiet") surv_basic <- surv_reg() %>% set_engine("survival") surv_lnorm <- surv_reg(dist = "lognormal") %>% set_engine("survival") @@ -45,7 +44,7 @@ test_that('survival execution', { }) test_that('survival prediction', { - skip_on_travis() + skip_if_not_installed("survival") rlang::local_options(lifecycle_verbosity = "quiet") surv_basic <- surv_reg() %>% set_engine("survival") @@ -60,16 +59,6 @@ test_that('survival prediction', { exp_pred <- predict(extract_fit_engine(res), head(lung)) exp_pred <- tibble(.pred = unname(exp_pred)) expect_equal(exp_pred, predict(res, head(lung))) - - exp_quant <- predict(extract_fit_engine(res), head(lung), p = (2:4)/5, type = "quantile") - exp_quant <- - apply(exp_quant, 1, function(x) - tibble(.pred = x, .quantile = (2:4) / 5)) - exp_quant <- tibble(.pred = exp_quant) - obs_quant <- predict(res, head(lung), type = "quantile", quantile = (2:4)/5) - - expect_equal(as.data.frame(exp_quant), as.data.frame(obs_quant)) - }) diff --git a/vignettes/articles/Examples.Rmd b/vignettes/articles/Examples.Rmd index 1a95daadc..705014dce 100644 --- a/vignettes/articles/Examples.Rmd +++ b/vignettes/articles/Examples.Rmd @@ -586,7 +586,52 @@ The following examples use consistent data sets throughout. For regression, we u +
+ + With the `"quantreg"` engine + +

Quantile regression Example (`quantreg`)

+ + ```{r echo=FALSE} + knitr::spin_child("template-reg-sacramento.R") + ``` + + We can define the model but should set the model mode. Also, for these models the levels of the distirunbtion that we would like to predict need to specified with the mode using the `quantile_levels` argument. Let's predict the 0.25, 0.50, and 0.75 quantiles: + + ```{r} + linreg_quant_spec <- + linear_reg() %>% + set_engine("quantreg") %>% + set_mode("quantile regression", quantile_levels = (1:3) / 4) + linreg_quant_spec + ``` + + Now we create the model fit object: + + ```{r} + set.seed(1) + linreg_quant_fit <- linreg_quant_spec %>% fit(price ~ sqft, data = sac_train) + linreg_quant_fit + ``` + + The holdout data can be predicted: + ```{r} + quant_pred <- predict(linreg_quant_fit, sac_test) + quant_pred + ``` + + `.pred_quantile` is a vector type that contains all of the quartile predictions for each row. You can convert this to a rectangular data set using either of: + + ```{r} + as.matrix(quant_pred$.pred_quantile) + + # or + as_tibble(quant_pred$.pred_quantile) + ``` + +
+ ## `logistic_reg()` models
diff --git a/vignettes/articles/template-reg-sacramento.R b/vignettes/articles/template-reg-sacramento.R new file mode 100644 index 000000000..bdb5fc01e --- /dev/null +++ b/vignettes/articles/template-reg-sacramento.R @@ -0,0 +1,11 @@ +#' We'll model the relationship between the cost of a house in Sacramento CA and the square footage of a property. + +#' A few rows were randomly held out for illustrating prediction. + +#+ results = "hide", messages = FALSE +library(tidymodels) +tidymodels_prefer() + +sac_holdout <- c(90L, 203L, 264L, 733L, 771L) +sac_train <- Sacramento[-sac_holdout, ] +sac_test <- Sacramento[ sac_holdout, ]