diff --git a/DESCRIPTION b/DESCRIPTION index d5570885..6a6d4cd5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: httr2 Title: Perform HTTP Requests and Process the Responses -Version: 1.0.6.9000 +Version: 1.0.7.9000 Authors@R: c( person("Hadley", "Wickham", , "hadley@posit.co", role = c("aut", "cre")), person("Posit Software, PBC", role = c("cph", "fnd")), @@ -37,7 +37,7 @@ Suggests: jose, jsonlite, knitr, - later (>= 1.3.2.9001), + later (>= 1.4.0), paws.common, promises, rmarkdown, @@ -54,4 +54,3 @@ Config/testthat/start-first: resp-stream, req-perform Encoding: UTF-8 Roxygen: list(markdown = TRUE) RoxygenNote: 7.3.2 -Remotes: r-lib/later diff --git a/NEWS.md b/NEWS.md index 7ba07839..f09f990e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,15 @@ # httr2 (development version) * `url_parse()` now uses `curl::curl_parse_url()` which is much faster and more correct (#577). +* `req_retry()` now defaults to `max_tries = 2` with a message. + Set to `max_tries = 1` to disable retries. + +* Errors thrown during the parsing of an OAuth response now have a dedicated + `httr2_oauth_parse` error class that includes the original response object + (@atheriel, #596). + +# httr2 1.0.7 + * `req_perform_promise()` upgraded to use event-driven async based on waiting efficiently on curl socket activity (#579). * New `req_oauth_token_exchange()` and `oauth_flow_token_exchange()` functions implement the OAuth token exchange protocol from RFC 8693 (@atheriel, #460). diff --git a/R/oauth-flow-auth-code.R b/R/oauth-flow-auth-code.R index f089305d..6aac75f9 100644 --- a/R/oauth-flow-auth-code.R +++ b/R/oauth-flow-auth-code.R @@ -7,12 +7,14 @@ #' This flow is the most commonly used OAuth flow where the user #' opens a page in their browser, approves the access, and then returns to R. #' When possible, it redirects the browser back to a temporary local webserver -#' to capture the authorization code. When this is not possible (e.g. when +#' to capture the authorization code. When this is not possible (e.g., when #' running on a hosted platform like RStudio Server), provide a custom #' `redirect_uri` and httr2 will prompt the user to enter the code manually. #' #' Learn more about the overall OAuth authentication flow in -#' . +#' , and more about the motivations +#' behind this flow in +#' . #' #' # Security considerations #' @@ -20,17 +22,18 @@ #' applications (which are equivalent to R packages). `r rfc(8252)` spells out #' important considerations for native apps. Most importantly there's no way #' for native apps to keep secrets from their users. This means that the -#' server should either not require a `client_secret` (i.e. a public client -#' not an confidential client) or ensure that possession of the `client_secret` -#' doesn't bestow any meaningful rights. +#' server should either not require a `client_secret` (i.e. it should be a +#' public client and not a confidential client) or ensure that possession of +#' the `client_secret` doesn't grant any significant privileges. #' -#' Only modern APIs from the bigger players (Azure, Google, etc) explicitly -#' native apps. However, in most cases, even for older APIs, possessing the -#' `client_secret` gives you no ability to do anything harmful, so our -#' general principle is that it's fine to include it in an R package, as long -#' as it's mildly obfuscated to protect it from credential scraping. There's -#' no incentive to steal your client credentials if it takes less time to -#' create a new client than find your client secret. +#' Only modern APIs from major providers (like Azure and Google) explicitly +#' support native apps. However, in most cases, even for older APIs, possessing +#' the `client_secret` provides limited ability to perform harmful actions. +#' Therefore, our general principle is that it's acceptable to include it in an +#' R package, as long as it's mildly obfuscated to protect against credential +#' scraping attacks (which aim to acquire large numbers of client secrets by +#' scanning public sites like GitHub). The goal is to ensure that obtaining your +#' client credentials is more work than just creating a new client. #' #' @export #' @family OAuth flows diff --git a/R/oauth-flow.R b/R/oauth-flow.R index 068ae876..4fcdcd4c 100644 --- a/R/oauth-flow.R +++ b/R/oauth-flow.R @@ -12,6 +12,8 @@ oauth_flow_parse <- function(resp, source, error_call = caller_env()) { cli::cli_abort( "Failed to parse response from {.arg {source}} OAuth url.", parent = err, + resp = resp, + class = "httr2_oauth_parse", call = error_call ) } @@ -40,6 +42,8 @@ oauth_flow_parse <- function(resp, source, error_call = caller_env()) { "Failed to parse response from {.arg {source}} OAuth url.", "*" = "Did not contain {.code access_token}, {.code device_code}, or {.code error} field." ), + resp = resp, + class = "httr2_oauth_parse", call = error_call ) } diff --git a/R/progress-bars.R b/R/progress-bars.R index 31c2a739..59d46df6 100644 --- a/R/progress-bars.R +++ b/R/progress-bars.R @@ -30,9 +30,6 @@ #' By default the same as `format`. #' * `name`: progress bar name. This is by default the empty string and it #' is displayed at the beginning of the progress bar. -#' * `show_after`: numeric scalar. Only show the progress bar after this -#' number of seconds. It overrides the `cli.progress_show_after` -#' global option. #' * `type`: progress bar type. Currently supported types are: #' * `iterator`: the default, a for loop or a mapping function, #' * `tasks`: a (typically small) number of tasks, diff --git a/R/req-auth.R b/R/req-auth.R index 9811f0a1..64593495 100644 --- a/R/req-auth.R +++ b/R/req-auth.R @@ -5,8 +5,8 @@ #' #' @inheritParams req_perform #' @param username User name. -#' @param password Password. You avoid entering the password directly when -#' calling this function as it will be captured by `.Rhistory`. Instead, +#' @param password Password. You should avoid entering the password directly +#' when calling this function as it will be captured by `.Rhistory`. Instead, #' leave it unset and the default behaviour will prompt you for it #' interactively. #' @returns A modified HTTP [request]. diff --git a/R/req-body.R b/R/req-body.R index f9786731..1087e4d4 100644 --- a/R/req-body.R +++ b/R/req-body.R @@ -137,7 +137,8 @@ req_body_json_modify <- function(req, ...) { #' data in the body. #' #' * For `req_body_form()`, the values must be strings (or things easily -#' coerced to strings); +#' coerced to strings). Vectors are convertd to strings using the +#' value of `.multi`. #' * For `req_body_multipart()` the values must be strings or objects #' produced by [curl::form_file()]/[curl::form_data()]. #' * For `req_body_json_modify()`, any simple data made from atomic vectors diff --git a/R/req-perform.R b/R/req-perform.R index 5eb8915b..e7335234 100644 --- a/R/req-perform.R +++ b/R/req-perform.R @@ -229,6 +229,10 @@ last_request <- function() { #' works by sending the real HTTP request to a local webserver, thanks to #' the magic of [curl::curl_echo()]. #' +#' ## Limitations +#' +#' * The `Host` header is not respected. +#' #' @inheritParams req_verbose #' @param quiet If `TRUE` doesn't print anything. #' @returns Invisibly, a list containing information about the request, diff --git a/R/req-retries.R b/R/req-retries.R index 202391a2..1faf758b 100644 --- a/R/req-retries.R +++ b/R/req-retries.R @@ -1,55 +1,55 @@ -#' Control when a request will retry, and how long it will wait between tries +#' Automatically retry a request on failure #' #' @description -#' `req_retry()` alters [req_perform()] so that it will automatically retry -#' in the case of failure. To activate it, you must specify either the total -#' number of requests to make with `max_tries` or the total amount of time -#' to spend with `max_seconds`. Then `req_perform()` will retry if the error is -#' "transient", i.e. it's an HTTP error that can be resolved by waiting. By -#' default, 429 and 503 statuses are treated as transient, but if the API you -#' are wrapping has other transient status codes (or conveys transient-ness -#' with some other property of the response), you can override the default -#' with `is_transient`. +#' `req_retry()` allows [req_perform()] to automatically retry failing +#' requests. It's particularly important for APIs with rate limiting, but can +#' also be useful when dealing with flaky servers. #' -#' Additionally, if you set `retry_on_failure = TRUE`, the request will retry -#' if either the HTTP request or HTTP response doesn't complete successfully +#' By default, `req_perform()` will retry if the response is a 429 +#' ("too many requests", often used for rate limiting) or 503 +#' ("service unavailable"). If the API you are wrapping has other transient +#' status codes (or conveys transience with some other property of the +#' response), you can override the default with `is_transient`. And +#' if you set `retry_on_failure = TRUE`, the request will retry +#' if either the HTTP request or HTTP response doesn't complete successfully, #' leading to an error from curl, the lower-level library that httr2 uses to -#' perform HTTP request. This occurs, for example, if your wifi is down. +#' perform HTTP requests. This occurs, for example, if your Wi-Fi is down. +#' +#' ## Delay #' #' It's a bad idea to immediately retry a request, so `req_perform()` will #' wait a little before trying again: #' #' * If the response contains the `Retry-After` header, httr2 will wait the #' amount of time it specifies. If the API you are wrapping conveys this -#' information with a different header (or other property of the response) -#' you can override the default behaviour with `retry_after`. +#' information with a different header (or other property of the response), +#' you can override the default behavior with `retry_after`. #' #' * Otherwise, httr2 will use "truncated exponential backoff with full -#' jitter", i.e. it will wait a random amount of time between one second and -#' `2 ^ tries` seconds, capped to at most 60 seconds. In other words, it +#' jitter", i.e., it will wait a random amount of time between one second and +#' `2 ^ tries` seconds, capped at a maximum of 60 seconds. In other words, it #' waits `runif(1, 1, 2)` seconds after the first failure, `runif(1, 1, 4)` #' after the second, `runif(1, 1, 8)` after the third, and so on. If you'd #' prefer a different strategy, you can override the default with `backoff`. #' #' @inheritParams req_perform -#' @param max_tries,max_seconds Cap the maximum number of attempts with -#' `max_tries` or the total elapsed time from the first request with -#' `max_seconds`. If neither option is supplied (the default), [req_perform()] -#' will not retry. +#' @param max_tries,max_seconds Cap the maximum number of attempts +#' (`max_tries`), the total elapsed time from the first request +#' (`max_seconds`), or both. #' -#' `max_tries` is the total number of attempts make, so this should always -#' be greater than one.` +#' `max_tries` is the total number of attempts made, so this should always +#' be greater than one. #' @param is_transient A predicate function that takes a single argument #' (the response) and returns `TRUE` or `FALSE` specifying whether or not #' the response represents a transient error. #' @param retry_on_failure Treat low-level failures as if they are -#' transient errors, and can be retried. +#' transient errors that can be retried. #' @param backoff A function that takes a single argument (the number of failed #' attempts so far) and returns the number of seconds to wait. #' @param after A function that takes a single argument (the response) and -#' returns either a number of seconds to wait or `NULL`, which indicates -#' that a precise wait time is not available that the `backoff` strategy -#' should be used instead.. +#' returns either a number of seconds to wait or `NA`. `NA` indicates +#' that a precise wait time is not available and that the `backoff` strategy +#' should be used instead. #' @returns A modified HTTP [request]. #' @export #' @seealso [req_throttle()] if the API has a rate-limit but doesn't expose @@ -61,7 +61,7 @@ #' #' # use a constant 10s delay after every failure #' request("http://example.com") |> -#' req_retry(backoff = ~10) +#' req_retry(backoff = \(resp) 10) #' #' # When rate-limited, GitHub's API returns a 403 with #' # `X-RateLimit-Remaining: 0` and an Unix time stored in the @@ -86,9 +86,16 @@ req_retry <- function(req, is_transient = NULL, backoff = NULL, after = NULL) { + check_request(req) - check_number_whole(max_tries, min = 2, allow_null = TRUE) + check_number_whole(max_tries, min = 1, allow_null = TRUE) check_number_whole(max_seconds, min = 0, allow_null = TRUE) + + if (is.null(max_tries) && is.null(max_seconds)) { + max_tries <- 2 + cli::cli_inform("Setting {.code max_tries = 2}.") + } + check_bool(retry_on_failure) req_policies(req, diff --git a/R/req-url.R b/R/req-url.R index d168e429..a35b1489 100644 --- a/R/req-url.R +++ b/R/req-url.R @@ -55,10 +55,11 @@ req_url <- function(req, url) { #' * `"error"`, the default, throws an error. #' * `"comma"`, separates values with a `,`, e.g. `?x=1,2`. #' * `"pipe"`, separates values with a `|`, e.g. `?x=1|2`. -#' * `"explode"`, turns each element into its own parameter, e.g. `?x=1&x=2`. +#' * `"explode"`, turns each element into its own parameter, e.g. `?x=1&x=2` #' -#' If none of these functions work, you can alternatively supply a function -#' that takes a character vector and returns a string. +#' If none of these options work for your needs, you can instead supply a +#' function that takes a character vector of argument values and returns a +#' a single string. req_url_query <- function(.req, ..., .multi = c("error", "comma", "pipe", "explode")) { diff --git a/R/sequential.R b/R/sequential.R index cda53896..db80bd6e 100644 --- a/R/sequential.R +++ b/R/sequential.R @@ -6,16 +6,19 @@ #' #' @param reqs A list of [request]s. #' @param paths An optional character vector of paths, if you want to download -#' the request bodies to disk. If supplied, must be the same length as `reqs`. +#' the response bodies to disk. If supplied, must be the same length as +#' `reqs`. #' @param on_error What should happen if one of the requests fails? #' #' * `stop`, the default: stop iterating with an error. #' * `return`: stop iterating, returning all the successful responses #' received so far, as well as an error object for the failed request. #' * `continue`: continue iterating, recording errors in the result. -#' @param progress Display a progress bar? Use `TRUE` to turn on a basic -#' progress bar, use a string to give it a name, or see [progress_bars] to -#' customise it in other ways. +#' @param progress Display a progress bar for the status of all requests? Use +#' `TRUE` to turn on a basic progress bar, use a string to give it a name, +#' or see [progress_bars] to customize it in other ways. Not compatible with +#' [req_progress()], as httr2 can only display a single progress bar at a +#' time. #' @return #' A list, the same length as `reqs`, containing [response]s and possibly #' error objects, if `on_error` is `"return"` or `"continue"` and one of the diff --git a/cran-comments.md b/cran-comments.md index 95b9100a..f012acff 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -4,4 +4,4 @@ ## revdepcheck results -This is a patch release that only adds new features and fixes a randomly failing test. Compared to the last release I have provided a patch for osmapiR: https://github.com/ropensci/osmapiR/pull/58 +This is a patch release that only add a new feature and fixes a rarely used function. diff --git a/man/multi_req_perform.Rd b/man/multi_req_perform.Rd index a178c021..f47ac90f 100644 --- a/man/multi_req_perform.Rd +++ b/man/multi_req_perform.Rd @@ -10,7 +10,8 @@ multi_req_perform(reqs, paths = NULL, pool = NULL, cancel_on_error = FALSE) \item{reqs}{A list of \link{request}s.} \item{paths}{An optional character vector of paths, if you want to download -the request bodies to disk. If supplied, must be the same length as \code{reqs}.} +the response bodies to disk. If supplied, must be the same length as +\code{reqs}.} \item{pool}{Optionally, a curl pool made by \code{\link[curl:multi]{curl::new_pool()}}. Supply this if you want to override the defaults for total concurrent connections diff --git a/man/progress_bars.Rd b/man/progress_bars.Rd index 3acb1d16..83c52c56 100644 --- a/man/progress_bars.Rd +++ b/man/progress_bars.Rd @@ -33,9 +33,6 @@ the same as \code{format}. By default the same as \code{format}. \item \code{name}: progress bar name. This is by default the empty string and it is displayed at the beginning of the progress bar. -\item \code{show_after}: numeric scalar. Only show the progress bar after this -number of seconds. It overrides the \code{cli.progress_show_after} -global option. \item \code{type}: progress bar type. Currently supported types are: \itemize{ \item \code{iterator}: the default, a for loop or a mapping function, diff --git a/man/req_auth_basic.Rd b/man/req_auth_basic.Rd index 16274044..7f304924 100644 --- a/man/req_auth_basic.Rd +++ b/man/req_auth_basic.Rd @@ -11,8 +11,8 @@ req_auth_basic(req, username, password = NULL) \item{username}{User name.} -\item{password}{Password. You avoid entering the password directly when -calling this function as it will be captured by \code{.Rhistory}. Instead, +\item{password}{Password. You should avoid entering the password directly +when calling this function as it will be captured by \code{.Rhistory}. Instead, leave it unset and the default behaviour will prompt you for it interactively.} } diff --git a/man/req_body.Rd b/man/req_body.Rd index cf416284..6517608a 100644 --- a/man/req_body.Rd +++ b/man/req_body.Rd @@ -53,7 +53,8 @@ or an empty list (\code{"list"}).} data in the body. \itemize{ \item For \code{req_body_form()}, the values must be strings (or things easily -coerced to strings); +coerced to strings). Vectors are convertd to strings using the +value of \code{.multi}. \item For \code{req_body_multipart()} the values must be strings or objects produced by \code{\link[curl:multipart]{curl::form_file()}}/\code{\link[curl:multipart]{curl::form_data()}}. \item For \code{req_body_json_modify()}, any simple data made from atomic vectors @@ -69,11 +70,12 @@ containing multiple values: \item \code{"error"}, the default, throws an error. \item \code{"comma"}, separates values with a \verb{,}, e.g. \verb{?x=1,2}. \item \code{"pipe"}, separates values with a \code{|}, e.g. \code{?x=1|2}. -\item \code{"explode"}, turns each element into its own parameter, e.g. \code{?x=1&x=2}. +\item \code{"explode"}, turns each element into its own parameter, e.g. \code{?x=1&x=2} } -If none of these functions work, you can alternatively supply a function -that takes a character vector and returns a string.} +If none of these options work for your needs, you can instead supply a +function that takes a character vector of argument values and returns a +a single string.} } \value{ A modified HTTP \link{request}. diff --git a/man/req_dry_run.Rd b/man/req_dry_run.Rd index 62ce1511..50ce30bd 100644 --- a/man/req_dry_run.Rd +++ b/man/req_dry_run.Rd @@ -25,6 +25,13 @@ actually sending anything. It requires the httpuv package because it works by sending the real HTTP request to a local webserver, thanks to the magic of \code{\link[curl:curl_echo]{curl::curl_echo()}}. } +\details{ +\subsection{Limitations}{ +\itemize{ +\item The \code{Host} header is not respected. +} +} +} \examples{ # httr2 adds default User-Agent, Accept, and Accept-Encoding headers request("http://example.com") |> req_dry_run() diff --git a/man/req_oauth_auth_code.Rd b/man/req_oauth_auth_code.Rd index f00fec1d..6ceae16a 100644 --- a/man/req_oauth_auth_code.Rd +++ b/man/req_oauth_auth_code.Rd @@ -101,29 +101,32 @@ by \href{https://datatracker.ietf.org/doc/html/rfc6749#section-4.1}{Section 4.1 This flow is the most commonly used OAuth flow where the user opens a page in their browser, approves the access, and then returns to R. When possible, it redirects the browser back to a temporary local webserver -to capture the authorization code. When this is not possible (e.g. when +to capture the authorization code. When this is not possible (e.g., when running on a hosted platform like RStudio Server), provide a custom \code{redirect_uri} and httr2 will prompt the user to enter the code manually. Learn more about the overall OAuth authentication flow in -\url{https://httr2.r-lib.org/articles/oauth.html}. +\url{https://httr2.r-lib.org/articles/oauth.html}, and more about the motivations +behind this flow in +\url{https://stack-auth.com/blog/oauth-from-first-principles}. } \section{Security considerations}{ The authorization code flow is used for both web applications and native applications (which are equivalent to R packages). \href{https://datatracker.ietf.org/doc/html/rfc8252}{RFC 8252} spells out important considerations for native apps. Most importantly there's no way for native apps to keep secrets from their users. This means that the -server should either not require a \code{client_secret} (i.e. a public client -not an confidential client) or ensure that possession of the \code{client_secret} -doesn't bestow any meaningful rights. - -Only modern APIs from the bigger players (Azure, Google, etc) explicitly -native apps. However, in most cases, even for older APIs, possessing the -\code{client_secret} gives you no ability to do anything harmful, so our -general principle is that it's fine to include it in an R package, as long -as it's mildly obfuscated to protect it from credential scraping. There's -no incentive to steal your client credentials if it takes less time to -create a new client than find your client secret. +server should either not require a \code{client_secret} (i.e. it should be a +public client and not a confidential client) or ensure that possession of +the \code{client_secret} doesn't grant any significant privileges. + +Only modern APIs from major providers (like Azure and Google) explicitly +support native apps. However, in most cases, even for older APIs, possessing +the \code{client_secret} provides limited ability to perform harmful actions. +Therefore, our general principle is that it's acceptable to include it in an +R package, as long as it's mildly obfuscated to protect against credential +scraping attacks (which aim to acquire large numbers of client secrets by +scanning public sites like GitHub). The goal is to ensure that obtaining your +client credentials is more work than just creating a new client. } \examples{ diff --git a/man/req_oauth_password.Rd b/man/req_oauth_password.Rd index 0fe91d1b..713f6231 100644 --- a/man/req_oauth_password.Rd +++ b/man/req_oauth_password.Rd @@ -31,8 +31,8 @@ oauth_flow_password( \item{username}{User name.} -\item{password}{Password. You avoid entering the password directly when -calling this function as it will be captured by \code{.Rhistory}. Instead, +\item{password}{Password. You should avoid entering the password directly +when calling this function as it will be captured by \code{.Rhistory}. Instead, leave it unset and the default behaviour will prompt you for it interactively.} diff --git a/man/req_perform_iterative.Rd b/man/req_perform_iterative.Rd index 8c8ce2f4..4b86e534 100644 --- a/man/req_perform_iterative.Rd +++ b/man/req_perform_iterative.Rd @@ -35,9 +35,11 @@ perform all requests until \code{next_req()} returns \code{NULL}.} far, as well as an error object for the failed request. }} -\item{progress}{Display a progress bar? Use \code{TRUE} to turn on a basic -progress bar, use a string to give it a name, or see \link{progress_bars} to -customise it in other ways.} +\item{progress}{Display a progress bar for the status of all requests? Use +\code{TRUE} to turn on a basic progress bar, use a string to give it a name, +or see \link{progress_bars} to customize it in other ways. Not compatible with +\code{\link[=req_progress]{req_progress()}}, as httr2 can only display a single progress bar at a +time.} } \value{ A list, at most length \code{max_reqs}, containing \link{response}s and possibly one diff --git a/man/req_perform_parallel.Rd b/man/req_perform_parallel.Rd index e6dfb5e2..f9c94399 100644 --- a/man/req_perform_parallel.Rd +++ b/man/req_perform_parallel.Rd @@ -16,7 +16,8 @@ req_perform_parallel( \item{reqs}{A list of \link{request}s.} \item{paths}{An optional character vector of paths, if you want to download -the request bodies to disk. If supplied, must be the same length as \code{reqs}.} +the response bodies to disk. If supplied, must be the same length as +\code{reqs}.} \item{pool}{Optionally, a curl pool made by \code{\link[curl:multi]{curl::new_pool()}}. Supply this if you want to override the defaults for total concurrent connections @@ -30,9 +31,11 @@ received so far, as well as an error object for the failed request. \item \code{continue}: continue iterating, recording errors in the result. }} -\item{progress}{Display a progress bar? Use \code{TRUE} to turn on a basic -progress bar, use a string to give it a name, or see \link{progress_bars} to -customise it in other ways.} +\item{progress}{Display a progress bar for the status of all requests? Use +\code{TRUE} to turn on a basic progress bar, use a string to give it a name, +or see \link{progress_bars} to customize it in other ways. Not compatible with +\code{\link[=req_progress]{req_progress()}}, as httr2 can only display a single progress bar at a +time.} } \value{ A list, the same length as \code{reqs}, containing \link{response}s and possibly diff --git a/man/req_perform_sequential.Rd b/man/req_perform_sequential.Rd index c5d1334f..1089db62 100644 --- a/man/req_perform_sequential.Rd +++ b/man/req_perform_sequential.Rd @@ -15,7 +15,8 @@ req_perform_sequential( \item{reqs}{A list of \link{request}s.} \item{paths}{An optional character vector of paths, if you want to download -the request bodies to disk. If supplied, must be the same length as \code{reqs}.} +the response bodies to disk. If supplied, must be the same length as +\code{reqs}.} \item{on_error}{What should happen if one of the requests fails? \itemize{ @@ -25,9 +26,11 @@ received so far, as well as an error object for the failed request. \item \code{continue}: continue iterating, recording errors in the result. }} -\item{progress}{Display a progress bar? Use \code{TRUE} to turn on a basic -progress bar, use a string to give it a name, or see \link{progress_bars} to -customise it in other ways.} +\item{progress}{Display a progress bar for the status of all requests? Use +\code{TRUE} to turn on a basic progress bar, use a string to give it a name, +or see \link{progress_bars} to customize it in other ways. Not compatible with +\code{\link[=req_progress]{req_progress()}}, as httr2 can only display a single progress bar at a +time.} } \value{ A list, the same length as \code{reqs}, containing \link{response}s and possibly diff --git a/man/req_retry.Rd b/man/req_retry.Rd index 9c3d5044..0b7ebeec 100644 --- a/man/req_retry.Rd +++ b/man/req_retry.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/req-retries.R \name{req_retry} \alias{req_retry} -\title{Control when a request will retry, and how long it will wait between tries} +\title{Automatically retry a request on failure} \usage{ req_retry( req, @@ -17,16 +17,15 @@ req_retry( \arguments{ \item{req}{A httr2 \link{request} object.} -\item{max_tries, max_seconds}{Cap the maximum number of attempts with -\code{max_tries} or the total elapsed time from the first request with -\code{max_seconds}. If neither option is supplied (the default), \code{\link[=req_perform]{req_perform()}} -will not retry. +\item{max_tries, max_seconds}{Cap the maximum number of attempts +(\code{max_tries}), the total elapsed time from the first request +(\code{max_seconds}), or both. -\code{max_tries} is the total number of attempts make, so this should always -be greater than one.`} +\code{max_tries} is the total number of attempts made, so this should always +be greater than one.} \item{retry_on_failure}{Treat low-level failures as if they are -transient errors, and can be retried.} +transient errors that can be retried.} \item{is_transient}{A predicate function that takes a single argument (the response) and returns \code{TRUE} or \code{FALSE} specifying whether or not @@ -36,44 +35,45 @@ the response represents a transient error.} attempts so far) and returns the number of seconds to wait.} \item{after}{A function that takes a single argument (the response) and -returns either a number of seconds to wait or \code{NULL}, which indicates -that a precise wait time is not available that the \code{backoff} strategy -should be used instead..} +returns either a number of seconds to wait or \code{NA}. \code{NA} indicates +that a precise wait time is not available and that the \code{backoff} strategy +should be used instead.} } \value{ A modified HTTP \link{request}. } \description{ -\code{req_retry()} alters \code{\link[=req_perform]{req_perform()}} so that it will automatically retry -in the case of failure. To activate it, you must specify either the total -number of requests to make with \code{max_tries} or the total amount of time -to spend with \code{max_seconds}. Then \code{req_perform()} will retry if the error is -"transient", i.e. it's an HTTP error that can be resolved by waiting. By -default, 429 and 503 statuses are treated as transient, but if the API you -are wrapping has other transient status codes (or conveys transient-ness -with some other property of the response), you can override the default -with \code{is_transient}. +\code{req_retry()} allows \code{\link[=req_perform]{req_perform()}} to automatically retry failing +requests. It's particularly important for APIs with rate limiting, but can +also be useful when dealing with flaky servers. -Additionally, if you set \code{retry_on_failure = TRUE}, the request will retry -if either the HTTP request or HTTP response doesn't complete successfully +By default, \code{req_perform()} will retry if the response is a 429 +("too many requests", often used for rate limiting) or 503 +("service unavailable"). If the API you are wrapping has other transient +status codes (or conveys transience with some other property of the +response), you can override the default with \code{is_transient}. And +if you set \code{retry_on_failure = TRUE}, the request will retry +if either the HTTP request or HTTP response doesn't complete successfully, leading to an error from curl, the lower-level library that httr2 uses to -perform HTTP request. This occurs, for example, if your wifi is down. +perform HTTP requests. This occurs, for example, if your Wi-Fi is down. +\subsection{Delay}{ It's a bad idea to immediately retry a request, so \code{req_perform()} will wait a little before trying again: \itemize{ \item If the response contains the \code{Retry-After} header, httr2 will wait the amount of time it specifies. If the API you are wrapping conveys this -information with a different header (or other property of the response) -you can override the default behaviour with \code{retry_after}. +information with a different header (or other property of the response), +you can override the default behavior with \code{retry_after}. \item Otherwise, httr2 will use "truncated exponential backoff with full -jitter", i.e. it will wait a random amount of time between one second and -\code{2 ^ tries} seconds, capped to at most 60 seconds. In other words, it +jitter", i.e., it will wait a random amount of time between one second and +\code{2 ^ tries} seconds, capped at a maximum of 60 seconds. In other words, it waits \code{runif(1, 1, 2)} seconds after the first failure, \code{runif(1, 1, 4)} after the second, \code{runif(1, 1, 8)} after the third, and so on. If you'd prefer a different strategy, you can override the default with \code{backoff}. } } +} \examples{ # google APIs assume that a 500 is also a transient error request("http://google.com") |> @@ -81,7 +81,7 @@ request("http://google.com") |> # use a constant 10s delay after every failure request("http://example.com") |> - req_retry(backoff = ~10) + req_retry(backoff = \(resp) 10) # When rate-limited, GitHub's API returns a 403 with # `X-RateLimit-Remaining: 0` and an Unix time stored in the diff --git a/man/req_url.Rd b/man/req_url.Rd index 87bdc284..ca5b50b6 100644 --- a/man/req_url.Rd +++ b/man/req_url.Rd @@ -34,11 +34,12 @@ containing multiple values: \item \code{"error"}, the default, throws an error. \item \code{"comma"}, separates values with a \verb{,}, e.g. \verb{?x=1,2}. \item \code{"pipe"}, separates values with a \code{|}, e.g. \code{?x=1|2}. -\item \code{"explode"}, turns each element into its own parameter, e.g. \code{?x=1&x=2}. +\item \code{"explode"}, turns each element into its own parameter, e.g. \code{?x=1&x=2} } -If none of these functions work, you can alternatively supply a function -that takes a character vector and returns a string.} +If none of these options work for your needs, you can instead supply a +function that takes a character vector of argument values and returns a +a single string.} } \value{ A modified HTTP \link{request}. diff --git a/tests/testthat/_snaps/req-retries.md b/tests/testthat/_snaps/req-retries.md index a44fc28d..a1d8ea12 100644 --- a/tests/testthat/_snaps/req-retries.md +++ b/tests/testthat/_snaps/req-retries.md @@ -1,3 +1,10 @@ +# has useful default (with message) + + Code + req <- req_retry(req) + Message + Setting `max_tries = 2`. + # useful message if `after` wrong Code @@ -9,17 +16,17 @@ # validates its inputs Code - req_retry(req, max_tries = 1) + req_retry(req, max_tries = 0) Condition Error in `req_retry()`: - ! `max_tries` must be a whole number larger than or equal to 2 or `NULL`, not the number 1. + ! `max_tries` must be a whole number larger than or equal to 1 or `NULL`, not the number 0. Code - req_retry(req, max_seconds = "x") + req_retry(req, max_tries = 2, max_seconds = "x") Condition Error in `req_retry()`: ! `max_seconds` must be a whole number or `NULL`, not the string "x". Code - req_retry(req, retry_on_failure = "x") + req_retry(req, max_tries = 2, retry_on_failure = "x") Condition Error in `req_retry()`: ! `retry_on_failure` must be `TRUE` or `FALSE`, not the string "x". diff --git a/tests/testthat/test-oauth-flow.R b/tests/testthat/test-oauth-flow.R index 0bce31e4..a3222f5b 100644 --- a/tests/testthat/test-oauth-flow.R +++ b/tests/testthat/test-oauth-flow.R @@ -21,6 +21,24 @@ test_that("userful errors if response isn't parseable", { }) }) +test_that("can inspect the original response if response isn't parseable", { + resp1 <- response(headers = list(`content-type` = "text/plain")) + resp2 <- response_json(body = list()) + + tryCatch( + oauth_flow_parse(resp1, "test"), + httr2_oauth_parse = function(cnd) { + expect_equal(cnd$resp, resp1) + } + ) + tryCatch( + oauth_flow_parse(resp2, "test"), + httr2_oauth_parse = function(cnd) { + expect_equal(cnd$resp, resp2) + } + ) +}) + test_that("returns body if known good structure", { resp <- response_json(body = list(access_token = "10")) expect_equal(oauth_flow_parse(resp, "test"), list(access_token = "10")) diff --git a/tests/testthat/test-req-retries.R b/tests/testthat/test-req-retries.R index c59b3dca..e3918848 100644 --- a/tests/testthat/test-req-retries.R +++ b/tests/testthat/test-req-retries.R @@ -1,3 +1,10 @@ +test_that("has useful default (with message)", { + req <- request_test() + expect_snapshot(req <- req_retry(req)) + expect_equal(retry_max_tries(req), 2) + expect_equal(retry_max_seconds(req), Inf) +}) + test_that("can set define maximum retries", { req <- request_test() expect_equal(retry_max_tries(req), 1) @@ -70,9 +77,9 @@ test_that("validates its inputs", { req <- new_request("http://example.com") expect_snapshot(error = TRUE, { - req_retry(req, max_tries = 1) - req_retry(req, max_seconds = "x") - req_retry(req, retry_on_failure = "x") + req_retry(req, max_tries = 0) + req_retry(req, max_tries = 2, max_seconds = "x") + req_retry(req, max_tries = 2, retry_on_failure = "x") }) })