Skip to content

Commit 5524237

Browse files
hadleymgirlich
andauthored
Implement new iteration strategy (#353)
New `req_perform_iteratively()` takes the callback and returns a list of responses. Paired with `iterate_with_offset()` and friends to do the iteration, and `resps_combine()` and friends to work with the results. Fixes #341. Fixes #298. Co-authored-by: Maximilian Girlich <[email protected]>
1 parent c73661e commit 5524237

21 files changed

+705
-1052
lines changed

NAMESPACE

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ export(curl_help)
1717
export(curl_translate)
1818
export(example_github_client)
1919
export(example_url)
20-
export(iterate_next_request)
20+
export(iterate_with_cursor)
21+
export(iterate_with_link_url)
22+
export(iterate_with_offset)
2123
export(jwt_claim)
2224
export(jwt_encode_hmac)
2325
export(jwt_encode_sig)
@@ -68,11 +70,6 @@ export(req_oauth_device)
6870
export(req_oauth_password)
6971
export(req_oauth_refresh)
7072
export(req_options)
71-
export(req_paginate)
72-
export(req_paginate_next_url)
73-
export(req_paginate_offset)
74-
export(req_paginate_page_index)
75-
export(req_paginate_token)
7673
export(req_perform)
7774
export(req_perform_iteratively)
7875
export(req_perform_parallel)
@@ -117,6 +114,9 @@ export(resp_url_queries)
117114
export(resp_url_query)
118115
export(response)
119116
export(response_json)
117+
export(resps_combine)
118+
export(resps_errors)
119+
export(resps_responses)
120120
export(secret_decrypt)
121121
export(secret_decrypt_file)
122122
export(secret_encrypt)

R/iterate-helpers.R

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#' Iteration helpers
2+
#'
3+
#' @description
4+
#' These functions are intended for use with the `next_req` argument to
5+
#' [req_perform_iteratively()]. Each implements iteration for a common
6+
#' pagination pattern:
7+
#'
8+
#' * `iterate_with_offset()` increments a query parameter, e.g. `?page=1`,
9+
#' `?page=2`, or `?offset=1`, `?offset=21`.
10+
#' * `iterate_with_cursor()` updates a query parameter with the value of a
11+
#' cursor found somewhere in the response.
12+
#' * `iterate_with_link_url()` follows the url found in the `Link` header.
13+
#' See `resp_link_url()` for more details.
14+
#'
15+
#' @param param_name Name of query parameter.
16+
#' @param start Starting value.
17+
#' @param offset Offset for each page.
18+
#' @param resp_complete A callback function that takes a response (`resp`)
19+
#' and returns `TRUE` if there are no further pages.
20+
#' @param resp_pages A callback function that takes a response (`resp`) and
21+
#' returns the total number of pages, or `NULL` if unknown. It will only
22+
#' be called until it returns a non-`NULL` value.
23+
#' @export
24+
#' @examples
25+
#' req <- request(example_url()) |>
26+
#' req_url_path("/iris") |>
27+
#' req_throttle(10) |>
28+
#' req_url_query(limit = 50)
29+
#'
30+
#' # If you don't know the total number of pages in advance, you can
31+
#' # provide a `resp_complete()` callback
32+
#' is_complete <- function(resp) {
33+
#' length(resp_body_json(resp)$data) == 0
34+
#' }
35+
#' resps <- req_perform_iteratively(
36+
#' req,
37+
#' next_req = iterate_with_offset("page_index", resp_complete = is_complete),
38+
#' max_reqs = Inf
39+
#' )
40+
#'
41+
#' \dontrun{
42+
#' # Alternatively, if the response returns the total number of pages (or you
43+
#' # can easily calculate it), you can use the `resp_pages()` callback which
44+
#' # will generate a better progress bar.
45+
#'
46+
#' resps <- req_perform_iteratively(
47+
#' req |> req_url_query(limit = 1),
48+
#' next_req = iterate_with_offset(
49+
#' "page_index",
50+
#' resp_pages = function(resp) resp_body_json(resp)$pages
51+
#' ),
52+
#' max_reqs = Inf)
53+
#' }
54+
#'
55+
iterate_with_offset <- function(param_name,
                                start = 1,
                                offset = 1,
                                resp_pages = NULL,
                                resp_complete = NULL) {
  check_string(param_name)
  check_number_whole(start)
  check_number_whole(offset, min = 1)
  check_function2(resp_pages, args = "resp", allow_null = TRUE)
  check_function2(resp_complete, args = "resp", allow_null = TRUE)

  # Without a completion callback, never report completion from here
  if (is.null(resp_complete)) {
    resp_complete <- function(resp) FALSE
  }

  # State shared across calls of the returned closure
  total_announced <- FALSE
  position <- start # the request for `start` is assumed already fetched

  function(resp, req) {
    # Keep asking for the page total until the callback yields a value,
    # then announce it once via a classed condition
    should_ask_total <- !is.null(resp_pages) && !total_announced
    if (should_ask_total) {
      total <- resp_pages(resp)
      if (!is.null(total)) {
        total_announced <<- TRUE
        signal("", class = "httr2_total_pages", n = total)
      }
    }

    finished <- isTRUE(resp_complete(resp))
    if (!finished) {
      # Advance the counter and write it into the query of the next request
      position <<- position + offset
      req_url_query(req, !!param_name := position)
    }
  }
}
85+
86+
#' @rdname iterate_with_offset
87+
#' @export
88+
#' @param resp_param_value A callback function that takes a response (`resp`)
89+
#' and returns the next cursor value. Return `NULL` if there are no further
90+
#' pages.
91+
iterate_with_cursor <- function(param_name, resp_param_value) {
  check_string(param_name)
  check_function2(resp_param_value, args = "resp")

  function(resp, req) {
    cursor <- resp_param_value(resp)

    # A NULL cursor means there is nothing further to request
    if (is.null(cursor)) {
      return(invisible(NULL))
    }

    # Otherwise set the cursor as the query parameter of the next request
    req_url_query(req, !!param_name := cursor)
  }
}
102+
103+
#' @rdname iterate_with_offset
104+
#' @export
105+
#' @param rel The "link relation type" to use to retrieve the next page.
106+
iterate_with_link_url <- function(rel = "next") {
  check_string(rel)

  function(resp, req) {
    next_url <- resp_link_url(resp, rel)

    # No link for this relation type: stop iterating
    if (is.null(next_url)) {
      return(invisible(NULL))
    }

    # Point the next request at the linked url
    req_url(req, next_url)
  }
}

R/iterate-responses.R

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#' Tools for working with lists of responses
#'
#' * `resps_combine()` combines the data from each successful response into a
#'   single vector. Elements of `resps` that are not responses (i.e. errors)
#'   are skipped.
#' * `resps_responses()` returns all successful responses.
#' * `resps_errors()` returns all errors.
#'
#' @export
#' @param resps A list of responses (possibly including errors).
#' @param resp_data A function that takes a response (`resp`) and
#'   returns its data as a vector or data frame.
resps_combine <- function(resps, resp_data) {
  check_installed("vctrs")

  check_function2(resp_data, "resp")

  # `resp_data` is documented to take a response, so only hand it the
  # elements that actually are responses; errors in `resps` are left out.
  vctrs::list_unchop(lapply(resps_responses(resps), resp_data))
}
18+
# Logical vector, one entry per element of `resps`, that is `TRUE` where
# `is_response()` accepts the element.
resps_is_resp <- function(resps) {
  is_resp <- vapply(resps, is_response, FUN.VALUE = logical(1))
  is_resp
}
21+
22+
#' @export
23+
#' @rdname resps_combine
24+
resps_responses <- function(resps) {
  # Keep only the elements flagged as responses
  keep <- resps_is_resp(resps)
  resps[keep]
}
27+
#' @export
28+
#' @rdname resps_combine
29+
resps_errors <- function(resps) {
  # Keep only the elements that are not flagged as responses
  not_resp <- !resps_is_resp(resps)
  resps[not_resp]
}

R/iterate.R

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
#' Perform requests iteratively, generating new requests from previous responses
2+
#'
3+
#' @description
4+
#' `r lifecycle::badge("experimental")`
5+
#'
6+
#' `req_perform_iteratively()` iteratively generates and performs requests,
7+
#' using a callback function, `next_req`, to define the next request based on
8+
#' the current request and response. You will probably want to pair it with an
9+
#' [iteration helper][iterate_with_offset] and use a
10+
#' [multi-response handler][resps_combine] to process the result.
11+
#'
12+
#' @inheritParams req_perform
13+
#' @param next_req A function that takes the previous response (`resp`) and
14+
#' request (`req`) and returns a [request] for the next page or `NULL` if
15+
#' the iteration should terminate.
16+
#' @param max_reqs The maximum number of requests to perform. Use `Inf` to
17+
#' perform all requests until `next_req()` returns `NULL`.
18+
#' @param progress Display a progress bar? Use `TRUE` to turn on a basic
19+
#' progress bar, use a string to give it a name, or see [progress_bars] to
20+
#' customise it in other ways.
21+
#' @param path Optionally, path to save the body of the response. This should be
22+
#' a glue string that uses `{i}` to distinguish different requests.
23+
#' Useful for large responses because it avoids storing the response in
24+
#' memory.
25+
#' @return A list of [response()]s.
26+
#' @export
27+
#' @examples
28+
#' req <- request(example_url()) |>
29+
#' req_url_path("/iris") |>
30+
#' req_throttle(10) |>
31+
#' req_url_query(limit = 5)
32+
#'
33+
#' resps <- req_perform_iteratively(req, iterate_with_offset("page_index"))
34+
#'
35+
#' resps |> resps_combine(function(resp) {
36+
#' data <- resp_body_json(resp)$data
37+
#' data.frame(
38+
#' Sepal.Length = sapply(data, `[[`, "Sepal.Length"),
39+
#' Sepal.Width = sapply(data, `[[`, "Sepal.Width"),
40+
#' Petal.Length = sapply(data, `[[`, "Petal.Length"),
41+
#' Petal.Width = sapply(data, `[[`, "Petal.Width"),
42+
#' Species = sapply(data, `[[`, "Species")
43+
#' )
44+
#' })
45+
req_perform_iteratively <- function(req,
                                    next_req,
                                    path = NULL,
                                    max_reqs = 20,
                                    progress = TRUE) {
  check_request(req)
  check_function2(next_req, args = c("resp", "req"))
  check_number_whole(max_reqs, allow_infinite = TRUE, min = 1)
  check_string(path, allow_empty = FALSE, allow_null = TRUE)

  # `path` is a glue template; `{i}` is filled in from this helper's argument
  get_path <- function(i) {
    if (is.null(path)) {
      NULL
    } else {
      glue::glue(path)
    }
  }

  progress <- create_progress_bar(
    total = max_reqs,
    name = "Iterating",
    config = progress
  )

  # Preallocate the result list. With no finite cap, start at 100 slots and
  # double on demand inside the loop.
  resps <- vector("list", length = if (is.finite(max_reqs)) max_reqs else 100)
  i <- 1L

  tryCatch({
    repeat {
      resps[[i]] <- resp <- req_perform(req, path = get_path(i))
      progress$update()

      withCallingHandlers(
        {
          req <- next_req(resp = resp, req = req)
        },
        httr2_total_pages = function(cnd) {
          # Allow next_req() to shrink the number of pages remaining
          # Most important in max_reqs = Inf case
          if (cnd$n < max_reqs) {
            max_reqs <<- cnd$n
            progress$update(total = max_reqs, inc = 0)
          }
        }
      )

      if (is.null(req) || i >= max_reqs) {
        break
      }
      check_request(req, arg = "next_req()")

      i <- i + 1L
      if (i > length(resps)) {
        signal("", class = "httr2:::doubled")
        length(resps) <- length(resps) * 2
      }
    }
  }, interrupt = function(cnd) {
    # The interrupt might occur after `i` was incremented but before slot `i`
    # was filled. It can even land before `resps` was grown to include slot
    # `i`, so check the bounds first: `resps[[i]]` would otherwise fail with
    # "subscript out of bounds" and the collected responses would be lost.
    if (i > length(resps) || is.null(resps[[i]])) {
      i <<- i - 1
    }
    cli::cli_alert_warning(
      "Terminating iteration; returning {i} response{?s}."
    )
  })
  progress$done()

  # Drop the unused preallocated slots
  if (i < length(resps)) {
    resps <- resps[seq_len(i)]
  }

  resps
}

0 commit comments

Comments
 (0)