From 6b085735f37a34130c5c27213faec78825ebc86e Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 20 Oct 2023 08:43:38 -0500 Subject: [PATCH] Cache the result of `resp_body_json()` (#351) Part of #341. --- NEWS.md | 3 +++ R/resp-body.R | 25 ++++++++++++++++++++++--- R/resp.R | 3 ++- man/resp_body_raw.Rd | 3 ++- tests/testthat/test-resp-body.R | 27 +++++++++++++++++++++++++++ 5 files changed, 56 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2c25bf04..0eb8557c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # httr2 (development version) +* `resp_body_json()` and `resp_body_xml()` now caches the parsed values so + that you can use them repeatedly without worrying about the performance cost. + * `req_url_query()` gains a `.multi` parameter that controls what happens when you supply multiple values in a vector. The default will continue to error but you can use `.multi = "comma"` to separate with commas, `"pipe"` to diff --git a/R/resp-body.R b/R/resp-body.R index 76df730f..707680b7 100644 --- a/R/resp-body.R +++ b/R/resp-body.R @@ -10,7 +10,8 @@ #' #' `resp_body_json()` and `resp_body_xml()` check that the content-type header #' is correct; if the server returns an incorrect type you can suppress the -#' check with `check_type = FALSE`. +#' check with `check_type = FALSE`. These two functions also cache the parsed +#' object so the second and subsequent calls are low-cost. #' #' @param resp A response object. #' @returns @@ -77,6 +78,11 @@ resp_body_string <- function(resp, encoding = NULL) { #' @rdname resp_body_raw #' @export resp_body_json <- function(resp, check_type = TRUE, simplifyVector = FALSE, ...) { + key <- body_cache_key("json", simplifyVector = simplifyVector, ...) + if (env_has(resp$cache, key)) { + return(resp$cache[[key]]) + } + check_response(resp) check_installed("jsonlite") resp_check_content_type( @@ -87,7 +93,8 @@ resp_body_json <- function(resp, check_type = TRUE, simplifyVector = FALSE, ...) ) text <- resp_body_string(resp, "UTF-8") - jsonlite::fromJSON(text, simplifyVector = simplifyVector, ...) + resp$cache[[key]] <- jsonlite::fromJSON(text, simplifyVector = simplifyVector, ...) + resp$cache[[key]] } #' @rdname resp_body_raw @@ -107,6 +114,12 @@ resp_body_html <- function(resp, check_type = TRUE, ...) { #' @rdname resp_body_raw #' @export resp_body_xml <- function(resp, check_type = TRUE, ...) { + key <- body_cache_key("xml", ...) + if (env_has(resp$cache, key)) { + return(resp$cache[[key]]) + } + + check_response(resp) check_installed("xml2") resp_check_content_type( @@ -116,5 +129,11 @@ resp_body_xml <- function(resp, check_type = TRUE, ...) { check_type = check_type ) - xml2::read_xml(resp$body, ...) + resp$cache[[key]] <- xml2::read_xml(resp$body, ...) + resp$cache[[key]] +} + +body_cache_key <- function(prefix, ...) { + key <- hash(list(...)) + paste0(prefix, "-", substr(key, 1, 10)) } diff --git a/R/resp.R b/R/resp.R index b637213d..9d99ec5e 100644 --- a/R/resp.R +++ b/R/resp.R @@ -90,7 +90,8 @@ new_response <- function(method, url = url, status_code = status_code, headers = headers, - body = body + body = body, + cache = new_environment() ), class = "httr2_response" ) diff --git a/man/resp_body_raw.Rd b/man/resp_body_raw.Rd index 92b8212e..e6ed657c 100644 --- a/man/resp_body_raw.Rd +++ b/man/resp_body_raw.Rd @@ -58,7 +58,8 @@ booleans, numbers, and strings) be caused to atomic vectors?} \code{resp_body_json()} and \code{resp_body_xml()} check that the content-type header is correct; if the server returns an incorrect type you can suppress the -check with \code{check_type = FALSE}. +check with \code{check_type = FALSE}. These two functions also cache the parsed +object so the second and subsequent calls are low-cost. } \examples{ resp <- request("https://httr2.r-lib.org") |> req_perform() diff --git a/tests/testthat/test-resp-body.R b/tests/testthat/test-resp-body.R index a34c82b9..6a9f726d 100644 --- a/tests/testthat/test-resp-body.R +++ b/tests/testthat/test-resp-body.R @@ -30,6 +30,33 @@ test_that("can retrieve parsed body", { expect_s3_class(resp_body_xml(resp), "xml_document") }) +test_that("resp_body_json stores parsed result", { + resp <- request_test("/json") %>% req_perform() + json1 <- resp_body_json(resp) + # check it's saved + expect_length(resp$cache, 1) + + # check it's not recomputed + json2 <- resp_body_json(resp) + expect_true(is_reference(json2, json1)) + + # check the arguments matter + json3 <- resp_body_json(resp, simplifyVector = TRUE) + expect_false(is_reference(json3, json1)) + expect_length(resp$cache, 2) +}) + +test_that("resp_body_xml stores parsed result", { + resp <- request_test("/xml") %>% req_perform() + xml1 <- resp_body_xml(resp) + # check it's saved + expect_length(resp$cache, 1) + + # check it's not recomputed + xml2 <- resp_body_xml(resp) + expect_true(is_reference(xml2, xml1)) +}) + test_that("content types are checked", { expect_snapshot(error = TRUE, { request_test("/xml") %>% req_perform() %>% resp_body_json()