From fba58d759169a685f4cef86acacd71d2e51b164c Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Tue, 28 Jan 2025 10:23:07 +0000 Subject: [PATCH 01/13] detect initial accumulation --- R/preprocessing.R | 40 ++++++++++++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index a7aa7ac71..decff2591 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -36,7 +36,10 @@ ##' have a sufficient number of modelled observations accumulated onto the ##' first data point. For modelling weekly incidence data this should be set ##' to 7. If accumulating and the first data point is not NA and this is -##' argument is not set, then that data point will be removed with a warning. +##' argument is not set, then if all dates in the data have the same gap this +##' will be taken as initial accumulation and a warning given to inform the +##' user. If not all gaps are the same the first data point will be removed +##' with a warning. ##' @param obs_column Character (default: "confirm"). If given, only the column ##' specified here will be used for checking missingness. This is useful if ##' using a data set that has multiple columns of hwich one of them @@ -74,9 +77,6 @@ fill_missing <- function(data, assert_character(missing_obs) assert_character(obs_column) assert_character(by, null.ok = TRUE) - if (!missing(initial_accumulate)) { - assert_integerish(initial_accumulate, lower = 1) - } assert_names( colnames(data), must.include = c("date", by, obs_column), @@ -84,21 +84,41 @@ fill_missing <- function(data, ) assert_date(data$date, any.missing = FALSE) - data <- as.data.table(data) - missing_dates <- arg_match(missing_dates) missing_obs <- arg_match(missing_obs) + data <- as.data.table(data) + + if (missing(initial_accumulate)) { + ## detect frequency of accumulation if possible + diffs <- data[, list(diff = unique(diff(date))), by = by] + unique_diff <- unique(diffs$diff) + if (length(unique_diff) == 1 && unique_diff > 1) { + cli_warn( + c( + "!" = + "Detected fixed accumulation frequency of {unique_diff}.", + "i" = + "This will be used for initial accumulation. Use + {.var initial_accumulate} to change this behaviour. + To silence this warning, + set {.var initial_accumulate} to {unique_diff}." + ) + ) + initial_accumulate <- unique_diff + } + } else { + assert_integerish(initial_accumulate, lower = 1) + } + ## first, processing missing dates initial_add <- ifelse(missing(initial_accumulate), 1, initial_accumulate) cols <- list( date = seq(min(data$date) - initial_add + 1, max(data$date), by = "day") ) - if (!is.null(by)) { - for (by_col in by) { - cols[[by_col]] <- unique(data[[by_col]]) - } + for (by_col in by) { + cols[[by_col]] <- unique(data[[by_col]]) } complete <- do.call(CJ, cols) From b255b514a12a28f9af298a28e419c29dc0b72fe3 Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Tue, 28 Jan 2025 10:23:19 +0000 Subject: [PATCH 02/13] update docs --- man/create_forecast_data.Rd | 2 +- man/create_stan_data.Rd | 2 +- man/epinow.Rd | 2 +- man/estimate_infections.Rd | 2 +- man/fill_missing.Rd | 5 ++++- man/forecast_opts.Rd | 1 - man/regional_epinow.Rd | 2 +- 7 files changed, 9 insertions(+), 7 deletions(-) diff --git a/man/create_forecast_data.Rd b/man/create_forecast_data.Rd index 3ca7b0ccb..38a98a37e 100644 --- a/man/create_forecast_data.Rd +++ b/man/create_forecast_data.Rd @@ -9,7 +9,7 @@ create_forecast_data(forecast = forecast_opts(), data) \arguments{ \item{forecast}{A list of options as generated by \code{\link[=forecast_opts]{forecast_opts()}} defining the forecast opitions. Defaults to \code{\link[=forecast_opts]{forecast_opts()}}. If NULL then no -forecasting will be one.} +forecasting will be done.} \item{data}{A \verb{<data.frame>} of confirmed cases (confirm) by date (date). \code{confirm} must be numeric and \code{date} must be in date format. Optionally diff --git a/man/create_stan_data.Rd b/man/create_stan_data.Rd index e0f05ff26..e5680e01a 100644 --- a/man/create_stan_data.Rd +++ b/man/create_stan_data.Rd @@ -48,7 +48,7 @@ define the back calculation. Defaults to \code{\link[=backcalc_opts]{backcalc_op \item{forecast}{A list of options as generated by \code{\link[=forecast_opts]{forecast_opts()}} defining the forecast opitions. Defaults to \code{\link[=forecast_opts]{forecast_opts()}}. If NULL then no -forecasting will be one.} +forecasting will be done.} } \value{ A list of stan data diff --git a/man/epinow.Rd b/man/epinow.Rd index 8a62820a5..008759969 100644 --- a/man/epinow.Rd +++ b/man/epinow.Rd @@ -73,7 +73,7 @@ observation model. Defaults to \code{\link[=obs_opts]{obs_opts()}}.} \item{forecast}{A list of options as generated by \code{\link[=forecast_opts]{forecast_opts()}} defining the forecast opitions. Defaults to \code{\link[=forecast_opts]{forecast_opts()}}. If NULL then no -forecasting will be one.} +forecasting will be done.} \item{stan}{A list of stan options as generated by \code{\link[=stan_opts]{stan_opts()}}. Defaults to \code{\link[=stan_opts]{stan_opts()}}. Can be used to override \code{data}, \code{init}, and \code{verbose} diff --git a/man/estimate_infections.Rd b/man/estimate_infections.Rd index 992b65d46..4f5a97dde 100644 --- a/man/estimate_infections.Rd +++ b/man/estimate_infections.Rd @@ -69,7 +69,7 @@ observation model. Defaults to \code{\link[=obs_opts]{obs_opts()}}.} \item{forecast}{A list of options as generated by \code{\link[=forecast_opts]{forecast_opts()}} defining the forecast opitions. Defaults to \code{\link[=forecast_opts]{forecast_opts()}}. If NULL then no -forecasting will be one.} +forecasting will be done.} \item{stan}{A list of stan options as generated by \code{\link[=stan_opts]{stan_opts()}}. Defaults to \code{\link[=stan_opts]{stan_opts()}}. Can be used to override \code{data}, \code{init}, and \code{verbose} diff --git a/man/fill_missing.Rd b/man/fill_missing.Rd index 9e37e0b02..ac9a2582d 100644 --- a/man/fill_missing.Rd +++ b/man/fill_missing.Rd @@ -44,7 +44,10 @@ then dates are added to the beginning of the data set to get be able to have a sufficient number of modelled observations accumulated onto the first data point. For modelling weekly incidence data this should be set to 7. If accumulating and the first data point is not NA and this is -argument is not set, then that data point will be removed with a warning.} +argument is not set, then if all dates in the data have the same gap this +will be taken as initial accumulation and a warning given to inform the +user. If not all gaps are the same the first data point will be removed +with a warning.} \item{obs_column}{Character (default: "confirm"). If given, only the column specified here will be used for checking missingness. This is useful if diff --git a/man/forecast_opts.Rd b/man/forecast_opts.Rd index 2bd02766b..cadd823aa 100644 --- a/man/forecast_opts.Rd +++ b/man/forecast_opts.Rd @@ -17,7 +17,6 @@ forecasts unless set explicitly here.} } \value{ A \verb{<forecast_opts>} object of forecast setting. -rstan functions. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} diff --git a/man/regional_epinow.Rd b/man/regional_epinow.Rd index efaa0ee6b..1dfd319df 100644 --- a/man/regional_epinow.Rd +++ b/man/regional_epinow.Rd @@ -66,7 +66,7 @@ observation model. Defaults to \code{\link[=obs_opts]{obs_opts()}}.} \item{forecast}{A list of options as generated by \code{\link[=forecast_opts]{forecast_opts()}} defining the forecast opitions. Defaults to \code{\link[=forecast_opts]{forecast_opts()}}. If NULL then no -forecasting will be one.} +forecasting will be done.} \item{stan}{A list of stan options as generated by \code{\link[=stan_opts]{stan_opts()}}. Defaults to \code{\link[=stan_opts]{stan_opts()}}. Can be used to override \code{data}, \code{init}, and \code{verbose} From b1fc9438e0007db78409e8e3b401f4572f047e35 Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Tue, 28 Jan 2025 10:23:26 +0000 Subject: [PATCH 03/13] add test --- tests/testthat/test-preprocessing.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/testthat/test-preprocessing.R b/tests/testthat/test-preprocessing.R index 02b1621c9..fe9b8f9da 100644 --- a/tests/testthat/test-preprocessing.R +++ b/tests/testthat/test-preprocessing.R @@ -28,6 +28,11 @@ test_that("fill_missing works with by columns", { }) test_that("fill_missing warns about initial data points", { + expect_warning( + fill_missing(cases, missing_dates = "accumulate"), + "Detected fixed accumulation frequency" + ) + cases[1, date := date + 1] expect_warning( fill_missing(cases, missing_dates = "accumulate"), "Initial data point not marked as accumulated" From 34f39f0ab6d6fceb114cf1fc1d2585d800ab4722 Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Tue, 28 Jan 2025 10:24:24 +0000 Subject: [PATCH 04/13] add news item --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 4d5e1206d..7f8734f30 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ ## Model changes -- The models now support more complex patterns of aggregating reported cases by accumulating them over multiple time points, as well as mixtures of accumulation and missingness via the new `fill_missing()` function and a logical `accumulate` column that can be supplied with the data. By @sbfnk in #851 and reviewed by @seabbs and @jamesmbaazam.. +- The models now support more complex patterns of aggregating reported cases by accumulating them over multiple time points, as well as mixtures of accumulation and missingness via the new `fill_missing()` function and a logical `accumulate` column that can be supplied with the data. Any fixed accumulation frequencies detected when using `fill_missing()`. By @sbfnk in #851 and # and reviewed by @seabbs and @jamesmbaazam. ```r # Deprecated From 84f2be60f730ffac7dd8e8b539d46b7afa653def Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Tue, 28 Jan 2025 13:11:37 +0000 Subject: [PATCH 05/13] use inform not warn --- R/preprocessing.R | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index decff2591..2b0014233 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -94,15 +94,13 @@ fill_missing <- function(data, diffs <- data[, list(diff = unique(diff(date))), by = by] unique_diff <- unique(diffs$diff) if (length(unique_diff) == 1 && unique_diff > 1) { - cli_warn( + cli_inform( c( "!" = - "Detected fixed accumulation frequency of {unique_diff}.", - "i" = - "This will be used for initial accumulation. Use - {.var initial_accumulate} to change this behaviour. - To silence this warning, - set {.var initial_accumulate} to {unique_diff}." + "Detected fixed accumulation frequency of {unique_diff}. This will + be used for initial accumulation. Use {.var initial_accumulate} to + change this behaviour. To silence this warning, set + {.var initial_accumulate} to {unique_diff}." ) ) initial_accumulate <- unique_diff From a0b5cabd9bbc31c2409c8ead86c8f002d36e6502 Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Tue, 28 Jan 2025 14:20:29 +0000 Subject: [PATCH 06/13] update test --- tests/testthat/test-preprocessing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-preprocessing.R b/tests/testthat/test-preprocessing.R index fe9b8f9da..88e6178a4 100644 --- a/tests/testthat/test-preprocessing.R +++ b/tests/testthat/test-preprocessing.R @@ -28,7 +28,7 @@ test_that("fill_missing works with by columns", { }) test_that("fill_missing warns about initial data points", { - expect_warning( + expect_message( fill_missing(cases, missing_dates = "accumulate"), "Detected fixed accumulation frequency" ) From abd394d081d60a9a5a1ac94912139952889d2aae Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Tue, 28 Jan 2025 16:32:11 +0000 Subject: [PATCH 07/13] insert PR number --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 7f8734f30..301d21f54 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ ## Model changes -- The models now support more complex patterns of aggregating reported cases by accumulating them over multiple time points, as well as mixtures of accumulation and missingness via the new `fill_missing()` function and a logical `accumulate` column that can be supplied with the data. Any fixed accumulation frequencies detected when using `fill_missing()`. By @sbfnk in #851 and # and reviewed by @seabbs and @jamesmbaazam. +- The models now support more complex patterns of aggregating reported cases by accumulating them over multiple time points, as well as mixtures of accumulation and missingness via the new `fill_missing()` function and a logical `accumulate` column that can be supplied with the data. Any fixed accumulation frequencies detected when using `fill_missing()`. By @sbfnk in #851 and #933 and reviewed by @seabbs and @jamesmbaazam. ```r # Deprecated From bc4c74ec3fbc8e6b70c0c58472b6591e92f31246 Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Thu, 30 Jan 2025 15:33:34 +0000 Subject: [PATCH 08/13] rephrase news --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index b77087fda..1c2356121 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ ## Model changes -- The models now support more complex patterns of aggregating reported cases by accumulating them over multiple time points, as well as mixtures of accumulation and missingness via the new `fill_missing()` function and a logical `accumulate` column that can be supplied with the data. Any fixed accumulation frequencies detected when using `fill_missing()`. By @sbfnk in #851 and #933 and reviewed by @seabbs and @jamesmbaazam. +- The models now support more complex patterns of aggregating reported cases by accumulating them over multiple time points, as well as mixtures of accumulation and missingness via the new `fill_missing()` function and a logical `accumulate` column that can be supplied with the data. If the accumulation frequency is fixed in the data this s detected when using `fill_missing()`. By @sbfnk in #851 and #933 and reviewed by @seabbs and @jamesmbaazam. ```r # Deprecated From 1b97cbc307acba6fef77dd7cf6fc801ec346e4e9 Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Thu, 30 Jan 2025 15:34:36 +0000 Subject: [PATCH 09/13] improve variable names as suggested by @jamesmbaazam Co-authored-by: James Azam <james.azam@lshtm.ac.uk> --- R/preprocessing.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 2b0014233..fc653788c 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -92,18 +92,18 @@ fill_missing <- function(data, if (missing(initial_accumulate)) { ## detect frequency of accumulation if possible diffs <- data[, list(diff = unique(diff(date))), by = by] - unique_diff <- unique(diffs$diff) - if (length(unique_diff) == 1 && unique_diff > 1) { + unique_patterns <- unique(missing_date_patterns$pattern) + if (length(unique_patterns) == 1 && unique_patterns > 1) { cli_inform( c( "!" = - "Detected fixed accumulation frequency of {unique_diff}. This will + "Detected fixed accumulation frequency of {unique_patterns}. This will be used for initial accumulation. Use {.var initial_accumulate} to change this behaviour. To silence this warning, set - {.var initial_accumulate} to {unique_diff}." + {.var initial_accumulate} to {unique_patterns}." ) ) - initial_accumulate <- unique_diff + initial_accumulate <- unique_patterns } } else { assert_integerish(initial_accumulate, lower = 1) From 58285a3dcee9dfe21a829110543ee9aa6659bfad Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Thu, 30 Jan 2025 15:35:59 +0000 Subject: [PATCH 10/13] add i --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 1c2356121..8f6904405 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,7 +2,7 @@ ## Model changes -- The models now support more complex patterns of aggregating reported cases by accumulating them over multiple time points, as well as mixtures of accumulation and missingness via the new `fill_missing()` function and a logical `accumulate` column that can be supplied with the data. If the accumulation frequency is fixed in the data this s detected when using `fill_missing()`. By @sbfnk in #851 and #933 and reviewed by @seabbs and @jamesmbaazam. +- The models now support more complex patterns of aggregating reported cases by accumulating them over multiple time points, as well as mixtures of accumulation and missingness via the new `fill_missing()` function and a logical `accumulate` column that can be supplied with the data. If the accumulation frequency is fixed in the data this is detected when using `fill_missing()`. By @sbfnk in #851 and #933 and reviewed by @seabbs and @jamesmbaazam. ```r # Deprecated From f23ae18c7fda0fc7543d9ddd47b139d062be7e18 Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Thu, 30 Jan 2025 15:46:14 +0000 Subject: [PATCH 11/13] missed one Co-authored-by: James Azam <james.azam@lshtm.ac.uk> --- R/preprocessing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index fc653788c..201ab7d90 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -91,7 +91,7 @@ fill_missing <- function(data, if (missing(initial_accumulate)) { ## detect frequency of accumulation if possible - diffs <- data[, list(diff = unique(diff(date))), by = by] + missing_date_patterns <- data[, list(pattern = unique(diff(date))), by = by] unique_patterns <- unique(missing_date_patterns$pattern) if (length(unique_patterns) == 1 && unique_patterns > 1) { cli_inform( From 6e93087df9b282b44d8dd30b5b543ceffe6724b9 Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Thu, 30 Jan 2025 15:59:55 +0000 Subject: [PATCH 12/13] break line --- R/preprocessing.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 201ab7d90..4a2fd488a 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -97,10 +97,10 @@ fill_missing <- function(data, cli_inform( c( "!" = - "Detected fixed accumulation frequency of {unique_patterns}. This will - be used for initial accumulation. Use {.var initial_accumulate} to - change this behaviour. To silence this warning, set - {.var initial_accumulate} to {unique_patterns}." + "Detected fixed accumulation frequency of {unique_patterns}. + This will be used for initial accumulation. Use + {.var initial_accumulate} to change this behaviour. To silence this + warning, set {.var initial_accumulate} to {unique_patterns}." ) ) initial_accumulate <- unique_patterns From bcbf8a3ce339fe1ae13effd5e366af02b7538150 Mon Sep 17 00:00:00 2001 From: Sebastian Funk <sebastian.funk@lshtm.ac.uk> Date: Fri, 31 Jan 2025 09:33:32 +0000 Subject: [PATCH 13/13] fix tests --- tests/testthat/test-preprocessing.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test-preprocessing.R b/tests/testthat/test-preprocessing.R index d1479428e..00d0e5da6 100644 --- a/tests/testthat/test-preprocessing.R +++ b/tests/testthat/test-preprocessing.R @@ -32,22 +32,22 @@ test_that("fill_missing warns about initial data points", { fill_missing(cases, missing_dates = "accumulate"), "Detected fixed accumulation frequency" ) - cases[1, date := date + 1] + shifted <- copy(cases) + shifted[1, date := date + 1] expect_warning( - fill_missing(cases, missing_dates = "accumulate"), + fill_missing(shifted, missing_dates = "accumulate"), "Initial data point not marked as accumulated" ) }) test_that("add_horizon works", { - expect_warning( - fill_missing(cases, missing_dates = "accumulate"), - "Initial data point not marked as accumulated" - ) + expect_equal(nrow(add_horizon(cases, horizon = 7L)), nrow(cases) + 7L) }) test_that("add_horizon identifies gaps correctly", { - filled <- fill_missing(cases, missing_dates = "accumulate", initial_accumulate = 7) + filled <- fill_missing( + cases, missing_dates = "accumulate", initial_accumulate = 7 + ) expect_message( result <- add_horizon(filled, horizon = 7), "Forecasts accumulated every 7 days"