diff --git a/NAMESPACE b/NAMESPACE index 0b13f3e..2341dad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,8 @@ export(na_plot) export(na_spline) export(na_weekly_mean) export(na_zero) +export(npcra_iv) +export(npcra_ivm) export(periodogram) export(raw_data) export(read_acttrust) diff --git a/R/find_epoch.R b/R/find_epoch.R index b42173c..cb86dd3 100644 --- a/R/find_epoch.R +++ b/R/find_epoch.R @@ -14,8 +14,8 @@ #' periodicities with the same prevalence above the threshold, `best_match` will #' return just one of those values. #' -#' @param data A [tsibble][tsibble::tsibble()] object with a [`Date`][as.Date()] -#' or [`POSIXt`][as.POSIXct()] vector as index. +#' @param data A [`tsibble`][tsibble::tsibble()] object with a +#' [`Date`][as.Date()] or [`POSIXt`][as.POSIXct()] vector as index. #' @param threshold (optional) a number, from `0` to `1`, indicating the minimum proportion #' that an epoch must have to be considered valid. `threshold = 1` means that #' the regularity of the time series must be strict (i.e., have just 1 diff --git a/R/npcra_iv.R b/R/npcra_iv.R new file mode 100644 index 0000000..2ea9a05 --- /dev/null +++ b/R/npcra_iv.R @@ -0,0 +1,239 @@ +#' Non-Parametric Function IV (Intradaily Variability) +#' +#' @description +#' +#' `r lifecycle::badge("experimental")` +#' +#' Calculates the Intradaily Variability, which identifies the fragmentation of +#' the rest-activity rhythms. +#' +#' This fragmentation can have different results for the same data according +#' to the chosen time interval, that is, a minute by minute calculation brings +#' a more sensitive result than a check in hours, where small changes tend +#' to have less influence on intradaily variability. +#' +#' @details +#' +#' Intradaily variability is a number that is usually between 0 and 2 calculated +#' by dividing the square mean of the first derivative of the data by the +#' population variance (Witting et al., 1990). 
+#'
+#' Although real records typically have an IV value less than 2,
+#' it is possible that this number exceeds 2. IVs greater than 2
+#' are quite common in simulations of random values, since there is
+#' no way to define a standard for the data and, therefore, will have
+#' a high variability.
+#'
+#' If \eqn{X} represents the activity or other numeric variable indicated
+#' by the parameter `col`, the IV follows the formula below:
+#'
+#' \deqn{IV = SQM_FD / VAR(X)}
+#'
+#' Where SQM_FD is the square mean of the first derivative of the data,
+#' calculated as below:
+#'
+#' \deqn{SQM_FD = \sum_{i=2}^{N} (X_i - X_{i-1})^2 / (N-1)}
+#'
+#' And the population variance VAR(X) is:
+#'
+#' \deqn{VAR(X) = \sum_{i=1}^{N} (\bar{X} - X_i)^2 / N}
+#'
+#' Where:
+#'
+#' \eqn{X_i} is each data point or, more likely, each activity average
+#' for the time interval passed by the parameter \code{minutes_interval};
+#'
+#' N is the amount of data or the number of intervals that can fit in
+#' the data set;
+#'
+#' \eqn{\bar{X}} is the average of all data or the mean of all average
+#' activities by time interval.
+#'
+#' Usually the activity data used in the calculation are hourly averages of
+#' the activity, thus avoiding activity fluctuations in the same period of
+#' time. The calculation for IV by hourly averages is also called IV60
+#' (Goncalves et al., 2014), this being the standard minute interval for
+#' the method (60 minutes). It is still possible to vary this interval of
+#' minutes, which can generate totally different results for the IV that
+#' can be analyzed to identify some pattern of the data.
+#'
+#' Higher values of IV represent a greater fragmentation of the rest-activity
+#' rhythm; this is because the calculation has a certain sensitivity to
+#' immediate changes between time intervals, such as naps during the day and
+#' nighttime awakenings.
+#'
+#' @param data A [`tsibble`][tsibble::tsibble()] object. 
+#' @param col A string indicating which column of `data` to use.
+#' @param minutes_interval An integer value representing the duration in minutes
+#' of the time interval for grouping the data. By default, 60 minutes are
+#' considered, this means that the activity will be averaged at hourly
+#' intervals. The minimum value is 1 minute, where all points will be
+#' considered (so there will be no average in intervals).
+#'
+#' @return A [`numeric`][base::numeric()] value.
+#'
+#' @template references_f
+#' @family NPCRA functions
+#' @export
+#'
+#' @examples
+#' ## Running for 100 random observations
+#'
+#' first_date <- as.POSIXct('2015-01-01')
+#' last_date <- as.POSIXct('2015-01-11')
+#' shuffled_timestamp <- sample(seq(first_date, last_date, by = "sec"), 100)
+#' timestamp <- sort(shuffled_timestamp)
+#' x <- runif(100, 0, 10000)
+#'
+#' act <- dplyr::tibble(x, timestamp) %>%
+#'   tsibble::as_tsibble(index = "timestamp")
+#'
+#' npcra_iv(act, "x", minutes_interval = 120)
+npcra_iv <- function(data, col, minutes_interval = 60) {
+  checkmate::assert_numeric(data[[col]])
+  checkmate::assert_posixct(data[[tsibble::index(data)]])
+  checkmate::assert_int(minutes_interval)
+
+  x <- data[[col]]
+  timestamp <- data[[tsibble::index(data)]]
+
+  if (minutes_interval < 1 || minutes_interval > 1440) {
+    cli::cli_abort(paste0(
+      "{.strong {cli::col_red('minutes_interval')}} must have ",
+      "a value between {.strong 1} and {.strong 1440} minutes, ",
+      "but is {.strong {minutes_interval}}."
+    ))
+  }
+
+  if (minutes_interval == 1) {
+    periodic_means <- x
+  } else {
+    time_interval <- paste(minutes_interval, "min")
+    interval <- cut(timestamp, time_interval)
+    periodic_means <- tapply(x, interval, mean)
+  }
+
+  periodic_means <- periodic_means[!is.na(periodic_means)]
+  n <- length(periodic_means)
+  square_diff <- diff(periodic_means)^2
+
+  numerator <- n * sum(square_diff)
+
+  denominator <- stats::var(periodic_means)
+  denominator <- denominator * (n - 1)^2
+  out <- numerator / denominator
+  out
+}
+
+#' Non-Parametric Function IVm (Intradaily Variability mean)
+#'
+#' @description
+#'
+#' `r lifecycle::badge("experimental")`
+#'
+#' Intradaily Variability identifies the fragmentation of the rest-activity
+#' rhythms.
+#'
+#' This method calculates the average of IV's up to a minute limit. By default,
+#' the limit is 60 minutes, so the 60 IV's will be calculated separately
+#' and the results will be averaged to be returned.
+#'
+#' @details
+#'
+#' Intradaily variability (see [npcra_iv()]) is a number that is usually
+#' between 0 and 2 calculated by dividing the square mean of the first
+#' derivative of the data by the population variance (Witting et al., 1990).
+#'
+#' Although real records typically have an IV value less than 2, it is possible
+#' that this number exceeds 2. IVs greater than 2 are quite common in
+#' simulations of random values, since there is no way to define a standard for
+#' the data and, therefore, will have a high variability.
+#'
+#' From the IV stipulated by Witting et al, other estimates based on the IV were
+#' derived, one being the mean IV (IVm). This method simply consists of
+#' averaging IVs at different time intervals (Goncalves et al., 2014). The
+#' function of this package considers a minute limit to calculate the average of
+#' IVs.
+#'
+#' As an example, the default is 60 minutes, so IVs will be calculated with time
+#' intervals from 1 to 60 minutes and then average all these values. 
+#'
+#' @param data A [`tsibble`][tsibble::tsibble()] object.
+#' @param col A string indicating which column of `data` to use.
+#' @param minute_limit An integer value that corresponds to the last minute
+#' interval to group the data. The default is 60, so 60 values of IV will be
+#' calculated to take the average, with the first every minute and the last
+#' every 60 minutes.
+#' @param show_messages If set to `TRUE`, the IV for every minute is printed
+#' on the console as a message.
+#' @param summarize If set to `TRUE`, only the IVm value is returned by the
+#' function; if set to `FALSE`, a tibble with the IV values for each minute
+#' is also returned.
+#'
+#' @return The numeric value of IVm if `summarize` is set as `TRUE`; otherwise,
+#' a tibble with the IVs up to the minute limit, with their names
+#' (IVm, IV1, IV2, ...) in the first column and the values of IV in the second
+#' column. The first column is called iv_minute and the second iv, with the
+#' IVm in the first row. 
+#'
+#' @template references_f
+#' @family NPCRA functions
+#' @export
+#'
+#' @examples
+#' ## Running for 100 random observations
+#'
+#' first_date <- as.POSIXct('2015-01-01')
+#' last_date <- as.POSIXct('2015-01-11')
+#' shuffled_timestamp <- sample(seq(first_date, last_date, by = "sec"), 100)
+#' timestamp <- sort(shuffled_timestamp)
+#' x <- runif(n = 100, min = 0, max = 10000)
+#'
+#' act <- dplyr::tibble(x, timestamp)
+#' act <- tsibble::as_tsibble(act, index="timestamp")
+#'
+#' npcra_ivm(act, col = "x", minute_limit = 120, summarize = FALSE)
+npcra_ivm <- function(data, col, minute_limit = 60, show_messages = TRUE,
+                      summarize = TRUE) {
+  checkmate::assert_numeric(data[[col]])
+  checkmate::assert_posixct(data[[tsibble::index(data)]])
+  checkmate::assert_int(minute_limit)
+  checkmate::assert_logical(summarize)
+  checkmate::assert_logical(show_messages)
+
+  if (minute_limit < 1 || minute_limit > 1440) {
+    cli::cli_abort(paste0(
+      "{.strong {cli::col_red('minute_limit')}} must have ",
+      "a value between {.strong 1} and {.strong 1440} minutes, ",
+      "but is {.strong {minute_limit}}."
+    ))
+  }
+
+  iv <- c()
+  iv_minute <- c()
+
+  for (current_minute in seq_len(minute_limit)) {
+    current_iv <- npcra_iv(data, col, current_minute)
+
+    iv <- c(iv, current_iv)
+    iv_minute <- c(iv_minute, paste0("IV", current_minute))
+
+    if (show_messages) {
+      message("IV", current_minute, ": ", current_iv)
+    }
+  }
+
+  ivm <- sum(iv) / minute_limit
+  if (show_messages) message("IVm: ", ivm)
+
+  if (summarize) {
+    out <- ivm
+  } else {
+    iv <- c(ivm, iv)
+    iv_minute <- c("IVm", iv_minute)
+
+    out <- dplyr::tibble(iv_minute, iv)
+  }
+
+  out
+}
diff --git a/man-roxygen/references_f.R b/man-roxygen/references_f.R
new file mode 100644
index 0000000..49f9eef
--- /dev/null
+++ b/man-roxygen/references_f.R
@@ -0,0 +1,11 @@
+#' @references
+#'
+#' Goncalves, B. S. B., Cavalcanti, P. R. A., Tavares, G. R., Campos,
+#' T. F., & Araujo, J. F. (2014). Nonparametric methods in actigraphy: an
+#' update. 
Sleep Science, 7(3), 158-164. +#' \doi{10.1016/j.slsci.2014.09.013} +#' +#' Witting, W., Kwa, I. H., Eikelenboom, P., Mirmiran, M., & Swaab, D. F. +#' (1990). Alterations in the circadian rest-activity rhythm in aging and +#' Alzheimer's disease. _Biological Psychiatry_, _27_(6), 563-572. +#' \doi{10.1016/0006-3223(90)90523-5}. diff --git a/man/find_epoch.Rd b/man/find_epoch.Rd index 482eeea..f15bf26 100644 --- a/man/find_epoch.Rd +++ b/man/find_epoch.Rd @@ -7,8 +7,8 @@ find_epoch(data, threshold = 0.9) } \arguments{ -\item{data}{A \link[tsibble:tsibble]{tsibble} object with a \code{\link[=as.Date]{Date}} -or \code{\link[=as.POSIXct]{POSIXt}} vector as index.} +\item{data}{A \code{\link[tsibble:tsibble]{tsibble}} object with a +\code{\link[=as.Date]{Date}} or \code{\link[=as.POSIXct]{POSIXt}} vector as index.} \item{threshold}{(optional) a number, from \code{0} to \code{1}, indicating the minimum proportion that an epoch must have to be considered valid. \code{threshold = 1} means that diff --git a/man/npcra_iv.Rd b/man/npcra_iv.Rd new file mode 100644 index 0000000..fd0a0fc --- /dev/null +++ b/man/npcra_iv.Rd @@ -0,0 +1,112 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/npcra_iv.R +\name{npcra_iv} +\alias{npcra_iv} +\title{Non-Parametric Function IV (Intradaily Variability)} +\usage{ +npcra_iv(data, col, minutes_interval = 60) +} +\arguments{ +\item{data}{A \code{\link[tsibble:tsibble]{tsibble}} object.} + +\item{col}{A string indicating which column of \code{data} to use.} + +\item{minutes_interval}{A integer value representing the duration in minutes +of the time interval for grouping the data. By default, 60 minutes are +considered, this means that the activity will be averaged at hourly +intervals. The minimum value is 1 minute, where all points will be +considered (so there will be no average in intervals).} +} +\value{ +A \code{\link[base:numeric]{numeric}} value. 
+} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + +Calculates the Intradaily Variability, which identifies the fragmentation of +the rest-activity rhythms. + +This fragmentation can have different results for the same data according +to the chosen time interval, that is, a minute by minute calculation brings +a more sensitive result than a check in hours, where small changes tend +to have less influence on intradaily variability. +} +\details{ +Intradaily variability is a number that is usually between 0 and 2 calculated +by dividing the square mean of the first derivative of the data by the +population variance (Witting et al., 1990). + +Although real records typically have an IV value less than 2, +it is possible that this number exceeds 2. IVs greater than 2 +are quite common in simulations of random values, since there is +no way to define a standard for the data and, therefore, will have +a high variability. + +If \eqn{X} represents the activity or other numeric variable passed as +parameter \code{x}, the IV follows the formula below: + +\deqn{IV = SQM_FD / VAR(X)} + +Where SQM_FD is the square mean of the first derivative of the data, +calculated as below: + +\deqn{SQM_FD = \sum_{i=2}^{N} (X_i - X_{i-1})^2 / (N-1)} + +And the population variance VAR(X) is: + +\deqn{VAR(X) = \sum_{i=1}^{N} (\bar{X} - X_i)^2} + +Where: + +\eqn{X_i} is each data point or, more likely, each activity average +for the time interval passed by the parameter \code{minutes_interval}; + +N is the amount of data or the number of intervals that can fit in +the data set; + +\eqn{\bar{X}} is the average of all data or the mean of all average +activities by time interval. + +Usually the activity data used in the calculation are hourly averages of +the activity, thus avoiding activity fluctuations in the same period of +time. 
The calculation for IV by hourly averages is also called IV60 +(Goncalves et al., 2014), this being the standard minute interval for +the method (60 minutes). It is still possible to vary this interval of +minutes, which can generate totally different results for the IV that +can be analyzed to identify some pattern of the data. + +Higher values of IV represent a greater fragmentation of the rest - activity +rhythm, this is because the calculation has a certain sensitivity to +immediate changes between time intervals, such as naps during the day and +nighttime awakenings. +} +\examples{ +## Running for 100 random observations + +first_date <- as.POSIXct('2015-01-01') +last_date <- as.POSIXct('2015-01-11') +shuffled_timestamp <- sample(seq(first_date, last_date, by = "sec"), 100) +timestamp <- sort(shuffled_timestamp) +x <- runif(100, 0, 10000) + +act <- dplyr::tibble(x, timestamp) \%>\% + tsibble::as_tsibble(index = "timestamp") + +npcra_iv(act, "x", minutes_interval = 120) +} +\references{ +Goncalves, B. S. B., Cavalcanti, P. R. A., Tavares, G. R., Campos, +T. F., & Araujo, J. F. (2014). Nonparametric methods in actigraphy: an +update. Sleep Science, 7(3), 158-164. +\doi{10.1016/j.slsci.2014.09.013} + +Witting, W., Kwa, I. H., Eikelenboom, P., Mirmiran, M., & Swaab, D. F. +(1990). Alterations in the circadian rest-activity rhythm in aging and +Alzheimer's disease. \emph{Biological Psychiatry}, \emph{27}(6), 563-572. +\doi{10.1016/0006-3223(90)90523-5}. 
+} +\seealso{ +Other NPCRA functions: +\code{\link{npcra_ivm}()} +} +\concept{NPCRA functions} diff --git a/man/npcra_ivm.Rd b/man/npcra_ivm.Rd new file mode 100644 index 0000000..ae3886c --- /dev/null +++ b/man/npcra_ivm.Rd @@ -0,0 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/npcra_iv.R +\name{npcra_ivm} +\alias{npcra_ivm} +\title{Non-Parametric Function IVm (Intradaily Variability mean)} +\usage{ +npcra_ivm(data, col, minute_limit = 60, show_messages = TRUE, summarize = TRUE) +} +\arguments{ +\item{data}{A \code{\link[tsibble:tsibble]{tsibble}} object.} + +\item{col}{A string indicating which column of \code{data} to use.} + +\item{minute_limit}{integer value that corresponds to the last minute +interval to group the data. The default is 60, so 60 values of IV will be +calculated to take the average, with the first every minute and the last +every 60 minutes.} + +\item{show_messages}{if set to true it will return the IV for every minute on +the console as a message.} + +\item{summarize}{if set to true, only the IVm value will be output from the +function, if set to false it will also return the IV values for each minute +in a tibble} +} +\value{ +The numeric value of IVm if summarize is set as TRUE, otherwise will +be returned a tibble with the IVs until the minute limit with their names +(IVm, IV1, IV2, ...) in the first column and the values of IV in the second +column. The first column is called iv_minute and the second as iv, with the +IVm in the first row. +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + +Intradaily Variability identifies the fragmentation of the rest-activity +rhythms. + +This method calculates the average of IV's up to a minute limit. 
By default, +the limit is 60 minutes, so the 60 IV's will be calculated separately +and the results will be averaged to be returned. +} +\details{ +Intradaily variability (see \code{\link[=npcra_iv]{npcra_iv()}}) is a number that is usually +between 0 and 2 calculated by dividing the square mean of the first +derivative of the data by the population variance (Witting et al., 1990). + +Although real records typically have an IV value less than 2, it is possible +that this number exceeds 2. IVs greater than 2 are quite common in +simulations of random values, since there is no way to define a standard for +the data and, therefore, will have a high variability. + +From the IV stipulated by Witting et al, other estimates based on the IV were +derived, one being the mean IV (IVm). This method simply consists of +averaging IVs at different time intervals (Goncalves et al., 2014). The +function of this package considers a minute limit to calculate the average of +IVs. + +As an example, the default is 60 minutes, so IVs will be calculated with time +intervals from 1 to 60 minutes and then average all these values. +} +\examples{ +## Running for 100 random observations + +first_date <- as.POSIXct('2015-01-01') +last_date <- as.POSIXct('2015-01-11') +shuffled_timestamp <- sample(seq(first_date, last_date, by = "sec"), 100) +timestamp <- sort(shuffled_timestamp) +x <- runif(n = 100, min = 0, max = 10000) + +act <- dplyr::tibble(x, timestamp) +act <- tsibble::as_tsibble(act, index="timestamp") + +npcra_ivm(act, col = "x", minute_limit = 120, summarize = FALSE) +} +\references{ +Goncalves, B. S. B., Cavalcanti, P. R. A., Tavares, G. R., Campos, +T. F., & Araujo, J. F. (2014). Nonparametric methods in actigraphy: an +update. Sleep Science, 7(3), 158-164. +\doi{10.1016/j.slsci.2014.09.013} + +Witting, W., Kwa, I. H., Eikelenboom, P., Mirmiran, M., & Swaab, D. F. +(1990). Alterations in the circadian rest-activity rhythm in aging and +Alzheimer's disease. 
\emph{Biological Psychiatry}, \emph{27}(6), 563-572. +\doi{10.1016/0006-3223(90)90523-5}. +} +\seealso{ +Other NPCRA functions: +\code{\link{npcra_iv}()} +} +\concept{NPCRA functions} diff --git a/man/periodogram.Rd b/man/periodogram.Rd index c69ecfd..22bc561 100644 --- a/man/periodogram.Rd +++ b/man/periodogram.Rd @@ -32,11 +32,11 @@ series interval and, consequently, its \code{p} periods. Valid values are: \code{1000}).} \item{p_max}{(optional) an integer number indicating the maximum period -(\eqn{p}(), with the same unit as \code{p_unit}, to compute the test (default: +(\eqn{p}), with the same unit as \code{p_unit}, to compute the test (default: \code{2500}).} \item{p_step}{(optional) an integer number indicating the range of values -that will be skipped between computing one test and the next (e.g., when +that must be skipped between computing one test and the next (e.g., when \code{p_min == 1}, \code{p_max == 7}, and \code{p_step == 2}, the test periods will be \code{1}, \code{3}, \code{5}, and \code{7}) (default: \code{1}).} @@ -54,7 +54,7 @@ A \code{\link[=list]{list}} object with the following elements: aggregated. \item \code{p_seq}: a \code{\link[=numeric]{numeric}} object with the the sequence of the tested periods. -\item \code{alpha}: a string indicating the significant level used. +\item \code{alpha}: a number indicating the significant level used. \item \code{a_p}: a \code{\link[=numeric]{numeric}} object with the root mean square amplitude (\eqn{A_{p}}{Ap}) for each period. \item \code{a_p_plot}: a \code{\link[ggplot2:ggplot]{ggplot}} object with a line chart @@ -106,14 +106,14 @@ the most frequent value (mode) (for non-numeric or single integer variables). \subsection{Missing values}{ \code{periodogram()} will work even with missing values. As is the case for -any analysis with missing data, the results may diverge, you may want to +any analysis with missing data, the results may diverge. You may want to interpolate these data points. 
There are few articles that deals with interpolation in actigraphy. Tonon et
al. (2022) recommends not using interpolation (i.e., maintain \code{NA} values)
-whenever is possible. The same authors also recommends using the weekly mean
-method of interpolation when the parameters cannot be computed in the
-presence of \code{NA} values.
+whenever is possible. The same authors also recommend using the
+\link[=na_weekly_mean]{weekly mean method} of interpolation when the parameters
+cannot be computed in the presence of \code{NA} values.
 }
 }
 \section{Guidelines}{
@@ -138,8 +138,8 @@ you can see this documentation on the package
 \code{p_min == 1}, \code{p_max == 5}, and \code{p_step == 1}, the test periods
 will be \code{1}, \code{2}, \code{3}, \code{4} and \code{5}).
 
-The \eqn{A_{p}}{Ap} measure of the Enright's periodogram is the standard
-deviation of the column means of a Buys-Ballot table, or, as Enright puts it,
+The \eqn{A_{p}}{Ap} measure of Enright's periodogram is the standard
+deviation of column means of a Buys-Ballot table, or, as Enright puts it,
 "the root mean square \strong{amplitude}". This Buys-Ballot table has \eqn{P}
 columns and \eqn{m} rows, where \eqn{m} is a number that maximizes the amount
 of values that a time series of \eqn{N} values can have represented in a
diff --git a/man/spectrogram.Rd b/man/spectrogram.Rd
index 92876b8..6863f21 100644
--- a/man/spectrogram.Rd
+++ b/man/spectrogram.Rd
@@ -35,16 +35,16 @@ series interval and, consequently, its \code{p} periods. 
Valid values are: \code{1000}).} \item{p_max}{(optional) an integer number indicating the maximum period -(\eqn{p}(), with the same unit as \code{p_unit}, to compute the test (default: +(\eqn{p}), with the same unit as \code{p_unit}, to compute the test (default: \code{2500}).} \item{p_step}{(optional) an integer number indicating the range of values -that will be skipped between computing one test and the next (e.g., when +that must be skipped between computing one test and the next (e.g., when \code{p_min == 1}, \code{p_max == 7}, and \code{p_step == 2}, the test periods will be \code{1}, \code{3}, \code{5}, and \code{7}) (default: \code{1}).} -\item{int}{(optional) a string indicating the interval unit.Valid values are: -\verb{“seconds”}, \verb{“minutes”}, \verb{“hours”}, \verb{“days”}, \verb{“weeks”}, \verb{“months”}, +\item{int}{(optional) a string indicating the interval unit. Valid values +are: \verb{“seconds”}, \verb{“minutes”}, \verb{“hours”}, \verb{“days”}, \verb{“weeks”}, \verb{“months”}, \verb{“quarters”}, and \verb{“years”}) (default: \code{"days"}).} \item{int_n}{(optional) an integer number indicating the size of the diff --git a/tests/testthat/test-npcra_iv.R b/tests/testthat/test-npcra_iv.R new file mode 100644 index 0000000..75b8f5b --- /dev/null +++ b/tests/testthat/test-npcra_iv.R @@ -0,0 +1,153 @@ +test_that('npcra_iv() | Random data', { + # Random data + # It is very likely that there are time intervals with no activity data + # (since there are only 1000 data for 29 days -> 696 hours), the function + # should still return the numerical value of IV60 + # Remember that although the IV values tend to remain between 0 + # and 2, depending on the distribution of the data this value can exceed 2 + # (something also likely in a random distribution) + x <- runif(1000, min = 0, max = 10000) + + first_date <- as.POSIXct('2015-01-01') + last_date <- as.POSIXct('2015-01-30') + + timestamp <- seq(first_date, last_date, by = "sec") %>% + sample(size = 
1000) %>% + sort() + + tsbl <- dplyr::tibble(x, timestamp) + tsbl <- tsibble::as_tsibble(tsbl, index="timestamp") + + iv <- npcra_iv(tsbl, col="x") + expect_true(dplyr::between(iv, left = 0, right = 3)) +}) + +test_that('npcra_iv() | Senoidal data', { + # Senoidal data + # Data distributed according to a sinusoid must converge the + # result to 0. + # Note that this is a test of the algorithm theory, since by the + # distribution used there are negative values of activity + t <- seq(0, 4 * pi, length.out = 100) + x <- 3*sin(2*t)+runif(100)*2 + + first_date <- as.POSIXct('2015-01-01') + last_date <- as.POSIXct('2015-01-11') + + timestamp <- seq(first_date, last_date, by = "sec") %>% + sample(size = 100) %>% + sort() + + tsbl <- dplyr::tibble(x, timestamp) + tsbl <- tsibble::as_tsibble(tsbl, index="timestamp") + + iv <- npcra_iv(tsbl, col="x") + expect_true(dplyr::between(iv, left = 0, right = 1)) +}) + +test_that('npcra_iv() | many records', { + # Run for 1 million observation with 1-minute time interval + x <- runif(10^6, min = 0, max = 10000) + + first_date <- as.POSIXct('2015-01-01') + last_date <- as.POSIXct('2015-01-30') + + timestamp <- seq(first_date, last_date, by = "sec") %>% + sample(size = 10^6) %>% + sort() + + tsbl <- dplyr::tibble(x, timestamp) + tsbl <- tsibble::as_tsibble(tsbl, index="timestamp") + + iv <- npcra_iv(tsbl, col="x", minutes_interval = 1) + expect_true(dplyr::between(iv, left = 0, right = 3)) +}) + +test_that('npcra_iv() | One day', { + # Random data for a single day + # IV can be calculated with data for just one day, as it + # depends on the variation between averages per minute regardless + # of the day of collection + x <- runif(10000, min = 0, max = 10000) + + first_date <- as.POSIXct('2015-01-01') + last_date <- as.POSIXct('2015-01-15 23:59:59') + + timestamp <- seq(first_date, last_date, by = "min") %>% + sample(size = 10000) %>% + sort() + + tsbl <- dplyr::tibble(x, timestamp) + tsbl <- tsibble::as_tsibble(tsbl, 
index="timestamp") + + iv <- npcra_iv(tsbl, "x") + expect_true(dplyr::between(iv, left = 0, right = 3)) +}) + +#npcra_ivm() tests +test_that('npcra_ivm() | Random data', { + # Random data for 29 days + x <- runif(1000, min = 0, max = 10000) + + first_date <- as.POSIXct('2015-01-01') + last_date <- as.POSIXct('2015-01-30') + + timestamp <- seq(first_date, last_date, by = "sec") %>% + sample(size = 1000) %>% + sort() + + tsbl <- dplyr::tibble(x, timestamp) + tsbl <- tsibble::as_tsibble(tsbl, index="timestamp") + + ivm <- npcra_ivm(tsbl, col="x", show_messages=FALSE) + expect_true(dplyr::between(ivm, left = 0, right = 3)) +}) + +test_that('npcra_ivm() | Senoidal data', { + # Senoidal data + # Data distributed according to a sinusoid must converge the + # result to 0. + # Note that this is a test of the algorithm theory, since by the + # distribution used there are negative values of activity + t <- seq(0, 4 * pi, length.out = 100) + x <- 3*sin(2*t)+runif(100)*2 + + first_date <- as.POSIXct('2015-01-01') + last_date <- as.POSIXct('2015-01-11') + + timestamp <- seq(first_date, last_date, by = "sec") %>% + sample(size = 100) %>% + sort() + + tsbl <- dplyr::tibble(x, timestamp) + tsbl <- tsibble::as_tsibble(tsbl, index="timestamp") + + ivm <- npcra_ivm(tsbl, "x", show_messages=FALSE) + expect_true(dplyr::between(ivm, left = 0, right = 1)) +}) + +test_that('npcra_ivm() | summarize = FALSE', { + # Returns a tibble with the IVmean for the first 10 minutes on the first + # line and each IV on the following lines (IV1, IV2, ..., IV10) + minute_limit = 10 + x <- runif(1000, min = 0, max = 10000) + + first_date <- as.POSIXct('2015-01-01') + last_date <- as.POSIXct('2015-01-30') + + timestamp <- seq(first_date, last_date, by = "sec") %>% + sample(size = 1000) %>% + sort() + + tsbl <- dplyr::tibble(x, timestamp) + tsbl <- tsibble::as_tsibble(tsbl, index="timestamp") + + ivm <- npcra_ivm(tsbl, + "x", + minute_limit, + summarize = FALSE, + show_messages=FALSE) + expect_length(ivm, 
2) + expect_true(dplyr::between(ivm$iv[1], left = 0, right = 3)) + expect_true(nrow(ivm) == minute_limit + 1) +})