Function update (#39)

* Created new functions to: provide a vector of available models, provide parameter lists for each available model, extract estimates according to the chosen model. Updated parameter names from species to population in multi-individual models. * Updated description of functions for parameter names. * Updated assign_data function to allow data frames rather than specific vectors. Added functions for each DE. * Updated error handling in assign_data to include checking the length of vectors. Updated documentation. * Built function to plot DE pieces. Updated data assignment function to have more error checking. Updated model_des function to have a switch that returns the chosen model function as well as the DEs directly. * Fixed list element calls in data assignment and estimate extraction functions. Updated data assignment testing to evaluate different combinations of user input. * Corrected typos, updated docs, added to the included packages. * Actually saved the DESCRIPTION update. * Updated testing for assign_data function, updated error handling for assign_data function. * Removed browser() calls and changed list references. * Finalised testing for assign_data function. * Updated error handling for model names. Added error handling for extract_estimates . * Changed measurement data error checking to iterate through vector of required names. * Updated plot_de_pieces function and testing. Need to double check out to force inclusion of ggplot2. * Updated imports for a couple of functions so they would load dplyr and ggplot2 properly. * Removed stats package from imports, only used for quantile function in extract_estimates so that is specifically imported alone. Hopefully this fixes the load order warnings. * Updated testing for assign_data, updated error handling and testing for plot_de_pieces.
traitecoevo · Aug 15, 2024 · 5ed0ae0 · 5ed0ae0
1 parent 00fff73
commit 5ed0ae0
Show file tree

Hide file tree

Showing 29 changed files with 1,033 additions and 47 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,7 +8,7 @@ Authors@R:
   person(given = "Fonti", family = "Kar", role = c("ctb"), email = "[email protected]", comment = c(ORCID = "0000-0002-2760-3974")),
   person(given = "David", family=  "Warton", role = c("aut", "ctb"), email = "[email protected]", comment = c(ORCID = "0000-0002-1642-628X"))
     )
-Description: What the package does (one paragraph).
+Description: Wrapper for Stan that offers a number of in-built models to implement a hierarchical Bayesian longitudinal model for repeat observation data. Model choice selects the differential equation that is fit to the observations. Single and multi-individual models are available.
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
@@ -18,13 +18,14 @@ Depends:
     R (>= 3.5.0)
 Imports: 
     methods,
+    dplyr,
+    ggplot2,
     purrr,
     Rcpp (>= 0.12.0),
     RcppParallel (>= 5.0.1),
     rlang,
     rstan (>= 2.18.1),
-    rstantools (>= 2.3.1.1),
-    stats
+    rstantools (>= 2.3.1.1)
 LinkingTo: 
     BH (>= 1.66.0),
     Rcpp (>= 0.12.0),
@@ -35,7 +36,6 @@ LinkingTo:
 SystemRequirements: GNU make
 Suggests: 
     knitr,
-    dplyr,
     rmarkdown,
     testthat (>= 3.0.0),
     withr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,11 +1,24 @@
 # Generated by roxygen2: do not edit by hand
 
 export(hmde_assign_data)
+export(hmde_canham_de)
+export(hmde_const_de)
+export(hmde_extract_estimates)
+export(hmde_linear_de)
 export(hmde_model)
+export(hmde_model_des)
+export(hmde_model_name)
+export(hmde_model_pars)
+export(hmde_plot_de_pieces)
+export(hmde_power_de)
 export(hmde_run)
+export(hmde_vb_de)
 import(Rcpp)
+import(dplyr)
+import(ggplot2)
 import(methods)
 importFrom(RcppParallel,RcppParallelLibs)
 importFrom(rstan,sampling)
 importFrom(rstantools,rstan_config)
+importFrom(stats,quantile)
 useDynLib(hmde, .registration = TRUE)
diff --git a/R/hmde-package.R b/R/hmde-package.R
@@ -8,6 +8,7 @@
 #' @useDynLib hmde, .registration = TRUE
 #' @import methods
 #' @import Rcpp
+#' @import dplyr
 #' @importFrom rstan sampling
 #' @importFrom rstantools rstan_config
 #' @importFrom RcppParallel RcppParallelLibs

diff --git a/R/hmde_assign_data.R b/R/hmde_assign_data.R
@@ -1,30 +1,89 @@
-#' Assign data to template
+#' Assign data to template for chosen model
 #'
 #' @param model_template output from hmde_model
-#' @param ... data-masking name-value pairs
+#' @param data Input data tibble with columns including time, y_obs, obs_index, and additionally ind_id for multi-individual models
+#' @param step_size Step size for numerical integration.
+#' @param ... data-masking name-value pairs allowing specific input of elements
 #'
 #' @return updated named list with your data assigned to Stan model parameters
 #' @export
 
-hmde_assign_data <- function(model_template, ...){
-  # Grab user expressions
-  user_code <- rlang::enquos(..., .check_assign = TRUE)
+hmde_assign_data <- function(model_template, data = NULL, step_size = NULL, ...){
+  if(!model_template$model %in% hmde_model_name()){
+    stop("Model name not recognised. Run hmde_model_name() to see available models.")
+  }
+
+  if(!is.null(data)){ # Use provided tibble
+    user_fields <- names(data)
+
+  } else { # Grab user expressions from individual list items and extract data
+    user_code <- rlang::enquos(..., .check_assign = TRUE)
+    user_fields <- names(user_code)
+    # Evaluate the RHS of expressions (the values)
+    data <- purrr::map(user_code,
+                       ~rlang::eval_tidy(.x, env = rlang::caller_env())
+    )
+  }
 
   # Grab the names
-  fields <- names(user_code)
+  model_fields <- names(model_template)
+
+  # Check user data has required names
+  if(grepl("multi", model_template$model)){ # Multi-individual with ind_id vec
+    for(i in c("ind_id", "time", "y_obs", "obs_index")){
+      if(!i %in% user_fields){
+        stop(paste0("Improper data structure: ", i, " missing"))
+      }
+    }
 
-  #TODO: Check names are in model_template
+  } else { # Single individual models
+    for(i in c("time", "y_obs", "obs_index")){
+      if(!i %in% user_fields){
+        stop(paste0("Improper data structure: ", i, " missing"))
+      }
+    }
+  }
 
-  # Evaluate the RHS of expressions (the values)
-  data <- purrr::map(user_code,
-                     ~rlang::eval_tidy(.x, env = rlang::caller_env())
-                     )
+  for(i in model_fields){ # Iterate through required fields and fill them
+    if(i %in% user_fields){
+      model_template <- purrr::list_modify(model_template, !!!data[i])
+    } else {
+      model_template[[i]] <- switch(
+        i,
+        step_size = step_size,
+        n_obs = length(data$y_obs),
+        n_ind = length(unique(data$ind_id)),
+        y_0_obs = data$y_obs[which(data$obs_index == 1)],
+        y_bar = mean(data$y_obs),
+        model = model_template$model
+      )
+    }
 
-  for(i in fields){
-    model_template <- purrr::list_modify(model_template, !!!data[i])
+    if(is.null(model_template[[i]])){ #Report missing data
+      stop(paste("Improper data structure: Data missing:", i))
+    }
   }
 
-  #TODO: Check if N is supplied, if not, give error
+  #Check lengths for y_obs, obs_index, time, ind_id
+  vec_lengths <- c(
+    model_template$n_obs,
+    length(model_template$y_obs),
+    length(model_template$obs_index),
+    length(model_template$time)
+  )
+  if(grepl("multi", model_template$model)){ # Multi-individual with ind_id vector
+    vec_lengths[5] <- length(model_template$ind_id)
+
+    #Check number ind ID values
+    ind_id_lengths <- c(model_template$n_ind,length(unique(data$ind_id)))
+    if(length(unique(ind_id_lengths))!=1){
+      stop("Different values for n_ind and number of unique entries in ind_id.")
+    }
+  }
+
+  if(length(unique(vec_lengths))!=1){
+    stop("Improper data structure: Different lengths of data vectors.")
+  }
 
   return(model_template)
 }
diff --git a/R/hmde_extract_estimates.R b/R/hmde_extract_estimates.R
@@ -0,0 +1,175 @@
+#' Extract samples and return measurement, individual, and population-level estimates
+#'
+#' @param model model name character string
+#' @param fit fitted model Stan fit
+#' @param input_measurement_data data used to fit the model with ind_id, y_obs, time, obs_index tibble
+#'
+#' @return named list with data frames for measurement, individual, population-level, and error parameter estimates
+#' @export
+#' @import dplyr
+#' @importFrom stats quantile
+
+hmde_extract_estimates <- function(model = NULL,
+                                   fit = NULL,
+                                   input_measurement_data = NULL){
+  #Check for fit
+  if(is.null(fit)){
+    stop("Fit not provided.")
+  }
+
+  if(typeof(fit) != "S4"){
+    stop("Fit not S4 stanfit type.")
+  }
+
+  #Check for model
+  if(!model %in% hmde_model_name()){
+    stop("Model name not recognised. Run hmde_model_name() to see available models.")
+  }
+
+  #Check for input measurement data
+  for(i in c("y_obs", "time", "obs_index")){
+    if(!i %in% names(input_measurement_data)){
+      stop(paste("Input measurements information missing:", i))
+    }
+  }
+
+
+  estimate_list <- list()
+  par_names <- hmde_model_pars(model)
+
+  if(grepl("multi", model)){ #Get n_ind for multi-individual
+    n_ind <- length(unique(input_measurement_data$ind_id))
+  } else {
+    n_ind <- 1
+  }
+
+  #Extract samples
+  samples <- rstan::extract(fit, permuted = TRUE, inc_warmup = FALSE)
+  sample_par_names <- names(samples)
+
+  #Check parameter names
+  for(i in par_names[1:(length(par_names)-1)]){
+    for(j in i){
+      if(!j %in% sample_par_names){
+        stop(paste(
+          "Parameter missing from model:", j
+        ))
+      }
+    }
+  }
+
+  #Extract measurement, individual-level, and error parameter estimates and add to list
+  estimate_list$measurement_data <- hmde_extract_measurement_ests(samples,
+                                                                  par_names$measurement_pars_names,
+                                                                  input_measurement_data)
+
+  estimate_list$individual_data <- hmde_extract_individual_par_ests(samples,
+                                                                    par_names$individual_pars_names,
+                                                                    n_ind)
+
+  estimate_list$error_data <- hmde_extract_error_par_ests(samples, par_names$error_pars_names)
+
+  #If model is multi-individual extract population-level estimates and add to list
+  if(!is.null(par_names$population_pars_names)){
+    estimate_list$population_data <- hmde_extract_pop_par_ests(samples,
+                                                               par_names$population_pars_names)
+  }
+
+  return(estimate_list)
+}
+
+
+#' Sample extraction for measurement-level estimates
+#' @keywords internal
+#' @noRd
+hmde_extract_measurement_ests <- function(samples = NULL,
+                                          measurement_pars_names = NULL,
+                                          input_measurement_data = NULL){
+  measurement_data <- input_measurement_data
+
+  for(i in measurement_pars_names){
+    measurement_data[[i]] <- apply(samples[[i]], 2, mean)
+  }
+
+  return(measurement_data)
+}
+
+#' Sample extraction for individual-level parameters
+#' @keywords internal
+#' @noRd
+hmde_extract_individual_par_ests <- function(samples = NULL,
+                                             individual_pars_names = NULL,
+                                             n_ind = NULL){
+  individual_data <- tibble(ind_id = 1:n_ind)
+
+  #Extract mean of parameter posterior distributions
+
+  if(n_ind > 1){
+    for(i in individual_pars_names){
+      individual_data[[paste0(i, "_mean")]] <- apply(samples[[i]], 2, mean)
+      individual_data[[paste0(i, "_median")]] <- apply(samples[[i]], 2, median)
+      individual_data[[paste0(i, "_CI_lower")]] <- apply(samples[[i]], 2,
+                                                         stats::quantile, probs=c(0.025))
+      individual_data[[paste0(i, "_CI_upper")]] <- apply(samples[[i]], 2,
+                                                         stats::quantile, probs=c(0.975))
+    }
+  } else {
+    for(i in individual_pars_names){
+      individual_data[[paste0(i, "_mean")]] <- mean(samples[[i]])
+      individual_data[[paste0(i, "_median")]] <- median(samples[[i]], 2, )
+      individual_data[[paste0(i, "_CI_lower")]] <- as.numeric(stats::quantile(samples[[i]],
+                                                            probs=c(0.025)))
+      individual_data[[paste0(i, "_CI_upper")]] <- as.numeric(stats::quantile(samples[[i]],
+                                                            probs=c(0.975)))
+    }
+  }
+
+
+  return(individual_data)
+}
+
+#' #' Sample extraction for population-level parameters
+#' @keywords internal
+#' @noRd
+hmde_extract_pop_par_ests <- function(samples = NULL,
+                                      population_pars_names = NULL){
+  population_data <- tibble()
+
+  #Extract mean of parameter posterior distributions
+  for(i in population_pars_names){
+    pop_data_temp <- tibble(par_name = i)
+    pop_data_temp$mean <-  mean(samples[[i]])
+    pop_data_temp$median <- median(samples[[i]])
+    pop_data_temp$CI_lower <- as.numeric(stats::quantile(samples[[i]],
+                                                       probs=c(0.025)))
+    pop_data_temp$CI_upper <- as.numeric(stats::quantile(samples[[i]],
+                                                       probs=c(0.975)))
+
+    population_data <- rbind(population_data, pop_data_temp)
+  }
+
+  return(population_data)
+}
+
+#' #' Sample extraction for error parameters
+#' @keywords internal
+#' @noRd
+hmde_extract_error_par_ests <- function(samples = NULL,
+                                        error_pars_names = NULL){
+  error_data <- tibble()
+
+  #Extract mean of parameter posterior distributions
+  for(i in error_pars_names){
+    error_data_temp <- tibble(par_name = i)
+    error_data_temp[["mean"]] <- mean(samples[[i]])
+    error_data_temp[["median"]] <- median(samples[[i]])
+    error_data_temp[["CI_lower"]] <- as.numeric(stats::quantile(samples[[i]],
+                                                         probs=c(0.025)))
+    error_data_temp[["CI_upper"]] <- as.numeric(stats::quantile(samples[[i]],
+                                                         probs=c(0.975)))
+
+    error_data <- rbind(error_data, error_data_temp)
+  }
+
+  return(error_data)
+}