From f08a6ce1a99fb5f19e14470db8cc2ee1eb1a6410 Mon Sep 17 00:00:00 2001
From: Sam Abbott <s.e.abbott12@gmail.com>
Date: Wed, 5 Mar 2025 10:52:02 +0000
Subject: [PATCH] Issue #468: Add marginaleffects and rvars to the FAQ (#547)

* add marginal effects section

* add a section of posterior

* update news

* remember that sigma is on the log scale
---
 NEWS.md           |   2 +
 vignettes/faq.Rmd | 115 +++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 105 insertions(+), 12 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 9bdcf4b1b..655d5ca8d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -14,6 +14,8 @@ This is the development version of `epidist`.
 ## Documentation
 
 - Added a new vignette "Guide to the statistical models implemented in epidist". See #514.
+- Added a new FAQ section showcasing how to use the `posterior` package with `epidist` models, particularly for working with random variables (`rvars`) to propagate uncertainty in calculations. See #547.
+- Added a new FAQ section on how to use the `marginaleffects` package with `epidist` models. See #547.
 
 ## Bugs
 
diff --git a/vignettes/faq.Rmd b/vignettes/faq.Rmd
index aaa6e08a7..344024ae7 100644
--- a/vignettes/faq.Rmd
+++ b/vignettes/faq.Rmd
@@ -30,20 +30,42 @@ library(dplyr)
 library(ggplot2)
 library(scales)
 library(tidyr)
-library(tidybayes)
 
 set.seed(1)
 
-meanlog <- 1.8
-sdlog <- 0.5
+# Define different parameters for each location
+meanlog_0 <- 1.6
+sdlog_0 <- 0.4
+meanlog_1 <- 2.1
+sdlog_1 <- 0.6
+
 obs_time <- 25
 sample_size <- 200
 
-obs_cens_trunc_samp <- simulate_gillespie(seed = 101) |>
+# Create base data with location assignments
+base_data <- simulate_gillespie(seed = 101)
+base_data$location <- rbinom(n = nrow(base_data), size = 1, prob = 0.5)
+
+# Create location 0 data
+location_0_data <- base_data |>
+  filter(location == 0) |>
+  simulate_secondary(
+    meanlog = meanlog_0,
+    sdlog = sdlog_0
+  ) |>
+  select(case, ptime, delay, stime, location)
+
+# Create location 1 data
+location_1_data <- base_data |>
+  filter(location == 1) |>
   simulate_secondary(
-    meanlog = meanlog,
-    sdlog = sdlog
+    meanlog = meanlog_1,
+    sdlog = sdlog_1
   ) |>
+  select(case, ptime, delay, stime, location)
+
+# Combine datasets
+obs_cens_trunc_samp <- bind_rows(location_0_data, location_1_data) |>
   mutate(
     ptime_lwr = floor(.data$ptime),
     ptime_upr = .data$ptime_lwr + 1,
@@ -59,7 +81,8 @@ linelist_data <- as_epidist_linelist_data(
   obs_cens_trunc_samp$ptime_upr,
   obs_cens_trunc_samp$stime_lwr,
   obs_cens_trunc_samp$stime_upr,
-  obs_time = obs_cens_trunc_samp$obs_time
+  obs_time = obs_cens_trunc_samp$obs_time,
+  covariates = data.frame(location = obs_cens_trunc_samp$location)
 )
 data <- as_epidist_marginal_model(linelist_data)
 
@@ -73,13 +96,27 @@ fit <- epidist(
   iter = 1000,
   backend = "cmdstanr"
 )
+
+fit_location <- epidist(
+  data,
+  formula = bf(mu ~ location, sigma ~ location),
+  seed = 1,
+  chains = 2,
+  cores = 2,
+  refresh = ifelse(interactive(), 250, 0),
+  iter = 1000,
+  backend = "cmdstanr"
+)
 ```
 
-## I would like to work with the samples output
+## Working with posterior samples
 
 The output of a call to `epidist` is compatible with typical Stan workflows.
-We recommend use of the [`posterior`](https://mc-stan.org/posterior/) package for working with samples from MCMC or other sampling algorithms.
-For example, the function `posterior::as_draws_df()` may be used to obtain a dataframe of MCMC draws for specified parameters.
+We recommend use of the [`posterior`](https://mc-stan.org/posterior/) package for working with samples.
+
+### Getting a `data.frame` of posterior samples
+
+The function `posterior::as_draws_df()` may be used to obtain a dataframe of MCMC draws for specified parameters.
 
 ```{r message = FALSE}
 library(posterior)
@@ -87,6 +124,35 @@ draws <- as_draws_df(fit, variable = c("Intercept", "Intercept_sigma"))
 head(draws)
 ```
 
+### Using random variables (`rvars`) for uncertainty propagation
+
+The `posterior` package also provides the `rvar` class for working with posterior samples as random variables.
+This approach allows you to perform mathematical operations directly with posterior distributions while propagating uncertainty.
+
+```{r}
+library(posterior)
+# Convert model parameters to rvars
+rv <- as_draws_rvars(fit_location)
+
+# Look at the intercept in rvar form
+rv$b_Intercept
+
+# Calculate the mean delay (on natural scale) for each location
+mean_delay_loc0 <- exp(rv$b_Intercept + 0.5 * exp(rv$b_sigma_Intercept)^2)
+mean_delay_loc1 <- exp(rv$b_Intercept +
+  rv$b_location + 0.5 * (exp(rv$b_sigma_Intercept + rv$b_sigma_location))^2)
+
+# Summarize the distributions
+summary(mean_delay_loc0)
+summary(mean_delay_loc1)
+
+# Calculate the difference between locations
+delay_diff <- mean_delay_loc1 - mean_delay_loc0
+delay_diff
+```
+
+For more details, see the [`posterior` package documentation](https://mc-stan.org/posterior/articles/rvar.html).
+
 ## How can I assess whether sampling has converged?
 
 The output of a call to `epidist` is compatible with typical Stan workflows.
@@ -183,7 +249,7 @@ quantile(pred$mean, c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99))
 quantile(pred$sd, c(0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.99))
 ```
 
-# How can I assess how sensitive the fitted posterior distribution is to the prior distribution used?
+## How can I assess how sensitive the fitted posterior distribution is to the prior distribution used?
 
 We recommend use of the [`priorsense`](https://github.com/n-kall/priorsense) package [@kallioinen2024detecting] to check how sensitive the posterior distribution is to perturbations of the prior distribution and likelihood using power-scaling analysis:
 
@@ -225,6 +291,7 @@ To see these functions demonstrated in a vignette, see ["Advanced features with
 As a short example, to generate 4000 predictions (equal to the number of draws) of the delay that would be observed with a double censored observation process (in which the primary and secondary censoring windows are both one) then:
 
 ```{r}
+library(tidybayes)
 draws_pmf <- tibble::tibble(
   relative_obs_time = Inf, pwindow = 1, swindow = 1, delay_upr = NA
 ) |>
@@ -239,7 +306,31 @@ ggplot(draws_pmf, aes(x = .prediction)) +
 
 Importantly, this functionality is only available for `epidist` models using `brms` families that have a `log_lik_censor` method implemented internally in `brms`. If you are using another family, consider [submitting a pull request](https://github.com/epinowcast/epidist/pulls) to implement these methods!
 
-# How can I use the `cmdstanr` backend?
+## How can I use the `marginaleffects` package with my fitted `epidist` model?
+
+The [`marginaleffects`](https://marginaleffects.com/) package provides tools for computing and visualising marginal effects, contrasts, and predictions from regression models. It works with `epidist` models because they are built on top of `brms`.
+
+For `epidist` models with covariates, you can use `marginaleffects` to:
+- Compute average marginal effects
+- Make comparisons between different covariate values
+- Visualise model predictions across the range of covariates
+
+Here's a simple example using a model that includes location as a covariate:
+
+```{r}
+library(marginaleffects)
+
+# Compare outcomes between location categories (0 and 1)
+avg_comparisons(
+  fit_location,
+  variables = list(location = c(0, 1))
+)
+```
+
+For more details on the available functions and their use, see the [`marginaleffects` documentation](https://marginaleffects.com/).
+
+
+## How can I use the `cmdstanr` backend?
 
 The `cmdstanr` backend is typically more performant than the default `rstan` backend.
 To use the `cmdstanr` backend, we first need to install CmdStan (see the README for more details). We can check we have everything we need as follows: