Add method for Fisher's exact test (#395)

* add fisher

* linter, simplify tests

* bump effectsize version to get fei effect size output and update snapshots

* Trigger lintr tests

* update snapshots

* Add all packages to Remotes in DESCRIPTION

* add commas

* change order of Remotes, header, remove tab
rempsyc authored Oct 22, 2023
1 parent da67740 commit bca974c
Showing 13 changed files with 214 additions and 280 deletions.
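For orientation, a minimal sketch of the usage this commit enables (the data below is a hypothetical example, not taken from the test suite; `fisher.test()` is base R, and `report()` is this package's generic, which gains a Fisher-specific `htest` path in the diffs that follow):

dat <- table(mtcars$am, mtcars$vs)  # any 2x2 contingency table (hypothetical example)
x <- fisher.test(dat)               # class "htest", method "Fisher's Exact Test for Count Data"
report::report(x)                   # should now route through the new Fisher-specific helpers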
13 changes: 9 additions & 4 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Type: Package
Package: report
Type: Package
Title: Automated Reporting of Results and Statistical Models
Version: 0.5.7.11
Version: 0.5.7.12
Authors@R:
c(person(given = "Dominique",
family = "Makowski",
@@ -56,7 +56,7 @@ Depends:
R (>= 3.6)
Imports:
bayestestR (>= 0.13.0),
effectsize (>= 0.8.5),
effectsize (>= 0.8.6.1),
insight (>= 0.19.3.2),
parameters (>= 0.20.2),
performance (>= 0.10.2),
@@ -125,6 +125,7 @@ Collate:
'report_effectsize.R'
'report_htest_chi2.R'
'report_htest_cor.R'
'report_htest_fisher.R'
'report_htest_friedman.R'
'report_htest_ttest.R'
'report_htest_wilcox.R'
@@ -146,4 +147,8 @@ Collate:
Roxygen: list(markdown = TRUE)
Remotes:
easystats/insight,
easystats/effectsize
easystats/datawizard,
easystats/parameters,
easystats/effectsize,
easystats/bayestestR,
easystats/performance
4 changes: 2 additions & 2 deletions NEWS.md
@@ -2,11 +2,11 @@

Major Changes

* This release changes the licensing model of `{see}` to an MIT license.
* This release changes the licensing model of `{see}` to an MIT license.

Minor changes

* `report` now supports variables of class `htest` for the Friedman test.
* `report` now supports variables of class `htest` for the Chi2, Friedman test, and Fisher's exact test.

* `report` now supports variables of class `Date`, treating them like factors.

23 changes: 19 additions & 4 deletions R/report.htest.R
@@ -74,7 +74,11 @@ report_effectsize.htest <- function(x, ...) {
## For Chi2 ---------------

if (model_info$is_chi2test) {
out <- .report_effectsize_chi2(x, table, dot_args)
if (chi2_type(x) == "fisher") {
out <- .report_effectsize_fisher(x, table, dot_args)
} else {
out <- .report_effectsize_chi2(x, table, dot_args)
}
}

# TODO: Chi-squared test -------------
@@ -161,7 +165,10 @@ report_statistics.htest <- function(x, table = NULL, ...) {
text <- NULL

# Estimate
candidates <- c("rho", "r", "tau", "Difference", "r_rank_biserial", "Chi2")
candidates <- c(
"rho", "r", "tau", "Difference", "r_rank_biserial",
"Chi2", "Odds Ratio"
)
estimate <- candidates[candidates %in% names(table)][1]
if (!is.null(estimate) && !is.na(estimate)) {
text <- paste0(tolower(estimate), " = ", insight::format_value(table[[estimate]]))
@@ -257,7 +264,11 @@ report_parameters.htest <- function(x, table = NULL, ...) {
out <- .report_parameters_friedman(table, stats, effsize, ...)
# chi2
} else if (model_info$is_chi2test) {
out <- .report_parameters_chi2(table, stats, effsize, ...)
if (chi2_type(x) == "fisher") {
out <- .report_parameters_fisher(table, stats, effsize, ...)
} else {
out <- .report_parameters_chi2(table, stats, effsize, ...)
}
} else {
# TODO: default, same as t-test?
out <- .report_parameters_htest_default(table, stats, effsize, ...)
@@ -312,7 +323,11 @@ report_model.htest <- function(x, table = NULL, ...) {
}

if (model_info$is_chi2test) {
text <- .report_model_chi2(x, table)
if (chi2_type(x) == "fisher") {
text <- .report_model_fisher(x, table)
} else {
text <- .report_model_chi2(x, table)
}
}

as.report_model(text, summary = text)
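A brief sketch of how the branches added above select the Fisher path. It assumes, as the diff does, that `insight::model_info()` flags a Fisher's exact test result as a chi2-family test (`is_chi2test`); `chi2_type()` is defined in R/report_htest_fisher.R below and simply inspects the method string:

x <- fisher.test(matrix(c(3, 1, 1, 3), nrow = 2))  # hypothetical 2x2 table
x$method                                 # "Fisher's Exact Test for Count Data"
grepl("Fisher", x$method, fixed = TRUE)  # TRUE, so chi2_type(x) returns "fisher"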
111 changes: 111 additions & 0 deletions R/report_htest_fisher.R
@@ -0,0 +1,111 @@
# report_table -----------------

.report_table_fisher <- function(table_full, effsize) {
table_full <- cbind(table_full, attributes(effsize)$table)
list(table = NULL, table_full = table_full)
}


# report_effectsize ---------------------

.report_effectsize_fisher <- function(x, table, dot_args, rules = "funder2019") {
args <- c(list(x), dot_args)
table <- do.call(effectsize::effectsize, args)
ci <- attributes(table)$ci
estimate <- names(table)[1]
rules <- ifelse(is.null(dot_args$rules), rules, dot_args$rules)

args <- list(table, rules = rules, dot_args)
interpretation <- do.call(effectsize::interpret, args)$Interpretation
rules <- .text_effectsize(attr(attr(interpretation, "rules"), "rule_name"))

if (estimate == "Cramers_v_adjusted") {
main <- paste0("Adjusted Cramer's v = ", insight::format_value(table[[estimate]]))
} else if (estimate == "Tschuprows_t") {
main <- paste0("Tschuprow's t = ", insight::format_value(table[[estimate]]))
} else if (estimate == "Tschuprows_t_adjusted") {
main <- paste0("Adjusted Tschuprow's t = ", insight::format_value(table[[estimate]]))
} else if (estimate == "Pearsons_c") {
main <- paste0("Pearson's c = ", insight::format_value(table[[estimate]]))
} else if (estimate == "phi_adjusted") {
main <- paste0("Adjusted Phi = ", insight::format_value(table[[estimate]]))
} else if (estimate == "Cohens_h") {
main <- paste0("Cohen's h = ", insight::format_value(table[[estimate]]))
} else if (estimate == "Odds_ratio") {
main <- paste0("Odds ratio = ", insight::format_value(table[[estimate]]))
} else if (estimate == "Ris_kratio") {
main <- paste0("Risk ratio = ", insight::format_value(table[[estimate]]))
} else if (estimate == "cohens_h") {
main <- paste0("Cohen's w = ", insight::format_value(table[[estimate]]))
} else {
main <- paste0(estimate, " = ", insight::format_value(table[[estimate]]))
}

statistics <- paste0(
main,
", ",
insight::format_ci(table$CI_low, table$CI_high, ci)
)

table <- datawizard::data_rename(
as.data.frame(table),
c("CI_low", "CI_high"),
paste0(estimate, c("_CI_low", "_CI_high"))
)

table <- table[c(estimate, paste0(estimate, c("_CI_low", "_CI_high")))]

list(
table = table, statistics = statistics, interpretation = interpretation,
rules = rules, ci = ci, main = main
)
}

# report_model ----------------------------

.report_model_fisher <- function(x, table) {
vars_full <- paste0(names(attributes(x$observed)$dimnames), collapse = " and ")

text <- paste0(
trimws(x$method),
" testing the association between the variables of the ",
x$data.name, " dataset "
)

text
}

chi2_type <- function(x) {
if (grepl("probabilities", x$method, fixed = TRUE)) {
out <- "probabilities"
} else if (grepl("Pearson", x$method, fixed = TRUE)) {
out <- "pearson"
} else if (grepl("Fisher", x$method, fixed = TRUE)) {
out <- "fisher"
}
out
}

.report_parameters_fisher <- function(table, stats, effsize, ...) {
text_full <- paste0(
"statistically ",
effectsize::interpret_p(table$p, rules = "default"),
", and ",
attributes(effsize)$interpretation,
" (",
stats,
")"
)

text_short <- paste0(
"statistically ",
effectsize::interpret_p(table$p, rules = "default"),
", and ",
attributes(effsize)$interpretation,
" (",
summary(stats),
")"
)

list(text_short = text_short, text_full = text_full)
}
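As a rough illustration of the inputs `.report_effectsize_fisher()` works with (a sketch only; the effect size actually returned depends on the table's dimensions and on effectsize >= 0.8.6.1, the version pinned in DESCRIPTION above, and the example table is hypothetical):

x <- fisher.test(matrix(c(3, 1, 1, 3), nrow = 2))  # hypothetical 2x2 table
es <- effectsize::effectsize(x)   # for a 2x2 table, typically an odds ratio
names(es)[1]                      # e.g. "Odds_ratio", picked up as `estimate` above
insight::format_ci(es$CI_low, es$CI_high, attributes(es)$ci)  # CI string used in `statistics`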
59 changes: 31 additions & 28 deletions tests/testthat/_snaps/windows/report.brmsfit.md
@@ -2,7 +2,7 @@

Code
report(model, verbose = FALSE)
Message <simpleMessage>
Message
Start sampling
Output
We fitted a Bayesian linear model (estimated using MCMC sampling with 4 chains
@@ -30,13 +30,14 @@
(Highest Density Interval), along the probability of direction (pd), the
probability of significance and the probability of being large. The thresholds
beyond which the effect is considered as significant (i.e., non-negligible) and
large are |0.30| and |1.81|. Convergence and stability of the Bayesian sampling
has been assessed using R-hat, which should be below 1.01 (Vehtari et al.,
2019), and Effective Sample Size (ESS), which should be greater than 1000
(Burkner, 2017)., We fitted a Bayesian linear model (estimated using MCMC
sampling with 4 chains of 300 iterations and a warmup of 150) to predict mpg
with qsec and wt (formula: mpg ~ qsec + wt). Priors over parameters were set as
uniform (location = , scale = ) distributions. The model's explanatory power is
large are |0.30| and |1.81| (corresponding respectively to 0.05 and 0.30 of the
outcome's SD). Convergence and stability of the Bayesian sampling has been
assessed using R-hat, which should be below 1.01 (Vehtari et al., 2019), and
Effective Sample Size (ESS), which should be greater than 1000 (Burkner,
2017)., We fitted a Bayesian linear model (estimated using MCMC sampling with 4
chains of 300 iterations and a warmup of 150) to predict mpg with qsec and wt
(formula: mpg ~ qsec + wt). Priors over parameters were set as uniform
(location = , scale = ) distributions. The model's explanatory power is
substantial (R2 = 0.82, 95% CI [0.75, 0.85], adj. R2 = 0.79). Within this
model:
@@ -58,13 +59,14 @@
(Highest Density Interval), along the probability of direction (pd), the
probability of significance and the probability of being large. The thresholds
beyond which the effect is considered as significant (i.e., non-negligible) and
large are |0.30| and |1.81|. Convergence and stability of the Bayesian sampling
has been assessed using R-hat, which should be below 1.01 (Vehtari et al.,
2019), and Effective Sample Size (ESS), which should be greater than 1000
(Burkner, 2017)., We fitted a Bayesian linear model (estimated using MCMC
sampling with 4 chains of 300 iterations and a warmup of 150) to predict mpg
with qsec and wt (formula: mpg ~ qsec + wt). Priors over parameters were set as
uniform (location = , scale = ) distributions. The model's explanatory power is
large are |0.30| and |1.81| (corresponding respectively to 0.05 and 0.30 of the
outcome's SD). Convergence and stability of the Bayesian sampling has been
assessed using R-hat, which should be below 1.01 (Vehtari et al., 2019), and
Effective Sample Size (ESS), which should be greater than 1000 (Burkner,
2017)., We fitted a Bayesian linear model (estimated using MCMC sampling with 4
chains of 300 iterations and a warmup of 150) to predict mpg with qsec and wt
(formula: mpg ~ qsec + wt). Priors over parameters were set as uniform
(location = , scale = ) distributions. The model's explanatory power is
substantial (R2 = 0.82, 95% CI [0.75, 0.85], adj. R2 = 0.79). Within this
model:
@@ -86,15 +88,16 @@
(Highest Density Interval), along the probability of direction (pd), the
probability of significance and the probability of being large. The thresholds
beyond which the effect is considered as significant (i.e., non-negligible) and
large are |0.30| and |1.81|. Convergence and stability of the Bayesian sampling
has been assessed using R-hat, which should be below 1.01 (Vehtari et al.,
2019), and Effective Sample Size (ESS), which should be greater than 1000
(Burkner, 2017). and We fitted a Bayesian linear model (estimated using MCMC
sampling with 4 chains of 300 iterations and a warmup of 150) to predict mpg
with qsec and wt (formula: mpg ~ qsec + wt). Priors over parameters were set as
student_t (location = 0.00, scale = 5.40) distributions. The model's
explanatory power is substantial (R2 = 0.82, 95% CI [0.75, 0.85], adj. R2 =
0.79). Within this model:
large are |0.30| and |1.81| (corresponding respectively to 0.05 and 0.30 of the
outcome's SD). Convergence and stability of the Bayesian sampling has been
assessed using R-hat, which should be below 1.01 (Vehtari et al., 2019), and
Effective Sample Size (ESS), which should be greater than 1000 (Burkner, 2017).
and We fitted a Bayesian linear model (estimated using MCMC sampling with 4
chains of 300 iterations and a warmup of 150) to predict mpg with qsec and wt
(formula: mpg ~ qsec + wt). Priors over parameters were set as student_t
(location = 0.00, scale = 5.40) distributions. The model's explanatory power is
substantial (R2 = 0.82, 95% CI [0.75, 0.85], adj. R2 = 0.79). Within this
model:
- The effect of b Intercept (Median = 19.74, 95% CI [9.45, 32.02]) has a 99.83%
probability of being positive (> 0), 99.83% of being significant (> 0.30), and
@@ -114,8 +117,8 @@
(Highest Density Interval), along the probability of direction (pd), the
probability of significance and the probability of being large. The thresholds
beyond which the effect is considered as significant (i.e., non-negligible) and
large are |0.30| and |1.81|. Convergence and stability of the Bayesian sampling
has been assessed using R-hat, which should be below 1.01 (Vehtari et al.,
2019), and Effective Sample Size (ESS), which should be greater than 1000
(Burkner, 2017).
large are |0.30| and |1.81| (corresponding respectively to 0.05 and 0.30 of the
outcome's SD). Convergence and stability of the Bayesian sampling has been
assessed using R-hat, which should be below 1.01 (Vehtari et al., 2019), and
Effective Sample Size (ESS), which should be greater than 1000 (Burkner, 2017).
