Skip to content

Commit

Permalink
Rename group/group_by arguments into by
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed May 16, 2024
1 parent 201e979 commit 477c989
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 49 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: report
Type: Package
Title: Automated Reporting of Results and Statistical Models
Version: 0.5.8.2
Version: 0.5.8.3
Authors@R:
c(person(given = "Dominique",
family = "Makowski",
Expand Down
10 changes: 10 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
# report 0.5.10

Breaking

* Arguments named `group`, `at` and `group_by` will be deprecated in future
releases. of _easystats_ packages. Please use `by` instead. This affects
following functions in *report*:

* `report_sample()`

# report 0.5.9

Minor changes
Expand Down
95 changes: 51 additions & 44 deletions R/report_sample.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#' Create sample description table (also referred to as "Table 1").
#'
#' @param data A data frame for which descriptive statistics should be created.
#' @param group_by Character vector, indicating the column(s) for possible grouping
#' @param by Character vector, indicating the column(s) for possible grouping
#' of the descriptive table. Note that weighting (see `weights`) does not work
#' with more than one grouping column.
#' @param centrality Character, indicates the statistics that should be
Expand Down Expand Up @@ -43,6 +43,7 @@
#' @param digits Number of decimals.
#' @param n Logical, actual sample size used in the calculation of the
#' reported descriptive statistics (i.e., without the missing values).
#' @param group_by Deprecated. Use `by` instead.
#' @inheritParams report.data.frame
#'
#' @return A data frame of class `report_sample` with variable names and
Expand All @@ -61,8 +62,8 @@
#'
#' report_sample(iris[, 1:4])
#' report_sample(iris, select = c("Sepal.Length", "Petal.Length", "Species"))
#' report_sample(iris, group_by = "Species")
#' report_sample(airquality, group_by = "Month", n = TRUE, total = FALSE)
#' report_sample(iris, by = "Species")
#' report_sample(airquality, by = "Month", n = TRUE, total = FALSE)
#'
#' # confidence intervals for proportions
#' set.seed(123)
Expand All @@ -72,7 +73,7 @@
#' report_sample(d, ci = 0.95, ci_correct = TRUE) # continuity correction
#' @export
report_sample <- function(data,
group_by = NULL,
by = NULL,
centrality = "mean",
ci = NULL,
ci_method = "wilson",
Expand All @@ -83,7 +84,13 @@ report_sample <- function(data,
total = TRUE,
digits = 2,
n = FALSE,
group_by = NULL,
...) {
## TODO: deprecate later
if (!is.null(group_by)) {
by <- group_by
}

# check for correct input type
if (!is.data.frame(data)) {
data <- tryCatch(
Expand Down Expand Up @@ -119,16 +126,16 @@ report_sample <- function(data,
variables <- setdiff(variables, exclude)
}

# for grouped data frames, use groups as group_by argument
if (inherits(data, "grouped_df") && is.null(group_by)) {
group_by <- setdiff(colnames(attributes(data)$groups), ".rows")
# for grouped data frames, use groups as by argument
if (inherits(data, "grouped_df") && is.null(by)) {
by <- setdiff(colnames(attributes(data)$groups), ".rows")
}

# grouped by?
grouping <- !is.null(group_by) && all(group_by %in% colnames(data))
do_grouping <- !is.null(by) && all(by %in% colnames(data))

# sanity check - weights and grouping
if (!is.null(group_by) && length(group_by) > 1 && !is.null(weights)) {
if (!is.null(by) && length(by) > 1 && !is.null(weights)) {
insight::format_error("Cannot apply `weights` when grouping is done by more than one variable.")
}

Expand All @@ -143,12 +150,12 @@ report_sample <- function(data,
i
})

# coerce group_by columns to factor
groups <- as.data.frame(lapply(data[group_by], factor))
# coerce by columns to factor
groups <- as.data.frame(lapply(data[by], factor))

out <- if (isTRUE(grouping)) {
out <- if (isTRUE(do_grouping)) {
result <- lapply(split(data[variables], groups), function(x) {
x[group_by] <- NULL
x[by] <- NULL
.generate_descriptive_table(
x,
centrality,
Expand All @@ -162,7 +169,7 @@ report_sample <- function(data,
})
# for more than one group, fix column names. we don't want "a.b (n=10)",
# but rather ""a, b (n=10)""
if (length(group_by) > 1) {
if (length(by) > 1) {
old_names <- datawizard::data_unite(
unique(groups),
new_column = ".old_names",
Expand All @@ -179,23 +186,23 @@ report_sample <- function(data,
variable <- result[[1]]["Variable"]
# number of observation, based on weights
if (!is.null(weights)) {

Check warning on line 188 in R/report_sample.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/report_sample.R,line=188,col=9,[if_not_else_linter] Prefer `if (A) x else y` to the less-readable `if (!A) y else x` in a simple if/else statement.
n_obs <- round(as.vector(stats::xtabs(data[[weights]] ~ data[[group_by]])))
n_obs <- round(as.vector(stats::xtabs(data[[weights]] ~ data[[by]])))
} else {
n_obs <- as.vector(table(data[group_by]))
n_obs <- as.vector(table(data[by]))
}
# column names for groups
cn <- sprintf("%s (n=%g)", names(result), n_obs)
# just extract summary columns
summaries <- do.call(cbind, lapply(result, function(i) i["Summary"]))
colnames(summaries) <- cn
# generate data for total column, but make sure to remove missings
total_data <- data[stats::complete.cases(data[group_by]), unique(c(variables, group_by))]
total_data <- data[stats::complete.cases(data[by]), unique(c(variables, by))]
# bind all together, including total column
final <- cbind(
variable,
summaries,
Total = .generate_descriptive_table(
total_data[setdiff(variables, group_by)],
total_data[setdiff(variables, by)],
centrality,
weights,
digits,
Expand All @@ -211,9 +218,9 @@ report_sample <- function(data,
}
# define total N, based on weights
if (!is.null(weights)) {

Check warning on line 220 in R/report_sample.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/report_sample.R,line=220,col=9,[if_not_else_linter] Prefer `if (A) x else y` to the less-readable `if (!A) y else x` in a simple if/else statement.
total_n <- round(sum(as.vector(table(data[group_by]))) * mean(data[[weights]], na.rm = TRUE))
total_n <- round(sum(as.vector(table(data[by]))) * mean(data[[weights]], na.rm = TRUE))
} else {
total_n <- sum(as.vector(table(data[group_by])))
total_n <- sum(as.vector(table(data[by])))
}
# add N to column name
colnames(final)[ncol(final)] <- sprintf(
Expand Down Expand Up @@ -335,36 +342,36 @@ report_sample <- function(data,
weights[is.na(x)] <- NA
weights <- stats::na.omit(weights)
x <- stats::na.omit(x)
proportions <- prop.table(stats::xtabs(weights ~ x))
table_proportions <- prop.table(stats::xtabs(weights ~ x))
} else {
proportions <- prop.table(table(x))
table_proportions <- prop.table(table(x))
}

# for binary factors, just need one level
if (nlevels(x) == 2) {
proportions <- proportions[2]
table_proportions <- table_proportions[2]
}

# CI for proportions?
if (!is.null(ci)) {

Check warning on line 356 in R/report_sample.R

View workflow job for this annotation

GitHub Actions / lint-changed-files / lint-changed-files

file=R/report_sample.R,line=356,col=7,[if_not_else_linter] Prefer `if (A) x else y` to the less-readable `if (!A) y else x` in a simple if/else statement.
ci_low_high <- .ci_proportion(x, proportions, weights, ci, ci_method, ci_correct)
ci_low_high <- .ci_proportion(x, table_proportions, weights, ci, ci_method, ci_correct)
.summary <- sprintf(
"%.1f [%.1f, %.1f]",
100 * proportions,
100 * table_proportions,
100 * ci_low_high$ci_low,
100 * ci_low_high$ci_high
)
} else {
.summary <- sprintf("%.1f", 100 * proportions)
.summary <- sprintf("%.1f", 100 * table_proportions)
}

if (isTRUE(n)) {
.summary <- paste0(.summary, ", ", round(sum(!is.na(x)) * as.vector(proportions)))
.summary <- paste0(.summary, ", ", round(sum(!is.na(x)) * as.vector(table_proportions)))
}

n_label <- ifelse(n, ", n", "")
data.frame(
Variable = sprintf("%s [%s], %%%s", column, names(proportions), n_label),
Variable = sprintf("%s [%s], %%%s", column, names(table_proportions), n_label),
Summary = as.vector(.summary),
stringsAsFactors = FALSE
)
Expand All @@ -377,12 +384,12 @@ report_sample <- function(data,

# Standard error for confidence interval of proportions ----

.ci_proportion <- function(x, proportions, weights, ci, ci_method, ci_correct) {
.ci_proportion <- function(x, table_proportions, weights, ci, ci_method, ci_correct) {
ci_method <- match.arg(tolower(ci_method), c("wald", "wilson"))

# variables
p <- as.vector(proportions)
q <- 1 - p
p <- as.vector(table_proportions)
quant <- 1 - p
n <- length(stats::na.omit(x))
z <- stats::qnorm((1 + ci) / 2)

Expand All @@ -399,21 +406,21 @@ report_sample <- function(data,
if (ci_method == "wilson") {
# Wilson CIs -------------------
if (isTRUE(ci_correct)) {
ci_low <- (2 * n * p + z^2 - 1 - z * sqrt(z^2 - 2 - 1 / n + 4 * p * (n * q + 1))) / (2 * (n + z^2))
ci_high <- (2 * n * p + z^2 + 1 + z * sqrt(z^2 + 2 - 1 / n + 4 * p * (n * q - 1))) / (2 * (n + z^2))
ci_low <- (2 * n * p + z^2 - 1 - z * sqrt(z^2 - 2 - 1 / n + 4 * p * (n * quant + 1))) / (2 * (n + z^2))
ci_high <- (2 * n * p + z^2 + 1 + z * sqrt(z^2 + 2 - 1 / n + 4 * p * (n * quant - 1))) / (2 * (n + z^2))
# close to 0 or 1, then CI is 0 resp. 1
fix <- p < 0.00001 | ci_low < 0.00001
if (any(fix)) {
ci_low[fix] <- 0
fix_ci <- p < 0.00001 | ci_low < 0.00001
if (any(fix_ci)) {
ci_low[fix_ci] <- 0
}
fix <- p > 0.99999 | ci_high > 0.99999
if (any(fix)) {
ci_high[fix] <- 1
fix_ci <- p > 0.99999 | ci_high > 0.99999
if (any(fix_ci)) {
ci_high[fix_ci] <- 1
}
out <- list(ci_low = ci_low, ci_high = ci_high)
} else {
prop <- (2 * n * p) + z^2
moe <- z * sqrt(z^2 + 4 * n * p * q)
moe <- z * sqrt(z^2 + 4 * n * p * quant)
correction <- 2 * (n + z^2)
out <- list(
ci_low = (prop - moe) / correction,
Expand All @@ -422,7 +429,7 @@ report_sample <- function(data,
}
} else {
# Wald CIs -------------------
moe <- z * suppressWarnings(sqrt(p * q / n))
moe <- z * suppressWarnings(sqrt(p * quant / n))
if (isTRUE(ci_correct)) {
moe <- moe + 1 / (2 * n)
}
Expand Down Expand Up @@ -511,9 +518,9 @@ print_md.report_sample <- function(x, layout = "horizontal", ...) {
weights[is.na(x)] <- NA
weights <- stats::na.omit(weights)
x <- stats::na.omit(x)
order <- order(x)
x <- x[order]
weights <- weights[order]
x_order <- order(x)
x <- x[x_order]
weights <- weights[x_order]
rw <- cumsum(weights) / sum(weights)
md_values <- min(which(rw >= p))
if (rw[md_values] == p) {
Expand Down
11 changes: 7 additions & 4 deletions man/report_sample.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 477c989

Please sign in to comment.