Skip to content

Commit

Permalink
Resolves merge conflicts for SII and quantile function updates
Browse files Browse the repository at this point in the history
Merge branch 'DEVELOPMENT_MASTER' into dev_quantiles_smallnumbers

# Conflicts:
#	NEWS.md
#	R/sysdata.rda
#	tests/testthat/testQuantiles.R
  • Loading branch information
GeorgieAnderson committed Nov 3, 2023
2 parents 6cd5555 + c8bc9fa commit 0dff248
Show file tree
Hide file tree
Showing 22 changed files with 899 additions and 387 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,3 @@ vignettes/*.R
inst/doc
data-raw
README.Rmd
data-raw/LoadTestdata.R
8 changes: 5 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: PHEindicatormethods
Type: Package
Version: 2.0.1.9003
Version: 2.0.1.9004
Title: Common Public Health Statistics and their Confidence Intervals
Description: Functions to calculate commonly used public health statistics and
their confidence intervals using methods approved for use in the production
Expand All @@ -26,7 +26,8 @@ Authors@R: c(person("Anderson", "Georgina", email = "[email protected]
person("Fryers", "Paul", email = "[email protected]", role = c("ctb")),
person("Clegg", "Emma", role = c("ctb")),
person("Westermann", "Annabel", email = "[email protected]", role = c("ctb")),
person("Woolner", "Joshua", role = c("ctb"))
person("Woolner", "Joshua", role = c("ctb")),
person("Fellows", "Charlotte", , "[email protected]", c("ctb"))
)
BugReports: https://github.com/ukhsa-collaboration/PHEindicatormethods/issues
Depends: R (>= 3.1.0)
Expand All @@ -46,8 +47,9 @@ Suggests:
knitr,
readxl,
rmarkdown,
testthat,
testthat (>= 3.0.0),
withr
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
VignetteBuilder: knitr
Config/testthat/edition: 3
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ importFrom(purrr,map)
importFrom(purrr,map_chr)
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,.env)
importFrom(rlang,quo_name)
importFrom(rlang,quo_text)
importFrom(rlang,sym)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
## PHEindicatormethods vWIP
* Amended phe_quantile function so it will not produce quantiles when the number of small areas within a group is less than the number of quantiles requested. A warning will be generated when quantiles cannot be produced for this reason.
* removed the highergeog argument from phe_quantile function, previously soft-deprecated in v1.2.0.
* `phe_sii` amended to allow data to be transformed prior to calculation of the
SII, and to allow the intercept value to be output.

## PHEindicatormethods v2.0.1
* `calculate_ISRate` and `calculate_ISRatio` updated so observed events can be passed as total without age breakdowns
Expand Down
2 changes: 1 addition & 1 deletion R/Proportions.R
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ phe_proportion <- function(data, x, n, type="full", confidence=0.95, multiplier=
uppercl = wilson_upper(({{ x }}),({{ n }}),confidence) * multiplier,
confidence = paste(confidence*100,"%",sep=""),
method = "Wilson") |>
relocate(.data$statistic, .after = confidence)
relocate("statistic", .after = "confidence")
}


Expand Down
444 changes: 363 additions & 81 deletions R/SII_function.R

Large diffs are not rendered by default.

Binary file modified R/sysdata.rda
Binary file not shown.
236 changes: 113 additions & 123 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -292,14 +292,14 @@ FindXValues <- function(xvals, no_quantiles){
# -------------------------------------------------------------------------------------------------
#' SimulationFunc
#'
#' Function to simulate SII range through random sampling of the indicator value
#' Function to simulate SII and RII range through random sampling of the indicator value
#' for each quantile, based on the associated mean and standard error
#'
#' @return returns lower and upper SII confidence limits according to user
#' @return returns lower and upper SII and RII confidence limits according to user
#' specified confidence
#'
#' @param data data.frame containing the data to calculate SII confidence limits
#' from; unquoted string; no default
#' @param data data.frame containing the data to calculate SII and RII
#' confidence limits from; unquoted string; no default
#' @param value field name within data that contains the indicator value; unquoted
#' string; no default
#' @param value_type indicates the indicator type (1 = rate, 2 = proportion, 0 = other);
Expand Down Expand Up @@ -337,6 +337,7 @@ SimulationFunc <- function(data,
sqrt_a,
b_sqrt_a,
rii = FALSE,
transform = FALSE,
reliability_stat = FALSE) {

# Use NSE on input fields - apply quotes
Expand All @@ -349,18 +350,22 @@ SimulationFunc <- function(data,
confidence_value <- confidence + ((1 - confidence) / 2)

# Set 10x no. of reps if reliability stats requested
no_reps <- ifelse(reliability_stat == TRUE, 10*repeats, repeats)
no_reps <- if (reliability_stat == TRUE) {
10 * repeats
} else {
repeats
}

# Take random samples from a normal distribution with mean as the indicator value
# sd. as the associated standard error. Store results in a
# (no. of quantiles)x(no. of repeats) dimensional matrix.
yvals <- matrix(stats::rnorm(no_reps*length(pull(data, !!value)), pull(data, !!value), pull(data, !!se)), ncol = no_reps)

# Retransform y values for rates (1) and proportions (2)
if (value_type == 1) {
if (value_type == 1 & transform == FALSE) {
yvals_transformed <- exp(yvals)
} else if (value_type == 2){
yvals_transformed <- exp(yvals)/(1+exp(yvals))
} else if (value_type == 2 & transform == FALSE) {
yvals_transformed <- exp(yvals)/(1 + exp(yvals))
} else {
yvals_transformed <- yvals
}
Expand All @@ -372,15 +377,12 @@ SimulationFunc <- function(data,
# Calculate inverse of (m*transpose (m))*m to use in calculation
# of least squares estimate of regression parameters
# Ref: https://onlinecourses.science.psu.edu/stat501/node/38
invm_m <- solve((m)%*%t(m))%*%m
invm_m <- solve((m) %*% t(m)) %*% m

# Multiply transformed yvals matrix element-wise by sqrta - this weights the sampled
# yvals by a measure of population
final_yvals <- yvals_transformed*pull(data, !!sqrt_a)

# Prepare empty numeric vector to hold parameter results
sloperesults <- numeric(no_reps)

# Define function to matrix multiply invm_m by a vector
matrix_mult <- function(x) {invm_m %*% x}

Expand All @@ -395,25 +397,27 @@ SimulationFunc <- function(data,
# RII only calculations
if (rii == TRUE) {

# Calculate the RII
RII_results <- (params_bsqrta + params_sqrta)/params_sqrta
# Calculate the RII
RII_results <- (params_bsqrta + params_sqrta)/params_sqrta

# Split simulated SII/RIIs into 10 samples if reliability stats requested
if (reliability_stat == TRUE) {
SII_results <- matrix(params_bsqrta, ncol = 10)
RII_results <- matrix(RII_results, ncol = 10)
} else {
SII_results <- matrix(params_bsqrta, ncol = 1)
RII_results <- matrix(RII_results, ncol = 1)
}
# Split simulated SII/RIIs into 10 samples if reliability stats requested
if (reliability_stat == TRUE) {
SII_results <- matrix(params_bsqrta, ncol = 10)
RII_results <- matrix(RII_results, ncol = 10)
} else {
SII_results <- matrix(params_bsqrta, ncol = 1)
RII_results <- matrix(RII_results, ncol = 1)
}

# Apply multiplicative factor to RII
if(multiplier < 0) {
RII_results <- 1/RII_results
}
# Apply multiplicative factor to RII if transform = FALSE
if (multiplier < 0 & transform == FALSE) {
RII_results <- 1/RII_results
} else {
RII_results <- RII_results
}

# Order simulated RIIs from lowest to highest
sortresults_RII <- apply(RII_results, 2, sort, decreasing = FALSE)
# Order simulated RIIs from lowest to highest
sortresults_RII <- apply(RII_results, 2, sort, decreasing = FALSE)

} else {

Expand All @@ -425,8 +429,12 @@ SimulationFunc <- function(data,
}
}

# Apply multiplicative factor to SII
SII_results <- multiplier * SII_results
# Apply multiplicative factor to SII if transform = FALSE
if (transform == FALSE) {
SII_results <- SII_results * multiplier
} else {
SII_results <- SII_results
}

# Order simulated SIIs from lowest to highest
sortresults_SII <- apply(SII_results, 2, sort, decreasing = FALSE)
Expand All @@ -435,116 +443,98 @@ SimulationFunc <- function(data,
# as confidence limits

# position of lower percentile
pos_lower <- round(repeats*(1-confidence_value), digits=0)
pos_lower <- round(repeats * (1 - confidence_value), digits = 0)
# position of upper percentile
pos_upper <- round(repeats*confidence_value, digits=0)
pos_upper <- round(repeats * confidence_value, digits = 0)

# Combine position indexes for SII CLs
pos <- rbind(pos_lower, pos_upper)
colnames(pos) <- formatC(confidence * 100, format = "f", digits = 1)

# Extract SII confidence limits from critical percentiles of first column of samples
SII_lower_cls <- sortresults_SII[pos_lower, 1]
SII_upper_cls <- sortresults_SII[pos_upper, 1]

# Define column names (adding in confidence level)
names(SII_lower_cls) <- paste0("sii_lower",
gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)),
"cl")
names(SII_upper_cls) <- paste0("sii_upper",
gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)),
"cl")

# Combine lower and upper SII CLs
SII_cls <- t(c(SII_lower_cls, SII_upper_cls))

# CASE 1 - Calculate RII CLs if requested
if(rii == TRUE) {

# Extract RII confidence limits from critical percentiles of first column of samples
RII_lower_cls <- sortresults_RII[pos_lower, 1]
RII_upper_cls <- sortresults_RII[pos_upper, 1]

# Define column names (adding in confidence level)
names(RII_lower_cls) <- paste0("rii_lower",
gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)),
"cl")
names(RII_upper_cls) <- paste0("rii_upper",
gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)),
"cl")

# Combine lower and upper RII CLs
RII_cls <- t(c(RII_lower_cls, RII_upper_cls))

if (reliability_stat == TRUE) {

# Calculate variability by taking the absolute difference between each of the lower/upper
# limits in the additional 9 sample sets and the initial lower/upper limits
SII_diffs <- apply(pos, 2, function(x) { # run function over each column of "pos" (i.e. for each confidence)

diffs_sample_original <- t(apply(sortresults_SII[c(x[1], x[2]),], 1, function(y) abs(y - y[1])))

# Calculate mean absolute difference over all 18 differences
sii_mad <- mean(diffs_sample_original[, 2:10])
})

RII_diffs <- apply(pos, 2, function(x) { # run function over each column of "pos" (i.e. for each confidence)
results_SII <- data.frame(sortresults_SII[pos, , drop = FALSE])
names(results_SII) <- paste0("Rep_", seq_along(results_SII))
rownames(results_SII) <- paste0(
rep(c("sii_lower", "sii_upper"), length(confidence)),
rep(paste0(gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)), "cl"), each = 2)
)

diffs_sample_original <- t(apply(sortresults_RII[c(x[1], x[2]),], 1, function(y) abs(y - y[1])))

# Calculate mean absolute difference over all 18 differences
rii_mad <- mean(diffs_sample_original[, 2:10])
})

# Define column names (adding in confidence level)
names(SII_diffs) <- paste0("sii_mad",
gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)))
names(RII_diffs) <- paste0("rii_mad",
gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)))

# Return SII confidence limits from first of the 10 samples, plus the
# reliability measures
results <- cbind(SII_cls, RII_cls, t(SII_diffs), t(RII_diffs))


} else {
# Return SII and RII confidence limits from single sample taken
results <- cbind(SII_cls, RII_cls)
}

# CASE 2 - Return SII stats only
} else {
if (isFALSE(rii)) {
results <- data.frame(t(results_SII))
}

if (reliability_stat == TRUE) {
if (isTRUE(rii)) {
results_RII <- data.frame(sortresults_RII[pos, , drop = FALSE])
names(results_RII) <- paste0("Rep_", seq_along(results_RII))
rownames(results_RII) <- paste0(
rep(c("rii_lower", "rii_upper"), length(confidence)),
rep(paste0(gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)), "cl"), each = 2)
)

# Calculate variability by taking the absolute difference between each of the lower/upper
# limits in the additional 9 sample sets and the initial lower/upper limits
SII_diffs <- apply(pos, 2, function(x) { # run function over each column of "pos" (i.e. for each confidence)
results <- data.frame(t(bind_rows(results_SII, results_RII)))
}

diffs_sample_original <- t(apply(sortresults_SII[c(x[1], x[2]),], 1, function(y) abs(y - y[1])))
results

# Calculate mean absolute difference over all 18 differences
sii_MAD <- mean(diffs_sample_original[, 2:10])
})
}

# Define column names (adding in confidence level)
names(SII_diffs) <- paste0("sii_mad",
gsub("\\.", "_", formatC(confidence * 100, format = "f", digits = 1)))

# Return SII confidence limits from first of the 10 samples, plus the
# reliability measures
results <- cbind(SII_cls, t(SII_diffs))
# ------------------------------------------------------------------------------
#' SII reliability stats
#' @param CI_data A nested dataframe containing the SII and RII CIs for each rep
#' @param confidence Confidence level used to calculate the lower and upper confidence limits of SII;
#' numeric between 0.5 and 0.9999 or 50 and 99.99; default 0.95
#' @param rii Option to return the Relative Index of Inequality (RII) with
#' associated confidence limits
#'
#' @return a data frame
#'
#' @noRd
# ------------------------------------------------------------------------------

} else {
# Return SII confidence limits only from single sample taken
results <- SII_cls
}
}
calc_reliability <- function(CI_data,
confidence,
rii) {

groups <- group_vars(CI_data)

reliabity_stats <- CI_data %>%
mutate(
reliabity_stats_data = purrr::map(.data$CI_calcs, function(x){
# Calculate mean average difference in SII and RII from first rep
diffs_sample_original <- x |>
mutate(across(everything(), function(y) {abs(y - y[1])})) |>
slice(-1)

map(confidence, function(conf) {
conf_formatted <-
gsub("\\.", "_", formatC(conf * 100, format = "f", digits = 1))

if (isTRUE(rii)) {
diffs_sample_original |>
select(contains(conf_formatted)) |>
summarise(
"sii_mad{conf_formatted}" := mean(c_across(contains("sii"))),
"rii_mad{conf_formatted}" := mean(c_across(contains("rii")))
)
} else {
diffs_sample_original |>
select(contains(conf_formatted)) |>
summarise(
"sii_mad{conf_formatted}" := mean(c_across(contains("sii")))
)
}

}) |>
bind_cols()

return(data.frame(results))
}
)
) |>
select(all_of(groups), "reliabity_stats_data") |>
unnest("reliabity_stats_data")

}


# ------------------------------------------------------------------------------
#' Poisson Function for funnel plots for ratios and rates
#'
Expand Down
Loading

0 comments on commit 0dff248

Please sign in to comment.