diff --git a/.Rbuildignore b/.Rbuildignore index 3b44336..cc3b90c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,4 @@ ^doc$ ^Meta$ ^\.vscode$ +^dev/ diff --git a/NEWS.md b/NEWS.md index 0e95ca5..7732ec4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,10 +7,11 @@ ## Other changes * reparameterized the Blume-capel model to use (score-baseline) instead of score. +* implemented a new way to compute the denominators and probabilities. This made their computation both faster and more stable. ## Bug fixes -* Fixed numerical problems with Blume-Capel variables using HMC and NUTS for bgm(). +* fixed numerical problems with Blume-Capel variables using HMC and NUTS. # bgms 0.1.6.1 @@ -22,9 +23,9 @@ ## Bug fixes -* Fixed a problem with warmup scheduling for adaptive-metropolis in bgmCompare() -* Fixed stability problems with parallel sampling for bgm() -* Fixed spurious output errors printing to console after user interrupt. +* fixed a problem with warmup scheduling for adaptive-metropolis in bgmCompare() +* fixed stability problems with parallel sampling for bgm() +* fixed spurious output errors printing to console after user interrupt. # bgms 0.1.6.0 diff --git a/R/RcppExports.R b/R/RcppExports.R index 6e04e6f..eb46298 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -25,8 +25,8 @@ sample_omrf_gibbs <- function(no_states, no_variables, no_categories, interactio .Call(`_bgms_sample_omrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, iter) } -sample_bcomrf_gibbs <- function(no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter) { - .Call(`_bgms_sample_bcomrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter) +sample_bcomrf_gibbs <- function(no_states, no_variables, no_categories, interactions, thresholds, variable_type, baseline_category, iter) { + .Call(`_bgms_sample_bcomrf_gibbs`, no_states, no_variables, no_categories, interactions, thresholds, variable_type, baseline_category, iter) } compute_Vn_mfm_sbm <- function(no_variables, dirichlet_alpha, t_max, lambda) { diff --git a/R/bgm.R b/R/bgm.R index 4d9e176..ad4c3a0 100644 --- a/R/bgm.R +++ b/R/bgm.R @@ -560,8 +560,9 @@ bgm = function( # Ordinal (variable_bool == TRUE) or Blume-Capel (variable_bool == FALSE) bc_vars = which(!variable_bool) for(i in bc_vars) { - blume_capel_stats[1, i] = sum(x[, i]) - blume_capel_stats[2, i] = sum((x[, i] - baseline_category[i])^2) + blume_capel_stats[1, i] = sum(x[, i] - baseline_category[i]) + blume_capel_stats[2, i] = sum((x[, i] - baseline_category[i]) ^ 2) + x[, i] = x[, i] - baseline_category[i] } } pairwise_stats = t(x) %*% x @@ -627,7 +628,6 @@ bgm = function( nThreads = cores, seed = seed, progress_type = progress_type ) - userInterrupt = any(vapply(out, FUN = `[[`, FUN.VALUE = logical(1L), "userInterrupt")) if(userInterrupt) { warning("Stopped sampling after user interrupt, results are likely uninterpretable.") diff --git a/R/bgmCompare.R b/R/bgmCompare.R index 6478ff9..ecd3968 100644 --- a/R/bgmCompare.R +++ b/R/bgmCompare.R @@ -321,7 +321,7 @@ bgmCompare = function( } else if(update_method == "hamiltonian-mc") { target_accept = 0.65 } else if(update_method == "nuts") { - target_accept = 0.80 + target_accept = 0.65 } } @@ -414,13 +414,15 @@ bgmCompare = function( blume_capel_stats = compute_blume_capel_stats( x, baseline_category, ordinal_variable, group ) + for (i in which(!ordinal_variable)) { + x[, i] = x[, i] - baseline_category[i] + } # Compute sufficient statistics for pairwise interactions pairwise_stats = compute_pairwise_stats( x, group ) - # Index vector used to sample interactions in a random order ----------------- Index = matrix(0, nrow = num_interactions, ncol = 3) counter = 0 @@ -490,7 +492,6 @@ bgmCompare = function( seed <- as.integer(seed) - # Call the Rcpp function out = run_bgmCompare_parallel( observations = observations, diff --git a/R/data_utils.R b/R/data_utils.R index 6559360..6756704 100644 --- a/R/data_utils.R +++ b/R/data_utils.R @@ -243,7 +243,7 @@ compute_counts_per_category = function(x, num_categories, group = NULL) { counts_per_category_gr[category, variable] = sum(x[group == g, variable] == category) } } - counts_per_category[[g]] = counts_per_category_gr + counts_per_category[[length(counts_per_category) + 1]] = counts_per_category_gr } return(counts_per_category) } @@ -253,9 +253,9 @@ compute_blume_capel_stats = function(x, baseline_category, ordinal_variable, gro if(is.null(group)) { # One-group design sufficient_stats = matrix(0, nrow = 2, ncol = ncol(x)) bc_vars = which(!ordinal_variable) - for(i in bc_vars) { - sufficient_stats[1, i] = sum(x[, i]) - sufficient_stats[2, i] = sum((x[, i] - baseline_category[i])^2) + for (i in bc_vars) { + sufficient_stats[1, i] = sum(x[, i] - baseline_category[i]) + sufficient_stats[2, i] = sum((x[, i] - baseline_category[i]) ^ 2) } return(sufficient_stats) } else { # Multi-group design @@ -263,11 +263,11 @@ compute_blume_capel_stats = function(x, baseline_category, ordinal_variable, gro for(g in unique(group)) { sufficient_stats_gr = matrix(0, nrow = 2, ncol = ncol(x)) bc_vars = which(!ordinal_variable) - for(i in bc_vars) { - sufficient_stats_gr[1, i] = sum(x[group == g, i]) - sufficient_stats_gr[2, i] = sum((x[group == g, i] - baseline_category[i])^2) + for (i in bc_vars) { + sufficient_stats_gr[1, i] = sum(x[group == g, i] - baseline_category[i]) + sufficient_stats_gr[2, i] = sum((x[group == g, i] - baseline_category[i]) ^ 2) } - sufficient_stats[[g]] = sufficient_stats_gr + sufficient_stats[[length(sufficient_stats) + 1]] = sufficient_stats_gr } return(sufficient_stats) } @@ -275,12 +275,12 @@ compute_blume_capel_stats = function(x, baseline_category, ordinal_variable, gro # Helper function for computing sufficient statistics for pairwise interactions compute_pairwise_stats <- function(x, group) { - result <- vector("list", length(unique(group))) + result <- list() for(g in unique(group)) { obs <- x[group == g, , drop = FALSE] # cross-product: gives number of co-occurrences of categories - result[[g]] <- t(obs) %*% obs + result[[length(result) + 1]] <- t(obs) %*% obs } result diff --git a/R/nuts_diagnostics.R b/R/nuts_diagnostics.R index dfb9e61..ba47746 100644 --- a/R/nuts_diagnostics.R +++ b/R/nuts_diagnostics.R @@ -42,18 +42,16 @@ summarize_nuts_diagnostics <- function(out, nuts_max_depth = 10, verbose = TRUE) 100 * divergence_rate, total_divergences, nrow(divergent_mat) * ncol(divergent_mat) - ), "Consider increasing the target acceptance rate.") - } else if(divergence_rate > 0) { - message( - sprintf( - "Note: %.3f%% of transitions ended with a divergence (%d of %d).\n", - 100 * divergence_rate, - total_divergences, - nrow(divergent_mat) * ncol(divergent_mat) - ), - "Check R-hat and effective sample size (ESS) to ensure the chains are\n", - "mixing well." - ) + ), "Consider increasing the target acceptance rate or change to update_method = ``adaptive-metropolis''.") + } else if (divergence_rate > 0) { + message(sprintf( + "Note: %.3f%% of transitions ended with a divergence (%d of %d).\n", + 100 * divergence_rate, + total_divergences, + nrow(divergent_mat) * ncol(divergent_mat) + ), + "Check R-hat and effective sample size (ESS) to ensure the chains are\n", + "mixing well.") } depth_hit_rate <- max_tree_depth_hits / (nrow(treedepth_mat) * ncol(treedepth_mat)) @@ -84,16 +82,14 @@ summarize_nuts_diagnostics <- function(out, nuts_max_depth = 10, verbose = TRUE) low_ebfmi_chains <- which(ebfmi_per_chain < 0.3) min_ebfmi <- min(ebfmi_per_chain) - if(length(low_ebfmi_chains) > 0) { - warning( - sprintf( - "E-BFMI below 0.3 detected in %d chain(s): %s.\n", - length(low_ebfmi_chains), - paste(low_ebfmi_chains, collapse = ", ") - ), - "This suggests inefficient momentum resampling in those chains.\n", - "Sampling efficiency may be reduced. Consider longer chains or checking convergence diagnostics." - ) + if (length(low_ebfmi_chains) > 0) { + warning(sprintf( + "E-BFMI below 0.3 detected in %d chain(s): %s.\n", + length(low_ebfmi_chains), + paste(low_ebfmi_chains, collapse = ", ") + ), + "This suggests inefficient momentum resampling in those chains.\n", + "Sampling efficiency may be reduced. Consider longer chains and check convergence diagnostics.") } } diff --git a/R/sampleMRF.R b/R/sampleMRF.R index 487c627..caacdf3 100644 --- a/R/sampleMRF.R +++ b/R/sampleMRF.R @@ -13,7 +13,7 @@ #' in specifying their model. #' #' The Blume-Capel option is specifically designed for ordinal variables that -#' have a special type of reference_category category, such as the neutral +#' have a special type of baseline_category category, such as the neutral #' category in a Likert scale. The Blume-Capel model specifies the following #' quadratic model for the threshold parameters: #' \deqn{\mu_{\text{c}} = \alpha \times \text{c} + \beta \times (\text{c} - \text{r})^2,}{{\mu_{\text{c}} = \alpha \times \text{c} + \beta \times (\text{c} - \text{r})^2,}} @@ -23,8 +23,8 @@ #' \eqn{\alpha > 0}{\alpha > 0} and decreasing threshold values if #' \eqn{\alpha <0}{\alpha <0}), if \eqn{\beta < 0}{\beta < 0}, it offers an #' increasing penalty for responding in a category further away from the -#' reference_category category r, while \eqn{\beta > 0}{\beta > 0} suggests a -#' preference for responding in the reference_category category. +#' baseline_category category r, while \eqn{\beta > 0}{\beta > 0} suggests a +#' preference for responding in the baseline_category category. #' #' @param no_states The number of states of the ordinal MRF to be generated. #' @@ -53,8 +53,8 @@ #' ``blume-capel''. Binary variables are automatically treated as ``ordinal’’. #' Defaults to \code{variable_type = "ordinal"}. #' -#' @param reference_category An integer vector of length \code{no_variables} specifying the -#' reference_category category that is used for the Blume-Capel model (details below). +#' @param baseline_category An integer vector of length \code{no_variables} specifying the +#' baseline_category category that is used for the Blume-Capel model (details below). #' Can be any integer value between \code{0} and \code{no_categories} (or #' \code{no_categories[i]}). #' @@ -106,7 +106,7 @@ #' interactions = Interactions, #' thresholds = Thresholds, #' variable_type = c("b", "b", "o", "b", "o"), -#' reference_category = 2 +#' baseline_category = 2 #' ) #' #' @export @@ -116,7 +116,7 @@ mrfSampler = function(no_states, interactions, thresholds, variable_type = "ordinal", - reference_category, + baseline_category, iter = 1e3) { # Check no_states, no_variables, iter -------------------------------------------- if(no_states <= 0 || @@ -187,24 +187,20 @@ mrfSampler = function(no_states, } } - # Check the reference_category for Blume-Capel variables --------------------- + # Check the baseline_category for Blume-Capel variables --------------------- if(any(variable_type == "blume-capel")) { - if(length(reference_category) == 1) { - reference_category = rep(reference_category, no_variables) + if(length(baseline_category) == 1) { + baseline_category = rep(baseline_category, no_variables) } - if(any(reference_category < 0) || any(abs(reference_category - round(reference_category)) > .Machine$double.eps)) { - stop(paste0( - "For variables ", - which(reference_category < 0), - " ``reference_category'' was either negative or not integer." - )) + if(any(baseline_category < 0) || any(abs(baseline_category - round(baseline_category)) > .Machine$double.eps)) { + stop(paste0("For variables ", + which(baseline_category < 0), + " ``baseline_category'' was either negative or not integer.")) } - if(any(reference_category - no_categories > 0)) { - stop(paste0( - "For variables ", - which(reference_category - no_categories > 0), - " the ``reference_category'' category was larger than the maximum category value." - )) + if(any(baseline_category - no_categories > 0)) { + stop(paste0("For variables ", + which(baseline_category - no_categories > 0), + " the ``baseline_category'' category was larger than the maximum category value.")) } } @@ -347,7 +343,7 @@ mrfSampler = function(no_states, interactions = interactions, thresholds = thresholds, variable_type = variable_type, - reference_category = reference_category, + baseline_category = baseline_category, iter = iter ) } diff --git a/dev/numerical_analyses/bgm_blumecapel_normalization_PL.R b/dev/numerical_analyses/bgm_blumecapel_normalization_PL.R new file mode 100644 index 0000000..2635985 --- /dev/null +++ b/dev/numerical_analyses/bgm_blumecapel_normalization_PL.R @@ -0,0 +1,647 @@ +# ============================================================================== +# Blume–Capel Numerical Stability Study (reparametrized) +# File: dev/numerical_analyses/BCvar_normalization_PL.r +# +# Goal +# ---- +# Compare numerical stability of four ways to compute the Blume–Capel +# normalizing constant across a range of residual scores r, using the +# reparametrized form +# +# Z(r) = sum_{s=0}^C exp( θ_part(s) + s * r ), +# +# where +# +# θ_part(s) = θ_lin * (s - ref) + θ_quad * (s - ref)^2. +# +# This corresponds to the reformulated denominator where: +# - scores s are in {0, 1, ..., C}, +# - the quadratic/linear θ-part is in terms of the centered score (s - ref), +# - the “residual” r enters only through s * r. +# +# Methods (exactly four): +# 1) Direct +# Unbounded sum of exp(θ_part(s) + s * r). +# +# 2) Preexp +# Unbounded “power-chain” over s, precomputing exp(θ_part(s)) and +# reusing exp(r): +# Z(r) = sum_s exp(θ_part(s)) * (exp(r))^s . +# +# 3) Direct + max-bound +# Per-r max-term bound M(r) = max_s (θ_part(s) + s * r), +# computing +# Z(r) = exp(M(r)) * sum_s exp(θ_part(s) + s * r - M(r)), +# but returning only the *scaled* sum: +# sum_s exp(θ_part(s) + s * r - M(r)). +# +# 4) Preexp + max-bound +# Same max-term bound M(r) as in (3), but using the power-chain: +# sum_s exp(θ_part(s)) * exp(s * r - M(r)). +# +# References (for error calculation): +# - ref_unscaled = MPFR sum_s exp(θ_part(s) + s * r) +# - ref_scaled = MPFR sum_s exp(θ_part(s) + s * r - M(r)), +# where M(r) = max_s (θ_part(s) + s * r) in MPFR. +# +# Dependencies +# ------------ +# - Rmpfr +# +# Outputs +# ------- +# compare_bc_all_methods(...) returns a data.frame with: +# r : grid of residual scores +# direct : numeric, Σ_s exp(θ_part(s) + s * r) +# preexp : numeric, Σ_s via power-chain (unbounded) +# direct_bound : numeric, Σ_s exp(θ_part(s) + s * r - M(r)) +# preexp_bound : numeric, Σ_s via power-chain with max-term bound +# err_direct : |(direct - ref_unscaled)/ref_unscaled| +# err_preexp : |(preexp - ref_unscaled)/ref_unscaled| +# err_direct_bound : |(direct_bound - ref_scaled )/ref_scaled | +# err_preexp_bound : |(preexp_bound - ref_scaled )/ref_scaled | +# ref_unscaled : numeric MPFR reference (unbounded) +# ref_scaled : numeric MPFR reference (max-term scaled) +# +# Plotting helpers (unchanged interface): +# - plot_bc_four(res, ...) +# - summarize_bc_four(res) +# +# ============================================================================== + +library(Rmpfr) + +# ------------------------------------------------------------------------------ +# compare_bc_all_methods +# ------------------------------------------------------------------------------ +# Compute all four methods and MPFR references over a vector of r-values +# for the reparametrized Blume–Capel normalizing constant +# +# Z(r) = sum_{s=0}^C exp( θ_lin * (s - ref) + θ_quad * (s - ref)^2 + s * r ). +# +# Args: +# max_cat : integer, max category C (scores are s = 0..C) +# ref : integer, baseline category index for centering (s - ref) +# r_vals : numeric vector of r values to scan +# theta_lin : numeric, linear θ parameter +# theta_quad : numeric, quadratic θ parameter +# mpfr_prec : integer, MPFR precision (bits) for reference calculations +# +# Returns: +# data.frame with columns described in the file header (see “Outputs”). +# ------------------------------------------------------------------------------ + +compare_bc_all_methods <- function(max_cat = 10, + ref = 3, + r_vals = seq(-70, 70, length.out = 2000), + theta_lin = 0.12, + theta_quad = -0.02, + mpfr_prec = 256) { + + # --- score grid and θ-part --------------------------------------------------- + scores <- 0:max_cat # s = 0..C + centered <- scores - ref # (s - ref) + + # θ_part(s) = θ_lin*(s - ref) + θ_quad*(s - ref)^2 + theta_part <- theta_lin * centered + theta_quad * centered^2 + + # For the unbounded power-chain: exp(θ_part(s)) + exp_m <- exp(theta_part) + + # Output container ------------------------------------------------------------ + res <- data.frame( + r = r_vals, + direct = NA_real_, + preexp = NA_real_, + direct_bound = NA_real_, + preexp_bound = NA_real_, + err_direct = NA_real_, + err_preexp = NA_real_, + err_direct_bound = NA_real_, + err_preexp_bound = NA_real_, + ref_unscaled = NA_real_, + ref_scaled = NA_real_, + bound = NA_real_, # term_max = M(r), puur ter inspectie + theta_lin = theta_lin, + theta_quad = theta_quad, + max_cat = max_cat, + ref = ref + ) + + # --- MPFR constants independent of r ---------------------------------------- + tl_mpfr <- mpfr(theta_lin, mpfr_prec) + tq_mpfr <- mpfr(theta_quad, mpfr_prec) + sc_center_mpfr <- mpfr(centered, mpfr_prec) # (s - ref) + sc_raw_mpfr <- mpfr(scores, mpfr_prec) # s + + # --- Main loop over r -------------------------------------------------------- + for (i in seq_along(r_vals)) { + r <- r_vals[i] + + # Standard double-precision exponents + term <- theta_part + scores * r + + # ---------- MPFR references ---------- + r_mpfr <- mpfr(r, mpfr_prec) + term_mpfr <- tl_mpfr * sc_center_mpfr + + tq_mpfr * sc_center_mpfr * sc_center_mpfr + + sc_raw_mpfr * r_mpfr + + term_max_mpfr <- mpfr(max(asNumeric(term_mpfr)), mpfr_prec) + ref_unscaled_mpfr <- sum(exp(term_mpfr)) + ref_scaled_mpfr <- sum(exp(term_mpfr - term_max_mpfr)) + + # Store numeric references + res$ref_unscaled[i] <- asNumeric(ref_unscaled_mpfr) + res$ref_scaled[i] <- asNumeric(ref_scaled_mpfr) + + # ---------- (1) Direct (unbounded) ---------- + v_direct <- sum(exp(term)) + res$direct[i] <- v_direct + + # ---------- (2) Preexp (unbounded) ---------- + # Power-chain on exp(r): s = 0..max_cat, so start at s=0 with pow = 1 + eR <- exp(r) + pow <- 1.0 + S_pre <- 0.0 + for (j in seq_along(scores)) { + S_pre <- S_pre + exp_m[j] * pow + pow <- pow * eR + } + res$preexp[i] <- S_pre + + # ---------- (3) Direct + max-bound ---------- + term_max <- max(term) # M(r) + res$bound[i] <- term_max + + sum_direct_bound <- 0.0 + for (j in seq_along(scores)) { + sum_direct_bound <- sum_direct_bound + + exp(theta_part[j] + scores[j] * r - term_max) + } + res$direct_bound[i] <- sum_direct_bound + + # ---------- (4) Preexp + max-bound ---------- + pow_b <- exp(-term_max) # s = 0 → exp(0*r - term_max) + S_pre_b <- 0.0 + for (j in seq_along(scores)) { + S_pre_b <- S_pre_b + exp_m[j] * pow_b + pow_b <- pow_b * eR + } + res$preexp_bound[i] <- S_pre_b + + # ---------- Errors (vs MPFR) ---------- + res$err_direct[i] <- + asNumeric(abs((mpfr(v_direct, mpfr_prec) - ref_unscaled_mpfr) / ref_unscaled_mpfr)) + res$err_preexp[i] <- + asNumeric(abs((mpfr(S_pre, mpfr_prec) - ref_unscaled_mpfr) / ref_unscaled_mpfr)) + res$err_direct_bound[i] <- + asNumeric(abs((mpfr(sum_direct_bound, mpfr_prec) - ref_scaled_mpfr) / ref_scaled_mpfr)) + res$err_preexp_bound[i] <- + asNumeric(abs((mpfr(S_pre_b, mpfr_prec) - ref_scaled_mpfr) / ref_scaled_mpfr)) + } + + res +} + + + +# ------------------------------------------------------------------------------ +# plot_bc_four +# ------------------------------------------------------------------------------ +# Plot the four relative error curves on a log y-axis. +# +# Args: +# res : data.frame produced by compare_bc_all_methods() +# draw_order : character vector with any ordering of: +# c("err_direct","err_direct_bound","err_preexp_bound","err_preexp") +# alpha : named numeric vector (0..1) alphas for the same names +# lwd : line width +# +# Returns: (invisible) NULL. Draws a plot. +# +plot_bc_four = function(res, + draw_order = c("err_direct","err_direct_bound", + "err_preexp_bound","err_preexp"), + alpha = c(err_direct = 0.00, + err_direct_bound = 0.00, + err_preexp_bound = 0.40, + err_preexp = 0.40), + lwd = 2) { + + base_cols = c(err_direct = "#000000", + err_preexp = "#D62728", + err_direct_bound = "#1F77B4", + err_preexp_bound = "#9467BD") + + to_rgba = function(hex, a) rgb(t(col2rgb(hex))/255, alpha = a) + + cols = mapply(to_rgba, base_cols[draw_order], alpha[draw_order], + SIMPLIFY = TRUE, USE.NAMES = TRUE) + + vals = unlist(res[draw_order]) + vals = vals[is.finite(vals)] + ylim = if (length(vals)) { + q = stats::quantile(vals, c(.01, .99), na.rm = TRUE) + c(q[1] / 10, q[2] * 10) + } else c(1e-20, 1e-12) + + first = draw_order[1] + plot(res$r, res[[first]], type = "l", log = "y", + col = cols[[1]], lwd = lwd, ylim = ylim, + xlab = "r", ylab = "Relative error (vs MPFR)", + main = "Blume–Capel: Direct / Preexp / (Split) Bound") + + if (length(draw_order) > 1) { + for (k in 2:length(draw_order)) { + lines(res$r, res[[draw_order[k]]], col = cols[[k]], lwd = lwd) + } + } + + abline(h = .Machine$double.eps, col = "gray70", lty = 2) + + ## --- Theoretical bound where max term hits exp(709) + scores <- 0:res$max_cat[1] + centered <- scores - res$ref[1] + + # θ_part(s) = θ_lin*(s-ref) + θ_quad*(s-ref)^2 + theta_part <- res$theta_lin[1] * centered + + res$theta_quad[1] * centered * centered + + U <- 709 + pos <- scores > 0 + + if (any(pos)) { + r_up_vec <- (U - theta_part[pos]) / scores[pos] + r_up <- min(r_up_vec) + } else { + r_up <- Inf + } + + # Geen zinvolle beneden-grens voor overflow met s >= 0 + r_low <- -Inf + + if (is.finite(r_up)) { + abline(v = r_up, col = "darkgreen", lty = 2, lwd = 2) + } + + print(r_low) + print(r_up) + + legend("top", + legend = c("Direct", + "Direct + bound (split)", + "Preexp + bound (split)", + "Preexp") + [match(draw_order, + c("err_direct","err_direct_bound", + "err_preexp_bound","err_preexp"))], + col = cols, lwd = lwd, bty = "n") + + invisible(NULL) +} + + +# ------------------------------------------------------------------------------ +# summarize_bc_four +# ------------------------------------------------------------------------------ +# Summarize accuracy per method. +# +# Args: +# res : data.frame from compare_bc_all_methods() +# +# Returns: +# data.frame with columns: Method, Mean, Median, Max, Finite +# +summarize_bc_four = function(res) { + cols = c("err_direct","err_direct_bound","err_preexp_bound","err_preexp") + labs = c("Direct","Direct+Bound(split)","Preexp+Bound(split)","Preexp") + mk = function(v){ + f = is.finite(v) & v > 0 + c(Mean=mean(v[f]), Median=median(v[f]), Max=max(v[f]), Finite=mean(f)) + } + out = t(sapply(cols, function(nm) mk(res[[nm]]))) + data.frame(Method=labs, out, row.names=NULL, check.names=FALSE) +} + +# ============================================================================== +# Example usage (uncomment to run locally) +# ------------------------------------------------------------------------------ +# res = compare_bc_all_methods( +# max_cat = 4, +# ref = 0, +# r_vals = seq(170, 175, length.out = 1000), +# theta_lin = 0, +# theta_quad = 1.00, +# mpfr_prec = 256 +# ) +# plot_bc_four(res, +# draw_order = c("err_direct","err_direct_bound","err_preexp_bound","err_preexp"), +# alpha = c(err_direct = 0.00, +# err_direct_bound = 1.00, +# err_preexp_bound = 1.00, +# err_preexp = 0.00), +# lwd = 1) +# print(summarize_bc_four(res), digits = 3) +# ============================================================================== + +scan_bc_configs <- function(max_cat_vec = c(4, 10), + ref_vec = c(0, 2), + theta_lin_vec = c(0.0, 0.12), + theta_quad_vec = c(-0.02, 0.0, 0.02), + r_vals = seq(-80, 80, length.out = 2000), + mpfr_prec = 256, + tol = 1e-12) { + + cfg_grid <- expand.grid( + max_cat = max_cat_vec, + ref = ref_vec, + theta_lin = theta_lin_vec, + theta_quad = theta_quad_vec, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + ) + + all_summaries <- vector("list", nrow(cfg_grid)) + + for (i in seq_len(nrow(cfg_grid))) { + cfg <- cfg_grid[i, ] + cat("Config", i, "of", nrow(cfg_grid), ":", + "max_cat =", cfg$max_cat, + "ref =", cfg$ref, + "theta_lin =", cfg$theta_lin, + "theta_quad =", cfg$theta_quad, "\n") + + res_i <- compare_bc_all_methods( + max_cat = cfg$max_cat, + ref = cfg$ref, + r_vals = r_vals, + theta_lin = cfg$theta_lin, + theta_quad = cfg$theta_quad, + mpfr_prec = mpfr_prec + ) + + summ_i <- summarize_bc_methods(res_i, tol = tol) + all_summaries[[i]] <- summ_i + } + + do.call(rbind, all_summaries) +} + +classify_bc_bound_methods <- function(res, tol = 1e-12, + eps_better = 1e-3) { + # tol : threshold for "good enough" relative error + # eps_better : multiplicative margin to call one method "better" when both good + + r <- res$r + eD <- res$err_direct_bound + eP <- res$err_preexp_bound + + finiteD <- is.finite(eD) & eD > 0 + finiteP <- is.finite(eP) & eP > 0 + + goodD <- finiteD & (eD < tol) + goodP <- finiteP & (eP < tol) + + state <- character(length(r)) + + for (i in seq_along(r)) { + if (!goodD[i] && !goodP[i]) { + state[i] <- "neither_good" + } else if (goodD[i] && !goodP[i]) { + state[i] <- "only_direct_good" + } else if (!goodD[i] && goodP[i]) { + state[i] <- "only_preexp_good" + } else { + # both good: compare which is better + # e.g. if preexp_bound error is at least eps_better times smaller than direct_bound + if (eP[i] <= eD[i] * (1 - eps_better)) { + state[i] <- "both_good_preexp_better" + } else if (eD[i] <= eP[i] * (1 - eps_better)) { + state[i] <- "both_good_direct_better" + } else { + # both good and within eps_better fraction: treat as "tie" + state[i] <- "both_good_similar" + } + } + } + + data.frame( + r = r, + err_direct_bound = eD, + err_preexp_bound = eP, + state = factor(state), + bound = res$bound, + max_cat = res$max_cat[1], + ref = res$ref[1], + theta_lin = res$theta_lin[1], + theta_quad = res$theta_quad[1], + stringsAsFactors = FALSE + ) +} + +summarize_bc_bound_classification <- function(class_df) { + # class_df is the output of classify_bc_bound_methods() + + r <- class_df$r + state <- as.character(class_df$state) + + if (length(r) == 0) { + return(class_df[FALSE, ]) # empty + } + + # Identify run boundaries where state changes + blocks <- list() + start_idx <- 1 + current_state <- state[1] + + for (i in 2:length(r)) { + if (state[i] != current_state) { + # close previous block + blocks[[length(blocks) + 1]] <- list( + state = current_state, + i_start = start_idx, + i_end = i - 1 + ) + # start new block + start_idx <- i + current_state <- state[i] + } + } + # close last block + blocks[[length(blocks) + 1]] <- list( + state = current_state, + i_start = start_idx, + i_end = length(r) + ) + + # Turn into a data.frame with r-intervals and some diagnostics + out_list <- vector("list", length(blocks)) + for (k in seq_along(blocks)) { + b <- blocks[[k]] + idx <- b$i_start:b$i_end + out_list[[k]] <- data.frame( + state = b$state, + r_min = min(r[idx]), + r_max = max(r[idx]), + # a few handy diagnostics per block: + max_err_direct_bound = max(class_df$err_direct_bound[idx], na.rm = TRUE), + max_err_preexp_bound = max(class_df$err_preexp_bound[idx], na.rm = TRUE), + min_bound = min(class_df$bound[idx], na.rm = TRUE), + max_bound = max(class_df$bound[idx], na.rm = TRUE), + n_points = length(idx), + max_cat = class_df$max_cat[1], + ref = class_df$ref[1], + theta_lin = class_df$theta_lin[1], + theta_quad = class_df$theta_quad[1], + stringsAsFactors = FALSE + ) + } + + do.call(rbind, out_list) +} + +# 1. Run the basic comparison +r_vals <- seq(0, 100, length.out = 2000) + +res4 <- compare_bc_all_methods( + max_cat = 4, + ref = 0, + r_vals = r_vals, + theta_lin = 0.12, + theta_quad = -0.02, + mpfr_prec = 256 +) + +# 2. Classify per-r which bound-method wins +class4 <- classify_bc_bound_methods(res4, tol = 1e-12, eps_better = 1e-3) + +# 3. Compress into r-intervals +summary4 <- summarize_bc_bound_classification(class4) +print(summary4, digits = 3) + + + + +simulate_bc_fast_safe <- function(param_grid, + r_vals = seq(-80, 80, length.out = 2000), + mpfr_prec = 256, + tol = 1e-12) { + # param_grid: data.frame with columns + # max_cat, ref, theta_lin, theta_quad + # r_vals : vector of residual r values + # tol : tolerance for "ok" numerics (relative error) + # + # Returns one big data.frame with columns: + # config_id, max_cat, ref, theta_lin, theta_quad, + # r, bound, fast_val, safe_val, + # err_fast, err_safe, ok_fast, ok_safe, + # ref_scaled (MPFR reference) + + if (!all(c("max_cat", "ref", "theta_lin", "theta_quad") %in% names(param_grid))) { + stop("param_grid must have columns: max_cat, ref, theta_lin, theta_quad") + } + + out_list <- vector("list", nrow(param_grid)) + + for (cfg_idx in seq_len(nrow(param_grid))) { + cfg <- param_grid[cfg_idx, ] + max_cat <- as.integer(cfg$max_cat) + ref <- as.integer(cfg$ref) + theta_lin <- as.numeric(cfg$theta_lin) + theta_quad <- as.numeric(cfg$theta_quad) + + # --- score grid and θ-part for this config -------------------------------- + scores <- 0:max_cat + centered <- scores - ref + + theta_part <- theta_lin * centered + theta_quad * centered^2 + exp_m <- exp(theta_part) # for fast method + + # MPFR constants + tl_mpfr <- mpfr(theta_lin, mpfr_prec) + tq_mpfr <- mpfr(theta_quad, mpfr_prec) + sc_center_mpfr <- mpfr(centered, mpfr_prec) + sc_raw_mpfr <- mpfr(scores, mpfr_prec) + + # Storage for this config + n_r <- length(r_vals) + res_cfg <- data.frame( + config_id = rep(cfg_idx, n_r), + max_cat = rep(max_cat, n_r), + ref = rep(ref, n_r), + theta_lin = rep(theta_lin, n_r), + theta_quad = rep(theta_quad, n_r), + r = r_vals, + bound = NA_real_, + fast_val = NA_real_, + safe_val = NA_real_, + err_fast = NA_real_, + err_safe = NA_real_, + ok_fast = NA, + ok_safe = NA, + ref_scaled = NA_real_, + stringsAsFactors = FALSE + ) + + # --- main loop over r for this config ------------------------------------- + for (i in seq_along(r_vals)) { + r <- r_vals[i] + + ## Double-precision exponents: + term <- theta_part + scores * r # θ_part(s) + s*r + term_max <- max(term) # M(r) = bound + res_cfg$bound[i] <- term_max + + ## MPFR reference (scaled with max-term): + r_mpfr <- mpfr(r, mpfr_prec) + term_mpfr <- tl_mpfr * sc_center_mpfr + + tq_mpfr * sc_center_mpfr * sc_center_mpfr + + sc_raw_mpfr * r_mpfr + term_max_mpfr <- mpfr(max(asNumeric(term_mpfr)), mpfr_prec) + ref_scaled_mpfr <- sum(exp(term_mpfr - term_max_mpfr)) + ref_scaled_num <- asNumeric(ref_scaled_mpfr) + res_cfg$ref_scaled[i] <- ref_scaled_num + + # --- SAFE: Direct + max-bound ------------------------------------------ + # Z_safe = sum_s exp(θ_part(s) + s*r - term_max) + safe_sum <- 0.0 + for (j in seq_along(scores)) { + safe_sum <- safe_sum + exp(theta_part[j] + scores[j] * r - term_max) + } + res_cfg$safe_val[i] <- safe_sum + + # --- FAST: Preexp + max-bound (power-chain) ---------------------------- + # Z_fast = sum_s exp(θ_part(s)) * exp(s*r - term_max) + eR <- exp(r) + pow_b <- exp(-term_max) # s = 0 → exp(0*r - term_max) + fast_sum <- 0.0 + for (j in seq_along(scores)) { + fast_sum <- fast_sum + exp_m[j] * pow_b + pow_b <- pow_b * eR + } + res_cfg$fast_val[i] <- fast_sum + + # --- Relative errors vs MPFR (scaled) ---------------------------------- + if (is.finite(ref_scaled_num) && ref_scaled_num > 0) { + res_cfg$err_safe[i] <- abs(safe_sum - ref_scaled_num) / ref_scaled_num + res_cfg$err_fast[i] <- abs(fast_sum - ref_scaled_num) / ref_scaled_num + } else { + res_cfg$err_safe[i] <- NA_real_ + res_cfg$err_fast[i] <- NA_real_ + } + + res_cfg$ok_safe[i] <- !is.na(res_cfg$err_safe[i]) && + is.finite(res_cfg$err_safe[i]) && + (res_cfg$err_safe[i] < tol) + + res_cfg$ok_fast[i] <- !is.na(res_cfg$err_fast[i]) && + is.finite(res_cfg$err_fast[i]) && + (res_cfg$err_fast[i] < tol) + } + + out_list[[cfg_idx]] <- res_cfg + } + + do.call(rbind, out_list) +} \ No newline at end of file diff --git a/dev/numerical_analyses/bgm_blumecapel_normalization_PL_extra.R b/dev/numerical_analyses/bgm_blumecapel_normalization_PL_extra.R new file mode 100644 index 0000000..43ebeb0 --- /dev/null +++ b/dev/numerical_analyses/bgm_blumecapel_normalization_PL_extra.R @@ -0,0 +1,900 @@ +############################################################ +# Blume–Capel normalization analysis: +# Numerical comparison of FAST vs SAFE exponentiation methods +# +# Objective +# --------- +# This script provides a full numerical investigation of two methods +# to compute the *scaled* Blume–Capel partition sum: +# +# Z_scaled(r) = sum_{s=0}^C exp( θ_part(s) + s*r - M(r) ) +# +# where +# θ_part(s) = θ_lin * (s - ref) + θ_quad * (s - ref)^2 +# and +# M(r) = max_s ( θ_part(s) + s*r ). +# +# We compare two computational approaches: +# +# SAFE = Direct computation : sum_s exp(θ_part + s*r - M(r)) +# FAST = Power-chain precompute : sum_s exp(θ_part(s)) * exp(s*r - M(r)) +# +# MPFR (256-bit) is used as the ground-truth reference. +# +# The goals are: +# +# 1. Determine each method's numerical stability across a wide range +# of (max_cat, ref, θ_lin, θ_quad, r). +# +# 2. Map all cases where FAST becomes inaccurate or produces NaN. +# +# 3. Identify the correct switching rule for the C++ implementation: +# if (bound <= ~709) use FAST +# else use SAFE +# +# where `bound = M(r)` is the maximum exponent before rescaling. +# +# 4. Produce plots and summary statistics to permanently document the +# reasoning behind this rule. +# +# Key numerical fact +# ------------------ +# exp(x) in IEEE double precision overflows at x ≈ 709.782712893. +# Therefore any exponent near ±709 is dangerous. +# +# Outcome summary +# --------------- +# - SAFE is stable across the entire tested range. +# - FAST is perfectly accurate **as long as bound ≤ ~709** +# - All FAST failures (NaN or large error) occur only when bound > ~709 +# - No FAST failures were observed below this threshold. +# +# This provides strong empirical justification for the C++ switching rule. +############################################################ + +library(Rmpfr) +library(dplyr) +library(ggplot2) + +############################################################ +# 1. Simulation function +# +# Simulates FAST vs SAFE across: +# - parameter grid (max_cat, ref, θ_lin, θ_quad) +# - range of r values +# +# Returns one large data.frame containing: +# - the computed bound M(r) +# - FAST and SAFE values +# - MPFR reference +# - relative errors +# - logical OK flags (err < tol) +############################################################ + +simulate_bc_fast_safe <- function(param_grid, + r_vals = seq(-80, 80, length.out = 2000), + mpfr_prec = 256, + tol = 1e-12) { + + if (!all(c("max_cat", "ref", "theta_lin", "theta_quad") %in% names(param_grid))) { + stop("param_grid must have columns: max_cat, ref, theta_lin, theta_quad") + } + + out_list <- vector("list", nrow(param_grid)) + + for (cfg_idx in seq_len(nrow(param_grid))) { + cfg <- param_grid[cfg_idx, ] + max_cat <- as.integer(cfg$max_cat) + ref <- as.integer(cfg$ref) + theta_lin <- as.numeric(cfg$theta_lin) + theta_quad <- as.numeric(cfg$theta_quad) + + # Score grid and θ(s) + scores <- 0:max_cat + centered <- scores - ref + theta_part <- theta_lin * centered + theta_quad * centered^2 + exp_m <- exp(theta_part) # used by FAST + + # Build MPFR constants + tl_mpfr <- mpfr(theta_lin, mpfr_prec) + tq_mpfr <- mpfr(theta_quad, mpfr_prec) + sc_center_mpfr <- mpfr(centered, mpfr_prec) + sc_raw_mpfr <- mpfr(scores, mpfr_prec) + + n_r <- length(r_vals) + + res_cfg <- data.frame( + config_id = rep(cfg_idx, n_r), + max_cat = rep(max_cat, n_r), + ref = rep(ref, n_r), + theta_lin = rep(theta_lin, n_r), + theta_quad = rep(theta_quad, n_r), + r = r_vals, + bound = NA_real_, + fast_val = NA_real_, + safe_val = NA_real_, + err_fast = NA_real_, + err_safe = NA_real_, + ok_fast = NA, + ok_safe = NA, + ref_scaled = NA_real_, + stringsAsFactors = FALSE + ) + + # Compute for all r + for (i in seq_along(r_vals)) { + r <- r_vals[i] + + term <- theta_part + scores * r # θ(s) + s*r + term_max <- max(term) # numerical bound + res_cfg$bound[i] <- term_max + + # MPFR scaled reference + r_mpfr <- mpfr(r, mpfr_prec) + term_mpfr <- tl_mpfr * sc_center_mpfr + + tq_mpfr * sc_center_mpfr * sc_center_mpfr + + sc_raw_mpfr * r_mpfr + term_max_mpfr <- mpfr(max(asNumeric(term_mpfr)), mpfr_prec) + ref_scaled_mpfr <- sum(exp(term_mpfr - term_max_mpfr)) + ref_scaled_num <- asNumeric(ref_scaled_mpfr) + res_cfg$ref_scaled[i] <- ref_scaled_num + + # SAFE method: direct evaluation + safe_sum <- 0.0 + for (j in seq_along(scores)) { + safe_sum <- safe_sum + exp(theta_part[j] + scores[j] * r - term_max) + } + res_cfg$safe_val[i] <- safe_sum + + # FAST method: preexp power-chain + eR <- exp(r) + pow_b <- exp(-term_max) + fast_sum <- 0.0 + for (j in seq_along(scores)) { + fast_sum <- fast_sum + exp_m[j] * pow_b + pow_b <- pow_b * eR + } + res_cfg$fast_val[i] <- fast_sum + + # Relative errors + if (is.finite(ref_scaled_num) && ref_scaled_num > 0) { + res_cfg$err_safe[i] <- abs(safe_sum - ref_scaled_num) / ref_scaled_num + res_cfg$err_fast[i] <- abs(fast_sum - ref_scaled_num) / ref_scaled_num + } + + res_cfg$ok_safe[i] <- !is.na(res_cfg$err_safe[i]) && + is.finite(res_cfg$err_safe[i]) && + (res_cfg$err_safe[i] < tol) + + res_cfg$ok_fast[i] <- !is.na(res_cfg$err_fast[i]) && + is.finite(res_cfg$err_fast[i]) && + (res_cfg$err_fast[i] < tol) + } + + out_list[[cfg_idx]] <- res_cfg + } + + do.call(rbind, out_list) +} + +############################################################ +# 2. Parameter grid and simulation +############################################################ + +param_grid <- expand.grid( + max_cat = c(10), + ref = c(0, 5, 10), + theta_lin = c(-0.5, 0.0, 0.5), + theta_quad = c(-0.2, 0.0, 0.2), + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE +) + +# Very wide r-range so that bound covers deep negative and deep positive +r_vals <- seq(-100, 100, length.out = 5001) + +tol <- 1e-12 + +sim_res <- simulate_bc_fast_safe( + param_grid = param_grid, + r_vals = r_vals, + mpfr_prec = 256, + tol = tol +) + +############################################################ +# 3. Post-processing: classify regions, log-errors, abs(bound) +############################################################ + +df <- sim_res %>% + mutate( + err_fast_clipped = pmax(err_fast, 1e-300), + err_safe_clipped = pmax(err_safe, 1e-300), + + log_err_fast = log10(err_fast_clipped), + log_err_safe = log10(err_safe_clipped), + + abs_bound = abs(bound), + + region = case_when( + ok_fast & ok_safe ~ "both_ok", + !ok_fast & ok_safe ~ "only_safe_ok", + ok_fast & !ok_safe ~ "only_fast_ok", + TRUE ~ "neither_ok" + ) + ) + +############################################################ +# 4. NaN analysis for FAST +# +# We explicitly check: +# +# Are there *any* NaN occurrences for FAST with |bound| < 709 ? +# +# This is essential: if NaN occurs for FAST even when |bound| is small, +# then the switching rule would fail. +############################################################ + +df_nan <- sim_res %>% filter(is.nan(err_fast)) + +nan_summary <- df_nan %>% + summarise( + n_nan = n(), + min_bound = min(bound, na.rm = TRUE), + max_bound = max(bound, na.rm = TRUE) + ) + +print(nan_summary) + +df_nan_inside <- df_nan %>% filter(abs(bound) < 709) + +cat("\nNumber of FAST NaN cases with |bound| < 709: ", + nrow(df_nan_inside), "\n\n") + +############################################################ +# 5. FAST and SAFE plots vs bound +# +# We also explicitly count how many cases fail (ok_* == FALSE) +# while |bound| < 709. If the switching rule is correct, this +# number should be zero for FAST in the region where we intend +# to use it. +############################################################ + +# Count failures for FAST and SAFE when |bound| < 709 +fast_fail_inside <- df %>% + filter(abs(bound) < 709, !ok_fast) %>% + nrow() + +safe_fail_inside <- df %>% + filter(abs(bound) < 709, !ok_safe) %>% + nrow() + +cat("\nFAST failures with |bound| < 709:", fast_fail_inside, "\n") +cat("SAFE failures with |bound| < 709:", safe_fail_inside, "\n\n") + +# FAST +ggplot(df, aes(x = bound, y = log_err_fast, colour = region)) + + geom_point(alpha = 0.3, size = 0.6, na.rm = TRUE) + + geom_hline(yintercept = log10(tol), linetype = 2) + + geom_vline(xintercept = 709, linetype = 2) + + geom_vline(xintercept = -709, linetype = 2) + + scale_color_manual(values = c( + both_ok = "darkgreen", + only_safe_ok = "orange", + only_fast_ok = "blue", + neither_ok = "red" + )) + + labs( + x = "bound = max_s (theta_part(s) + s*r)", + y = "log10(relative error) of FAST", + colour = "region", + subtitle = paste( + "FAST failures with |bound| < 709:", fast_fail_inside + ) + ) + + ggtitle("FAST method vs bound") + + theme_minimal() + +# SAFE +ggplot(df, aes(x = bound, y = log_err_safe, colour = region)) + + geom_point(alpha = 0.3, size = 0.6, na.rm = TRUE) + + geom_hline(yintercept = log10(tol), linetype = 2) + + geom_vline(xintercept = 709, linetype = 2) + + geom_vline(xintercept = -709, linetype = 2) + + scale_color_manual(values = c( + both_ok = "darkgreen", + only_safe_ok = "orange", + only_fast_ok = "blue", + neither_ok = "red" + )) + + labs( + x = "bound = max_s (theta_part(s) + s*r)", + y = "log10(relative error) of SAFE", + colour = "region", + subtitle = paste( + "SAFE failures with |bound| < 709:", safe_fail_inside + ) + ) + + ggtitle("SAFE method vs bound") + + theme_minimal() + + +############################################################ +# 6. Fraction of configurations per |bound|-bin +############################################################ + +df_bins <- df %>% + filter(is.finite(bound)) %>% + mutate( + abs_bound = abs(bound), + bound_bin = cut( + abs_bound, + breaks = seq(0, max(abs_bound, na.rm = TRUE) + 10, by = 10), + include_lowest = TRUE + ) + ) %>% + group_by(bound_bin) %>% + summarise( + mid_abs_bound = mean(abs_bound, na.rm = TRUE), + frac_fast_ok = mean(ok_fast, na.rm = TRUE), + frac_safe_ok = mean(ok_safe, na.rm = TRUE), + n = n(), + .groups = "drop" + ) + +ggplot(df_bins, aes(x = mid_abs_bound)) + + geom_line(aes(y = frac_fast_ok, colour = "FAST ok")) + + geom_line(aes(y = frac_safe_ok, colour = "SAFE ok")) + + geom_vline(xintercept = 709, linetype = 2) + + scale_colour_manual(values = c("FAST ok" = "blue", "SAFE ok" = "darkgreen")) + + labs( + x = "|bound| bin center", + y = "fraction of configurations with err < tol", + colour = "" + ) + + ggtitle("FAST vs SAFE numerical stability by |bound|") + + theme_minimal() + +############################################################ +# 7. Summary printed to console +############################################################ + +cat("\n================ SUMMARY =================\n") +print(nan_summary) + +cat("\nFAST NaN cases with |bound| < 709: ", + nrow(df_nan_inside), "\n\n") + +cat(" +Interpretation: +-------------- +- The SAFE method (direct + bound) remains stable and accurate across the + entire tested parameter and residual range. + +- The FAST method (preexp + bound) is extremely accurate when the maximum + exponent before rescaling, `bound = M(r)`, satisfies: + + |bound| ≤ ~709 + +- As soon as bound exceeds approximately +709, FAST becomes unstable: + * large numerical error + * or NaN (observed systematically) + * No such failures appear below this threshold. + +C++ Implementation Rule (recommended): +-------------------------------------- +if (bound <= 709.0) { + // FAST: preexp + bound (power-chain) +} else { + // SAFE: direct + bound +} + +This script constitutes the full reproducible analysis supporting the choice +of this switching threshold in the C++ Blume–Capel normalization code. +") + +############################################################ +# End of script +############################################################ + + + + + + + + +############################################################ +# Blume–Capel probability analysis: +# Numerical comparison of FAST vs SAFE probability evaluation +# +# Objective +# --------- +# This script provides a numerical investigation of two methods +# to compute the *probabilities* under the Blume–Capel +# pseudolikelihood: +# +# p_s(r) = exp( θ_part(s) + s*r - M(r) ) / Z_scaled(r) +# +# where +# θ_part(s) = θ_lin * (s - ref) + θ_quad * (s - ref)^2 +# M(r) = max_s ( θ_part(s) + s*r ) +# Z_scaled = sum_s exp( θ_part(s) + s*r - M(r) ) +# +# We compare two implementations: +# +# SAFE = direct exponentials with numerical bound M(r) +# FAST = preexp + power-chain for exp(s*r - M(r)) +# +# MPFR (256-bit) is used as the ground-truth reference. +# +# Goals +# ----- +# 1. Check numerical stability of SAFE vs FAST for probabilities +# across wide ranges of (max_cat, ref, θ_lin, θ_quad, r). +# 2. Confirm that the same switching rule used for the +# normalization carries over safely to probabilities: +# +# FAST is used only if +# +# |M(r)| <= EXP_BOUND AND +# pow_bound = max_cat * r - M(r) <= EXP_BOUND +# +# where EXP_BOUND ≈ 709. +# +# 3. Document the error behaviour in terms of: +# - max absolute difference per probability vector +# - max relative difference +# - KL divergence to MPFR reference. +# +# Outcome (to be checked empirically) +# ----------------------------------- +# - SAFE should be stable across the tested ranges. +# - FAST should exhibit negligible error whenever the +# switching bounds are satisfied. +# +############################################################ + +library(Rmpfr) +library(dplyr) +library(ggplot2) + +EXP_BOUND <- 709 # double overflow limit for exp() + + +############################################################ +# 1. Helper: MPFR probability reference for a single config +############################################################ + +bc_prob_ref_mpfr <- function(max_cat, ref, theta_lin, theta_quad, + r_vals, + mpfr_prec = 256) { + # Categories and centered scores + scores <- 0:max_cat + centered <- scores - ref + + # MPFR parameters + tl <- mpfr(theta_lin, mpfr_prec) + tq <- mpfr(theta_quad, mpfr_prec) + sc <- mpfr(scores, mpfr_prec) + s0 <- mpfr(centered, mpfr_prec) + + n_r <- length(r_vals) + n_s <- length(scores) + + # reference probability matrix (rows = r, cols = s) + P_ref <- matrix(NA_real_, nrow = n_r, ncol = n_s) + + for (i in seq_len(n_r)) { + r_mp <- mpfr(r_vals[i], mpfr_prec) + + # exponent(s) = θ_part(s) + s*r + term <- tl * s0 + tq * s0 * s0 + sc * r_mp + + # numeric bound M(r) + term_max <- max(asNumeric(term)) + term_max_mp <- mpfr(term_max, mpfr_prec) + + num <- exp(term - term_max_mp) # scaled numerators + Z <- sum(num) + p <- num / Z + + P_ref[i, ] <- asNumeric(p) + } + + P_ref +} + + +############################################################ +# 2. SAFE probabilities (double) for a single config +############################################################ + +bc_prob_safe <- function(max_cat, ref, theta_lin, theta_quad, + r_vals) { + scores <- 0:max_cat + centered <- scores - ref + theta_part <- theta_lin * centered + theta_quad * centered^2 + + n_r <- length(r_vals) + n_s <- length(scores) + + P_safe <- matrix(NA_real_, nrow = n_r, ncol = n_s) + + for (i in seq_len(n_r)) { + r <- r_vals[i] + + # exponents before scaling + exps <- theta_part + scores * r + b <- max(exps) + + numer <- exp(exps - b) + denom <- sum(numer) + + # NO fallback here; let denom=0 or non-finite propagate + p <- numer / denom + + P_safe[i, ] <- p + } + + P_safe +} + + + +############################################################ +# 3. FAST probabilities (double) for a single config +# +# This mirrors what a C++ compute_probs_blume_capel(FAST) +# implementation would do: precompute exp(theta_part), +# then use a power chain for exp(s*r - b). +############################################################ + +bc_prob_fast <- function(max_cat, ref, theta_lin, theta_quad, + r_vals) { + scores <- 0:max_cat + centered <- scores - ref + theta_part <- theta_lin * centered + theta_quad * centered^2 + exp_theta <- exp(theta_part) + + n_r <- length(r_vals) + n_s <- length(scores) + + P_fast <- matrix(NA_real_, nrow = n_r, ncol = n_s) + bounds <- numeric(n_r) + pow_bounds <- numeric(n_r) + + for (i in seq_len(n_r)) { + r <- r_vals[i] + + # exponents before scaling + exps <- theta_part + scores * r + b <- max(exps) + bounds[i] <- b + + # pow_bound = max_s (s*r - b) is attained at s = max_cat + pow_bounds[i] <- max_cat * r - b + + eR <- exp(r) + pow <- exp(-b) + + numer <- numeric(n_s) + denom <- 0.0 + + for (j in seq_along(scores)) { + numer[j] <- exp_theta[j] * pow + denom <- denom + numer[j] + pow <- pow * eR + } + + # Again: NO fallback, just divide and let problems show + p <- numer / denom + + P_fast[i, ] <- p + } + + list( + probs = P_fast, + bound = bounds, + pow_bound = pow_bounds + ) +} + + + +############################################################ +# 4. Main simulation: +# Explore param_grid × r_vals and compare: +# - P_ref (MPFR) +# - P_safe +# - P_fast +############################################################ + +simulate_bc_prob_fast_safe <- function(param_grid, + r_vals, + mpfr_prec = 256, + tol_prob = 1e-12) { + + if (!all(c("max_cat", "ref", "theta_lin", "theta_quad") %in% names(param_grid))) { + stop("param_grid must have columns: max_cat, ref, theta_lin, theta_quad") + } + + out_list <- vector("list", nrow(param_grid)) + + for (cfg_idx in seq_len(nrow(param_grid))) { + cfg <- param_grid[cfg_idx, ] + max_cat <- as.integer(cfg$max_cat) + ref <- as.integer(cfg$ref) + theta_lin <- as.numeric(cfg$theta_lin) + theta_quad <- as.numeric(cfg$theta_quad) + + n_r <- length(r_vals) + + # Reference + P_ref <- bc_prob_ref_mpfr(max_cat, ref, theta_lin, theta_quad, + r_vals, mpfr_prec = mpfr_prec) + # SAFE + P_safe <- bc_prob_safe(max_cat, ref, theta_lin, theta_quad, + r_vals) + # FAST (+ bounds) + fast_res <- bc_prob_fast(max_cat, ref, theta_lin, theta_quad, + r_vals) + P_fast <- fast_res$probs + bound <- fast_res$bound + pow_bound <- fast_res$pow_bound + + # Error metrics per r + max_abs_fast <- numeric(n_r) + max_rel_fast <- numeric(n_r) + kl_fast <- numeric(n_r) + + max_abs_safe <- numeric(n_r) + max_rel_safe <- numeric(n_r) + kl_safe <- numeric(n_r) + + # Helper: KL divergence D(p || q) + kl_div <- function(p, q) { + # If either vector has non-finite entries, KL is undefined → NA + if (!all(is.finite(p)) || !all(is.finite(q))) { + return(NA_real_) + } + + # Valid domain for KL: where both p and q are strictly positive + mask <- (p > 0) & (q > 0) + + # mask may contain NA → remove NA via na.rm=TRUE + if (!any(mask, na.rm = TRUE)) { + return(NA_real_) + } + + sum(p[mask] * (log(p[mask]) - log(q[mask]))) + } + + + for (i in seq_len(n_r)) { + p_ref <- P_ref[i, ] + p_safe <- P_safe[i, ] + p_fast <- P_fast[i, ] + + # max abs diff + max_abs_fast[i] <- max(abs(p_fast - p_ref)) + max_abs_safe[i] <- max(abs(p_safe - p_ref)) + + # max relative diff (avoid divide-by-zero) + rel_fast <- abs(p_fast - p_ref) + rel_safe <- abs(p_safe - p_ref) + + rel_fast[p_ref > 0] <- rel_fast[p_ref > 0] / p_ref[p_ref > 0] + rel_safe[p_ref > 0] <- rel_safe[p_ref > 0] / p_ref[p_ref > 0] + + rel_fast[p_ref == 0] <- 0 + rel_safe[p_ref == 0] <- 0 + + max_rel_fast[i] <- max(rel_fast) + max_rel_safe[i] <- max(rel_safe) + + # KL + kl_fast[i] <- kl_div(p_ref, p_fast) + kl_safe[i] <- kl_div(p_ref, p_safe) + } + + # "ok" flags using tol_prob on max_abs + ok_fast <- is.finite(max_abs_fast) & (max_abs_fast < tol_prob) + ok_safe <- is.finite(max_abs_safe) & (max_abs_safe < tol_prob) + + # FAST switching condition as in C++: + # use FAST only if |bound| <= EXP_BOUND and pow_bound <= EXP_BOUND + use_fast <- (abs(bound) <= EXP_BOUND) & (pow_bound <= EXP_BOUND) + + res_cfg <- data.frame( + config_id = rep(cfg_idx, n_r), + max_cat = rep(max_cat, n_r), + ref = rep(ref, n_r), + theta_lin = rep(theta_lin, n_r), + theta_quad = rep(theta_quad, n_r), + r = r_vals, + bound = bound, + pow_bound = pow_bound, + use_fast = use_fast, + max_abs_fast = max_abs_fast, + max_rel_fast = max_rel_fast, + kl_fast = kl_fast, + max_abs_safe = max_abs_safe, + max_rel_safe = max_rel_safe, + kl_safe = kl_safe, + ok_fast = ok_fast, + ok_safe = ok_safe, + stringsAsFactors = FALSE + ) + + out_list[[cfg_idx]] <- res_cfg + } + + do.call(rbind, out_list) +} + + +############################################################ +# 5. Example simulation setup +############################################################ + +# Parameter grid similar in spirit to the BC normalization script +param_grid <- expand.grid( + max_cat = c(4, 10), # Blume–Capel max categories (example) + ref = c(0, 2, 4, 5, 10), # include both interior & boundary refs + theta_lin = c(-0.5, 0.0, 0.5), + theta_quad = c(-0.2, 0.0, 0.2), + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE +) + +# Wide r-range; adjust as needed to match your empirical residuals +r_vals <- seq(-80, 80, length.out = 2001) + +tol_prob <- 1e-12 + +sim_probs <- simulate_bc_prob_fast_safe( + param_grid = param_grid, + r_vals = r_vals, + mpfr_prec = 256, + tol_prob = tol_prob +) + + +############################################################ +# 6. Post-processing and diagnostics +############################################################ + +df <- sim_probs %>% + mutate( + abs_bound = abs(bound), + region = case_when( + use_fast & ok_fast ~ "fast_ok_when_used", + use_fast & !ok_fast ~ "fast_bad_when_used", + !use_fast & ok_safe ~ "safe_ok_when_used", + !use_fast & !ok_safe ~ "safe_bad_when_used" + ) + ) + +# Check: any bad FAST cases *within* the intended FAST region? +fast_bad_inside <- df %>% + filter(use_fast, !ok_fast) + +cat("\nNumber of FAST probability failures where use_fast == TRUE: ", + nrow(fast_bad_inside), "\n\n") + +# Also track purely based on bounds (even if not marked use_fast) +fast_bad_bound_region <- df %>% + filter(abs(bound) <= EXP_BOUND, + pow_bound <= EXP_BOUND, + !ok_fast) + +cat("Number of FAST probability failures with |bound| <= 709 & pow_bound <= 709: ", + nrow(fast_bad_bound_region), "\n\n") + + +############################################################ +# 7. Plots: error vs bound (FAST only) +############################################################ + +df_fast <- df %>% + filter(use_fast) %>% + mutate( + log10_max_abs_fast = log10(pmax(max_abs_fast, 1e-300)) + ) + +ggplot(df_fast, aes(x = bound, y = log10_max_abs_fast)) + + geom_point(alpha = 0.3, size = 0.6) + + geom_hline(yintercept = log10(tol_prob), linetype = 2, colour = "darkgreen") + + geom_vline(xintercept = EXP_BOUND, linetype = 2, colour = "red") + + geom_vline(xintercept = -EXP_BOUND, linetype = 2, colour = "red") + + labs( + x = "bound = max_s (θ_part(s) + s*r)", + y = "log10(max absolute error) of FAST p_s(r)", + title = "FAST Blume–Capel probabilities vs bound (used region only)", + subtitle = paste( + "FAST failures in use_fast region:", nrow(fast_bad_inside) + ) + ) + + theme_minimal() + + +############################################################ +# 8. Binned summary by |bound| +############################################################ + +df_bins <- df %>% + mutate( + abs_bound = abs(bound), + bound_bin = cut( + abs_bound, + breaks = seq(0, max(abs_bound, na.rm = TRUE) + 10, by = 10), + include_lowest = TRUE + ) + ) %>% + group_by(bound_bin) %>% + summarise( + mid_abs_bound = mean(abs_bound, na.rm = TRUE), + frac_fast_ok = mean(ok_fast[use_fast], na.rm = TRUE), + frac_safe_ok = mean(ok_safe[!use_fast], na.rm = TRUE), + max_abs_fast_99 = quantile(max_abs_fast[use_fast], 0.99, na.rm = TRUE), + max_abs_safe_99 = quantile(max_abs_safe[!use_fast], 0.99, na.rm = TRUE), + n = n(), + .groups = "drop" + ) + +ggplot(df_bins, aes(x = mid_abs_bound)) + + geom_line(aes(y = frac_fast_ok, colour = "FAST ok (used)"), na.rm = TRUE) + + geom_line(aes(y = frac_safe_ok, colour = "SAFE ok (used)"), na.rm = TRUE) + + geom_vline(xintercept = EXP_BOUND, linetype = 2) + + scale_colour_manual(values = c( + "FAST ok (used)" = "blue", + "SAFE ok (used)" = "darkgreen" + )) + + labs( + x = "|bound| bin center", + y = "fraction of configurations with max_abs_error < tol_prob", + colour = "", + title = "Numerical stability of Blume–Capel probabilities by |bound|" + ) + + theme_minimal() + + +############################################################ +# 9. Console summary +############################################################ + +cat("\n================ PROBABILITY SUMMARY =================\n") + +cat("Total rows in simulation:", nrow(df), "\n\n") + +cat("FAST probability failures where use_fast == TRUE: ", + nrow(fast_bad_inside), "\n") +cat("FAST probability failures with |bound| <= 709 & pow_bound <= 709: ", + nrow(fast_bad_bound_region), "\n\n") + +cat("Typical 99th percentile max_abs_error per |bound|-bin (FAST used):\n") +print( + df_bins %>% + select(bound_bin, mid_abs_bound, max_abs_fast_99) %>% + arrange(mid_abs_bound), + digits = 4 +) + +cat(" +Interpretation guide +-------------------- +- `ok_fast`/`ok_safe` are defined by max absolute error vs MPFR reference + being below tol_prob (default 1e-12). + +- `use_fast` encodes the **intended** C++ switching rule: + use_fast = (|bound| <= 709) & (pow_bound <= 709) + +- Ideally: + * `fast_bad_inside` should be empty or extremely rare, + showing that FAST is safe whenever used. + * errors for SAFE should be negligible everywhere. + +You can tighten the switching margin if needed (e.g. require +`pow_bound <= 700`) by adjusting `use_fast` in the code above. +") \ No newline at end of file diff --git a/dev/numerical_analyses/bgm_blumecapel_probs_PL.R b/dev/numerical_analyses/bgm_blumecapel_probs_PL.R new file mode 100644 index 0000000..6d887ba --- /dev/null +++ b/dev/numerical_analyses/bgm_blumecapel_probs_PL.R @@ -0,0 +1,248 @@ +############################################################ +# Blume–Capel probabilities: +# Numerical comparison of 4 methods vs MPFR reference +# +# Methods: +# - direct_unscaled : naive softmax +# - direct_bound : softmax with subtraction of M(r) +# - preexp_unscaled : preexp(theta_part) + power chain (no bound) +# - preexp_bound : preexp(theta_part) + power chain (with bound) +# +# Reference: +# - MPFR softmax with scaling by M(r) +############################################################ + +library(Rmpfr) +library(dplyr) +library(ggplot2) + +EXP_BOUND <- 709 + +############################################################ +# 1. Compare 4 methods for one BC configuration +############################################################ + +compare_bc_prob_4methods_one <- function(max_cat, + ref, + theta_lin, + theta_quad, + r_vals, + mpfr_prec = 256) { + + s_vals <- 0:max_cat + c_vals <- s_vals - ref + n_s <- length(s_vals) + n_r <- length(r_vals) + + # theta_part(s) + theta_part_num <- theta_lin * c_vals + theta_quad * c_vals^2 + + # MPFR parameters + tl_mp <- mpfr(theta_lin, mpfr_prec) + tq_mp <- mpfr(theta_quad, mpfr_prec) + s_mp <- mpfr(s_vals, mpfr_prec) + c_mp <- mpfr(c_vals, mpfr_prec) + + # Precompute for preexp methods + exp_theta <- exp(theta_part_num) + + res <- data.frame( + r = r_vals, + bound = NA_real_, + pow_bound = NA_real_, + err_direct = NA_real_, + err_bound = NA_real_, + err_preexp = NA_real_, + err_preexp_bound= NA_real_ + ) + + for (i in seq_len(n_r)) { + r <- r_vals[i] + r_mp <- mpfr(r, mpfr_prec) + + ## MPFR reference probabilities (softmax with scaling) + term_mp <- tl_mp * c_mp + + tq_mp * c_mp * c_mp + + s_mp * r_mp + + M_num <- max(asNumeric(term_mp)) + M_mp <- mpfr(M_num, mpfr_prec) + + num_ref_mp <- exp(term_mp - M_mp) + Z_ref_mp <- sum(num_ref_mp) + p_ref_mp <- num_ref_mp / Z_ref_mp + p_ref <- asNumeric(p_ref_mp) + + ## Double: exponents + term_num <- theta_part_num + s_vals * r + M <- max(term_num) + res$bound[i] <- M + res$pow_bound[i] <- max_cat * r - M + + ## (1) direct_unscaled + num_dir <- exp(term_num) + den_dir <- sum(num_dir) + p_dir <- num_dir / den_dir + + ## (2) direct_bound + num_b <- exp(term_num - M) + den_b <- sum(num_b) + p_b <- num_b / den_b + + ## (3) preexp_unscaled + eR <- exp(r) + pow <- eR + num_pre <- numeric(n_s) + den_pre <- 0.0 + + # s = 0 term + num_pre[1] <- exp_theta[1] * 1.0 + den_pre <- den_pre + num_pre[1] + + if (max_cat >= 1) { + for (s in 1:max_cat) { + num_pre[s + 1] <- exp_theta[s + 1] * pow + den_pre <- den_pre + num_pre[s + 1] + pow <- pow * eR + } + } + p_pre <- num_pre / den_pre + + ## (4) preexp_bound + eR2 <- exp(r) + pow_b <- exp(-M) + num_preB <- numeric(n_s) + den_preB <- 0.0 + + for (s in 0:max_cat) { + idx <- s + 1 + num_preB[idx] <- exp_theta[idx] * pow_b + den_preB <- den_preB + num_preB[idx] + pow_b <- pow_b * eR2 + } + p_preB <- num_preB / den_preB + + ## Relative errors vs MPFR reference on non-negligible support + tau <- 1e-15 # <-- tweak this + + support_mask <- p_ref >= tau + if (!any(support_mask)) { + support_mask <- p_ref == max(p_ref) # degenerate case: all tiny, pick the max + } + + rel_direct <- abs(p_dir - p_ref)[support_mask] / p_ref[support_mask] + rel_bound <- abs(p_b - p_ref)[support_mask] / p_ref[support_mask] + rel_preexp <- abs(p_pre - p_ref)[support_mask] / p_ref[support_mask] + rel_preB <- abs(p_preB - p_ref)[support_mask] / p_ref[support_mask] + + res$err_direct[i] <- max(rel_direct) + res$err_bound[i] <- max(rel_bound) + res$err_preexp[i] <- max(rel_preexp) + res$err_preexp_bound[i] <- max(rel_preB) + + + + } + + res +} + +############################################################ +# 2. Sweep across param_grid × r_vals +############################################################ + +simulate_bc_prob_4methods <- function(param_grid, + r_vals, + mpfr_prec = 256, + tol = 1e-12) { + + if (!all(c("max_cat", "ref", "theta_lin", "theta_quad") %in% names(param_grid))) { + stop("param_grid must have columns: max_cat, ref, theta_lin, theta_quad") + } + + out_list <- vector("list", nrow(param_grid)) + + for (cfg_idx in seq_len(nrow(param_grid))) { + cfg <- param_grid[cfg_idx, ] + + res_cfg <- compare_bc_prob_4methods_one( + max_cat = cfg$max_cat, + ref = cfg$ref, + theta_lin = cfg$theta_lin, + theta_quad = cfg$theta_quad, + r_vals = r_vals, + mpfr_prec = mpfr_prec + ) + + res_cfg$config_id <- cfg_idx + res_cfg$max_cat <- cfg$max_cat + res_cfg$ref <- cfg$ref + res_cfg$theta_lin <- cfg$theta_lin + res_cfg$theta_quad <- cfg$theta_quad + + # simple ok flags + res_cfg$ok_direct <- is.finite(res_cfg$err_direct) & (res_cfg$err_direct < tol) + res_cfg$ok_bound <- is.finite(res_cfg$err_bound) & (res_cfg$err_bound < tol) + res_cfg$ok_preexp <- is.finite(res_cfg$err_preexp) & (res_cfg$err_preexp < tol) + res_cfg$ok_preexp_bound <- is.finite(res_cfg$err_preexp_bound) & (res_cfg$err_preexp_bound < tol) + + out_list[[cfg_idx]] <- res_cfg + } + + do.call(rbind, out_list) +} + +############################################################ +# 3. Example broad analysis (you can adjust this) +############################################################ + +param_grid <- expand.grid( + max_cat = c(4, 10), + ref = c(0, 2, 4, 5, 10), + theta_lin = c(-0.5, 0.0, 0.5), + theta_quad = c(-0.2, 0.0, 0.2), + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE +) + +r_vals <- seq(-80, 80, length.out = 2001) +tol <- 1e-12 + +sim4 <- simulate_bc_prob_4methods( + param_grid = param_grid, + r_vals = r_vals, + mpfr_prec = 256, + tol = tol +) + +############################################################ +# 4. Summaries: where each method fails, as a function of bound/pow_bound +############################################################ + +df4 <- sim4 %>% + mutate( + abs_bound = abs(bound), + err_direct_cl = pmax(err_direct, 1e-300), + err_bound_cl = pmax(err_bound, 1e-300), + err_preexp_cl = pmax(err_preexp, 1e-300), + err_preexp_bound_cl = pmax(err_preexp_bound, 1e-300), + log_err_direct = log10(err_direct_cl), + log_err_bound = log10(err_bound_cl), + log_err_preexp = log10(err_preexp_cl), + log_err_preexp_bound= log10(err_preexp_bound_cl) + ) + +# Example: failures for each method inside |bound| <= 709 & pow_bound <= 709 +inside <- df4 %>% + filter(abs(bound) <= EXP_BOUND, pow_bound <= EXP_BOUND) + +n_direct_fail <- sum(!inside$ok_direct) +n_bound_fail <- sum(!inside$ok_bound) +n_preexp_fail <- sum(!inside$ok_preexp) +n_preexp_bound_fail <- sum(!inside$ok_preexp_bound) + +cat("\nFailures inside fast region (|bound| <= 709 & pow_bound <= 709):\n") +cat(" direct_unscaled :", n_direct_fail, "\n") +cat(" direct_bound :", n_bound_fail, "\n") +cat(" preexp_unscaled :", n_preexp_fail, "\n") +cat(" preexp_bound (FAST) :", n_preexp_bound_fail, "\n\n") diff --git a/dev/numerical_analyses/bgm_regularordinal_normalization_PL.R b/dev/numerical_analyses/bgm_regularordinal_normalization_PL.R new file mode 100644 index 0000000..8a6219b --- /dev/null +++ b/dev/numerical_analyses/bgm_regularordinal_normalization_PL.R @@ -0,0 +1,695 @@ +################################################################################ +# Reference: Numerical stability study for bounded vs. unbounded exponential sums +# Author: [Your Name] +# Date: [YYYY-MM-DD] +# +# Purpose: +# Evaluate and compare four ways to compute the sum +# +# S = 1 + Σ_{c=1..K} exp( m_c + (c+1)*r ) +# +# where r may vary widely. The goal is to identify numerically stable and +# computationally efficient formulations for use in gradient calculations. +# +# Methods compared: +# (1) direct – naive computation using raw exp() +# (2) bounded – stabilized by subtracting a "bound" (i.e., scaled domain) +# (3) preexp – precomputes exp(m_c) and exp(r) to replace repeated calls +# (4) preexp_bound – preexp variant with the same "bound" scaling +# +# For each method, we compute both unscaled and scaled variants where relevant, +# and compare them against a high-precision MPFR reference. +# +# Key insight: +# - For large negative r, preexp can lose precision (tiny multiplicative updates). +# - For large positive r, bounded scaling avoids overflow. +# - The combination (preexp + bound) gives the best general stability. +# +# Output: +# - res: data frame with per-r results and relative errors +# - Diagnostic plots and summary tables for numerical accuracy +################################################################################ + +library(Rmpfr) # for arbitrary precision reference computations + + +################################################################################ +# 1. Core comparison function +################################################################################ +compare_all_methods <- function(K = 5, + r_vals = seq(-10, 10, length.out = 200), + m_vals = NULL, + mpfr_prec = 256) { + # --------------------------------------------------------------------------- + # Parameters: + # K – number of categories (terms in the sum) + # r_vals – vector of r values to evaluate over + # m_vals – optional vector of m_c values; random if NULL + # mpfr_prec – bits of precision for the high-precision reference + # + # Returns: + # A data.frame containing per-r computed values, reference values, + # relative errors, and failure flags. + # --------------------------------------------------------------------------- + + if (is.null(m_vals)) m_vals <- runif(K, -1, 1) + + results <- data.frame( + r = r_vals, + direct = NA_real_, + bounded = NA_real_, # scaled-domain computation (exp(-bound) factor) + preexp = NA_real_, + preexp_bound = NA_real_, # scaled-domain computation + ref = NA_real_, # unscaled MPFR reference + ref_scaled = NA_real_, # scaled reference + err_direct = NA_real_, + err_bounded = NA_real_, + err_preexp = NA_real_, + err_preexp_bound = NA_real_, + ref_failed_unscaled = FALSE, + ref_failed_scaled = FALSE + ) + + # Loop over all r-values + for (i in seq_along(r_vals)) { + r <- r_vals[i] + bound <- K * r # can be unclipped; use max(0, K*r) for the clipped version + + # --- (0) High-precision MPFR reference ----------------------------------- + r_mp <- mpfr(r, precBits = mpfr_prec) + m_mp <- mpfr(m_vals, precBits = mpfr_prec) + b_mp <- mpfr(bound, precBits = mpfr_prec) + + ref_unscaled_mp <- 1 + sum(exp(m_mp + (1:K) * r_mp)) + ref_scaled_mp <- exp(-b_mp) * ref_unscaled_mp + + # Convert to doubles for inspection + ref_unscaled_num <- asNumeric(ref_unscaled_mp) + ref_scaled_num <- asNumeric(ref_scaled_mp) + results$ref_failed_unscaled[i] <- !is.finite(ref_unscaled_num) + results$ref_failed_scaled[i] <- !is.finite(ref_scaled_num) + results$ref[i] <- if (is.finite(ref_unscaled_num)) ref_unscaled_num else NA_real_ + results$ref_scaled[i] <- if (is.finite(ref_scaled_num)) ref_scaled_num else NA_real_ + + # --- (1) Direct exponential sum (unscaled) ------------------------------- + results$direct[i] <- 1 + sum(exp(m_vals + (1:K) * r)) + + # --- (2) Current bounded implementation (scaled) ------------------------- + eB <- exp(-bound) + results$bounded[i] <- eB + sum(exp(m_vals + (1:K) * r - bound)) + + # --- (3) Precomputed exp only (unscaled) --------------------------------- + exp_r <- exp(r) + exp_m <- exp(m_vals) + powE <- exp_r + S_pre <- 1.0 + for (c in 1:K) { + S_pre <- S_pre + exp_m[c] * powE + powE <- powE * exp_r + } + results$preexp[i] <- S_pre + + # --- (4) Precomputed exp + bound scaling (scaled) ------------------------ + exp_r <- exp(r) + exp_m <- exp(m_vals) + powE <- exp_r + S_preB <- eB + for (c in 1:K) { + S_preB <- S_preB + exp_m[c] * powE * eB + powE <- powE * exp_r + } + results$preexp_bound[i] <- S_preB + + # --- (5) Relative errors vs references ----------------------------------- + # Unscaled methods + for (m in c("direct", "preexp")) { + val <- results[[m]][i] + if (is.finite(val)) { + val_mp <- mpfr(val, precBits = mpfr_prec) + err_mp <- abs((val_mp - ref_unscaled_mp) / ref_unscaled_mp) + results[[paste0("err_", m)]][i] <- asNumeric(err_mp) + } + } + + # Scaled methods + for (m in c("bounded", "preexp_bound")) { + val <- results[[m]][i] + if (is.finite(val)) { + val_mp <- mpfr(val, precBits = mpfr_prec) + err_mp <- abs((val_mp - ref_scaled_mp) / ref_scaled_mp) + results[[paste0("err_", m)]][i] <- asNumeric(err_mp) + } + } + } + + msg_a <- mean(results$ref_failed_unscaled) + msg_b <- mean(results$ref_failed_scaled) + message(sprintf("Ref (unscaled) non-finite in %.1f%%; Ref (scaled) non-finite in %.1f%% of r-values", + 100 * msg_a, 100 * msg_b)) + results +} + + +################################################################################ +# 2. Plotting: log-scale accuracy with failure marking +################################################################################ +plot_errors <- function(res) { + err_cols <- c("err_bounded", "err_direct", "err_preexp", "err_preexp_bound") + cols <- c("gray", "black", "red", "blue") + names(cols) <- err_cols + + # Compute a robust ylim (1st–99th percentile) + finite_vals <- unlist(res[err_cols]) + finite_vals <- finite_vals[is.finite(finite_vals) & finite_vals > 0] + if (length(finite_vals) > 0) { + lower <- quantile(finite_vals, 0.01, na.rm = TRUE) + upper <- quantile(finite_vals, 0.99, na.rm = TRUE) + ylim <- c(lower / 10, upper * 10) + } else { + ylim <- c(1e-20, 1e-12) + } + + # Baseline curve: bounded + plot(res$r, res$err_bounded, type = "l", log = "y", + col = cols["err_bounded"], lwd = 2, + ylim = ylim, + xlab = "r", ylab = "Relative error", + main = "Accuracy and failure regions") + + # Add other methods + for (e in setdiff(err_cols, "err_bounded")) + lines(res$r, res[[e]], col = cols[e], lwd = 2) + + abline(h = .Machine$double.eps, col = "darkgray", lty = 2) + + legend("bottomright", + legend = c("Current bounded", "Direct exp", + "Preexp only", "Preexp + bound"), + col = cols, lwd = 2, bty = "n") + + # Mark numeric failures + for (e in err_cols) { + bad <- which(!is.finite(res[[e]]) | res[[e]] <= 0) + if (length(bad) > 0) + points(res$r[bad], rep(ylim[1], length(bad)), + pch = 21, col = cols[e], bg = cols[e], cex = 0.6) + } + + legend("bottomleft", legend = "dots = 0/Inf/NaN failures", bty = "n") +} + + +################################################################################ +# 3. Summarize accuracy across r +################################################################################ +summarize_accuracy <- function(res) { + err_cols <- c("err_direct", "err_bounded", "err_preexp", "err_preexp_bound") + + summary <- data.frame( + Method = c("Direct exp", "Current bounded", + "Preexp only", "Preexp + bound"), + Mean_error = NA_real_, + Median_error = NA_real_, + Max_error = NA_real_, + Finite_fraction = NA_real_, + Zero_or_Inf_fraction = NA_real_ + ) + + for (j in seq_along(err_cols)) { + e <- res[[err_cols[j]]] + finite_mask <- is.finite(e) & e > 0 + summary$Mean_error[j] <- mean(e[finite_mask], na.rm = TRUE) + summary$Median_error[j] <- median(e[finite_mask], na.rm = TRUE) + summary$Max_error[j] <- max(e[finite_mask], na.rm = TRUE) + summary$Finite_fraction[j] <- mean(finite_mask) + summary$Zero_or_Inf_fraction[j] <- 1 - mean(finite_mask) + } + + summary +} + + +################################################################################ +# 4. Alternate jitter plot for fine-scale comparison +################################################################################ +plot_errors_jitter <- function(res, offset_for_visibility = TRUE) { + err_cols <- c("err_bounded", "err_direct", "err_preexp", "err_preexp_bound") + cols <- c("gray", "black", "red", "blue") + + message("Plotting columns:") + for (i in seq_along(err_cols)) + message(sprintf(" %-15s -> %s", err_cols[i], cols[i])) + + offset_factor <- if (offset_for_visibility) c(1, 5, 100, 1e4) else rep(1, 4) + + finite_vals <- unlist(res[err_cols]) + finite_vals <- finite_vals[is.finite(finite_vals) & finite_vals > 0] + if (length(finite_vals) > 0) { + lower <- quantile(finite_vals, 0.01, na.rm = TRUE) + upper <- quantile(finite_vals, 0.99, na.rm = TRUE) + ylim <- c(lower / 10, upper * 10) + } else ylim <- c(1e-20, 1e-12) + + plot(res$r, res$err_bounded * offset_factor[1], + type = "l", log = "y", lwd = 2, col = cols[1], + ylim = ylim, + xlab = "r", ylab = "Relative error", + main = "Accuracy (offset for visibility)") + + for (j in 2:length(err_cols)) + lines(res$r, res[[err_cols[j]]] * offset_factor[j], col = cols[j], lwd = 2) + + abline(h = .Machine$double.eps, col = "darkgray", lty = 2) + legend("bottomright", + legend = c("Current bounded", "Direct exp", "Preexp only", "Preexp + bound"), + col = cols, lwd = 2) +} + + +################################################################################ +# 5. Example usage +################################################################################ +# Run test for a moderate K and r-range. +# Expand range (e.g. seq(-100, 80, 1)) to probe overflow/underflow limits. +# res <- compare_all_methods(K = 10, r_vals = seq(-71, 71, length.out = 1e4)) +# +# # Plot and summarize +# plot_errors(res) +# summary_table <- summarize_accuracy(res) +# print(summary_table, digits = 3) +# plot_errors_jitter(res) # optional visualization with offsets +################################################################################ + + +################################################################################ +# 6. Ratio stability check (direct vs preexp) × (bound vs clipped) +################################################################################ +compare_prob_ratios <- function(K = 5, + r_vals = seq(-20, 20, length.out = 200), + m_vals = NULL, + mpfr_prec = 256) { + + if (!requireNamespace("Rmpfr", quietly = TRUE)) + stop("Please install Rmpfr: install.packages('Rmpfr')") + + if (is.null(m_vals)) m_vals <- runif(K, -1, 1) + + res <- data.frame( + r = numeric(length(r_vals)), + err_direct_bound = numeric(length(r_vals)), + err_direct_clip = numeric(length(r_vals)), + err_preexp_bound = numeric(length(r_vals)), + err_preexp_clip = numeric(length(r_vals)) + ) + + for (i in seq_along(r_vals)) { + r <- r_vals[i] + b_raw <- K * r + b_clip <- max(0, b_raw) + + # --- High-precision reference --------------------------------------------- + r_mp <- Rmpfr::mpfr(r, precBits = mpfr_prec) + m_mp <- Rmpfr::mpfr(m_vals, precBits = mpfr_prec) + exp_terms_ref <- exp(m_mp + (1:K) * r_mp) + denom_ref <- 1 + sum(exp_terms_ref) + p_ref_num <- as.numeric(exp_terms_ref / denom_ref) + + # --- (1) Direct, un-clipped bound ---------------------------------------- + exp_terms_dB <- exp(m_vals + (1:K) * r - b_raw) + denom_dB <- exp(-b_raw) + sum(exp_terms_dB) + p_dB <- exp_terms_dB / denom_dB + res$err_direct_bound[i] <- max(abs(p_dB - p_ref_num) / p_ref_num) + + # --- (2) Direct, clipped bound ------------------------------------------- + exp_terms_dC <- exp(m_vals + (1:K) * r - b_clip) + denom_dC <- exp(-b_clip) + sum(exp_terms_dC) + p_dC <- exp_terms_dC / denom_dC + res$err_direct_clip[i] <- max(abs(p_dC - p_ref_num) / p_ref_num) + + # --- (3) Preexp, un-clipped bound --------------------------------------- + eR <- exp(r) + eM <- exp(m_vals) + eB <- exp(-b_raw) + powE <- eR + S_preB <- eB + terms_preB <- numeric(K) + for (c in 1:K) { + term <- eM[c] * powE * eB + terms_preB[c] <- term + S_preB <- S_preB + term + powE <- powE * eR + } + p_preB <- terms_preB / S_preB + res$err_preexp_bound[i] <- max(abs(p_preB - p_ref_num) / p_ref_num) + + # --- (4) Preexp, clipped bound ------------------------------------------ + eR <- exp(r) + eM <- exp(m_vals) + eB <- exp(-b_clip) + powE <- eR + S_preC <- eB + terms_preC <- numeric(K) + for (c in 1:K) { + term <- eM[c] * powE * eB + terms_preC[c] <- term + S_preC <- S_preC + term + powE <- powE * eR + } + p_preC <- terms_preC / S_preC + res$err_preexp_clip[i] <- max(abs(p_preC - p_ref_num) / p_ref_num) + + res$r[i] <- r + } + + return(res) +} + + +################################################################################ +# 7. Example usage: compare probability ratio stability +################################################################################ + +# K <- 10 +# r_vals <- seq(-75, 75, length.out = 1e4) +# set.seed(123) +# m_vals <- runif(K, -1, 1) +# +# res_ratio <- compare_prob_ratios(K = K, r_vals = r_vals, m_vals = m_vals) +# +# eps <- .Machine$double.eps +# plot(res_ratio$r, pmax(res_ratio$err_direct_bound, eps), +# type = "l", log = "y", lwd = 2, col = "red", +# xlab = "r", ylab = "Relative error (vs MPFR reference)", +# main = "Numerical stability of p_c ratio computations — 4 variants") +# +# lines(res_ratio$r, pmax(res_ratio$err_direct_clip, eps), col = "blue", lwd = 2) +# lines(res_ratio$r, pmax(res_ratio$err_preexp_bound, eps), col = "orange", lwd = 2) +# lines(res_ratio$r, pmax(res_ratio$err_preexp_clip, eps), col = "purple", lwd = 2) +# +# abline(h = .Machine$double.eps, col = "darkgray", lty = 2) +# legend("top", +# legend = c("Direct + Bound", "Direct + Clipped Bound", +# "Preexp + Bound", "Preexp + Clipped Bound"), +# col = c("red", "blue", "orange", "purple"), +# lwd = 2, bty = "n") +# +# abline(v = -70) +# abline(v = 70) +# +# # Summarize numeric accuracy +# summary_df <- data.frame( +# Method = c("Direct + Bound", "Direct + Clipped Bound", +# "Preexp + Bound", "Preexp + Clipped Bound"), +# Mean_error = c(mean(res_ratio$err_direct_bound, na.rm = TRUE), +# mean(res_ratio$err_direct_clip, na.rm = TRUE), +# mean(res_ratio$err_preexp_bound, na.rm = TRUE), +# mean(res_ratio$err_preexp_clip, na.rm = TRUE)), +# Median_error = c(median(res_ratio$err_direct_bound, na.rm = TRUE), +# median(res_ratio$err_direct_clip, na.rm = TRUE), +# median(res_ratio$err_preexp_bound, na.rm = TRUE), +# median(res_ratio$err_preexp_clip, na.rm = TRUE)), +# Max_error = c(max(res_ratio$err_direct_bound, na.rm = TRUE), +# max(res_ratio$err_direct_clip, na.rm = TRUE), +# max(res_ratio$err_preexp_bound, na.rm = TRUE), +# max(res_ratio$err_preexp_clip, na.rm = TRUE)) +# ) +# print(summary_df, digits = 3) +################################################################################ + +############################################################ +# Blume–Capel probabilities: +# Numerical comparison of FAST vs SAFE methods +# +# Objective +# --------- +# For a single Blume–Capel configuration (max_cat, ref, theta_lin, theta_quad), +# and a grid of residual scores r, we compare +# +# p_s(r) ∝ exp( theta_part(s) + s * r ), s = 0..max_cat +# +# with +# +# theta_part(s) = theta_lin * (s - ref) + theta_quad * (s - ref)^2 +# +# computed three ways: +# +# (1) MPFR reference softmax (high precision) +# (2) SAFE : double, direct exponentials with bound (subtract M(r)) +# (3) FAST : double, preexp(theta_part) + power chain for exp(s*r - M(r)) +# +# We record, for each r: +# +# - numeric bound M(r) = max_s [theta_part(s) + s * r] +# - pow_bound = max_cat * r - M(r) +# - max relative error of SAFE +# - max relative error of FAST +# +# No fallbacks, no patching of non-finite values: we let under/overflow +# show up as Inf/NaN in the errors and inspect those. +############################################################ + +library(Rmpfr) # for high-precision reference + +############################################################ +# 1. Reference probabilities using MPFR +############################################################ + +bc_prob_ref_mpfr <- function(max_cat, ref, theta_lin, theta_quad, + r_vals, + mpfr_prec = 256) { + # categories and centered scores + s_vals <- 0:max_cat + c_vals <- s_vals - ref + + # MPFR parameters + tl <- mpfr(theta_lin, precBits = mpfr_prec) + tq <- mpfr(theta_quad, precBits = mpfr_prec) + s_mp <- mpfr(s_vals, precBits = mpfr_prec) + c_mp <- mpfr(c_vals, precBits = mpfr_prec) + + n_r <- length(r_vals) + n_s <- length(s_vals) + + P_ref <- matrix(NA_real_, nrow = n_r, ncol = n_s) + + for (i in seq_len(n_r)) { + r_mp <- mpfr(r_vals[i], precBits = mpfr_prec) + + # exponent(s) = theta_part(s) + s * r + term_mp <- tl * c_mp + tq * c_mp * c_mp + s_mp * r_mp + + # numeric bound M(r) + M_num <- max(asNumeric(term_mp)) + M_mp <- mpfr(M_num, precBits = mpfr_prec) + + # scaled numerators + num_mp <- exp(term_mp - M_mp) + Z_mp <- sum(num_mp) + p_mp <- num_mp / Z_mp + + P_ref[i, ] <- asNumeric(p_mp) + } + + P_ref +} + +############################################################ +# 2. SAFE probabilities (double, direct + bound) +############################################################ + +bc_prob_safe <- function(max_cat, ref, theta_lin, theta_quad, + r_vals) { + s_vals <- 0:max_cat + c_vals <- s_vals - ref + + theta_part <- theta_lin * c_vals + theta_quad * c_vals^2 + + n_r <- length(r_vals) + n_s <- length(s_vals) + + P_safe <- matrix(NA_real_, nrow = n_r, ncol = n_s) + bound <- numeric(n_r) + + for (i in seq_len(n_r)) { + r <- r_vals[i] + + exps <- theta_part + s_vals * r + M <- max(exps) + bound[i] <- M + + numer <- exp(exps - M) + denom <- sum(numer) + + # no fallback here; denom can be 0 or Inf + P_safe[i, ] <- numer / denom + } + + list( + probs = P_safe, + bound = bound + ) +} + +############################################################ +# 3. FAST probabilities (double, preexp + power chain) +############################################################ + +bc_prob_fast <- function(max_cat, ref, theta_lin, theta_quad, + r_vals) { + s_vals <- 0:max_cat + c_vals <- s_vals - ref + + theta_part <- theta_lin * c_vals + theta_quad * c_vals^2 + exp_theta <- exp(theta_part) + + n_r <- length(r_vals) + n_s <- length(s_vals) + + P_fast <- matrix(NA_real_, nrow = n_r, ncol = n_s) + bound <- numeric(n_r) + pow_bound <- numeric(n_r) + + for (i in seq_len(n_r)) { + r <- r_vals[i] + + # exponents before scaling + exps <- theta_part + s_vals * r + M <- max(exps) + bound[i] <- M + + # pow_bound = max_s (s*r - M) attained at s = max_cat + pow_bound[i] <- max_cat * r - M + + eR <- exp(r) + pow <- exp(-M) + + numer <- numeric(n_s) + denom <- 0 + + for (j in seq_len(n_s)) { + numer[j] <- exp_theta[j] * pow + denom <- denom + numer[j] + pow <- pow * eR + } + + # again: no fallback; denom can be 0/Inf + P_fast[i, ] <- numer / denom + } + + list( + probs = P_fast, + bound = bound, + pow_bound = pow_bound + ) +} + +############################################################ +# 4. Core comparison function (one BC config) +############################################################ + +compare_bc_prob_methods <- function(max_cat = 4, + ref = 2, + theta_lin = 0.0, + theta_quad = 0.0, + r_vals = seq(-20, 20, length.out = 200), + mpfr_prec = 256) { + # MPFR reference + P_ref <- bc_prob_ref_mpfr( + max_cat = max_cat, + ref = ref, + theta_lin = theta_lin, + theta_quad = theta_quad, + r_vals = r_vals, + mpfr_prec = mpfr_prec + ) + + # SAFE + safe_res <- bc_prob_safe( + max_cat = max_cat, + ref = ref, + theta_lin = theta_lin, + theta_quad = theta_quad, + r_vals = r_vals + ) + P_safe <- safe_res$probs + bound_safe <- safe_res$bound + + # FAST + fast_res <- bc_prob_fast( + max_cat = max_cat, + ref = ref, + theta_lin = theta_lin, + theta_quad = theta_quad, + r_vals = r_vals + ) + P_fast <- fast_res$probs + bound_fast <- fast_res$bound + pow_bound <- fast_res$pow_bound + + stopifnot(all.equal(bound_safe, bound_fast)) + + n_r <- length(r_vals) + + res <- data.frame( + r = r_vals, + bound = bound_fast, + pow_bound = pow_bound, + err_safe = NA_real_, + err_fast = NA_real_ + ) + + for (i in seq_len(n_r)) { + p_ref <- P_ref[i, ] + p_safe <- P_safe[i, ] + p_fast <- P_fast[i, ] + + # max relative error vs MPFR reference + # (this is exactly in the spirit of compare_prob_ratios) + res$err_safe[i] <- max(abs(p_safe - p_ref) / p_ref) + res$err_fast[i] <- max(abs(p_fast - p_ref) / p_ref) + } + + res +} + +############################################################ +# 5. Example usage +############################################################ + +# Example: small BC variable +# max_cat <- 4 +# ref <- 2 +# theta_lin <- 0.3 +# theta_quad <- -0.1 +# r_vals <- seq(-80, 80, length.out = 2000) +# +# res_bc <- compare_bc_prob_methods( +# max_cat = max_cat, +# ref = ref, +# theta_lin = theta_lin, +# theta_quad = theta_quad, +# r_vals = r_vals, +# mpfr_prec = 256 +# ) +# +# # Quick inspection: log10 errors +# eps <- .Machine$double.eps +# plot(res_bc$r, pmax(res_bc$err_safe, eps), +# type = "l", log = "y", col = "black", lwd = 2, +# xlab = "r", ylab = "Relative error (vs MPFR)", +# main = "Blume–Capel probabilities: SAFE vs FAST") +# lines(res_bc$r, pmax(res_bc$err_fast, eps), col = "red", lwd = 2) +# abline(h = eps, col = "darkgray", lty = 2) +# legend("topright", +# legend = c("SAFE (direct + bound)", "FAST (preexp + power chain)"), +# col = c("black", "red"), +# lwd = 2, bty = "n") +# +# # You can then condition on bound/pow_bound just like in the +# # Blume–Capel normalization script to decide where FAST is safe. +############################################################ + + + + + diff --git a/man/mrfSampler.Rd b/man/mrfSampler.Rd index f57122b..d4b233c 100644 --- a/man/mrfSampler.Rd +++ b/man/mrfSampler.Rd @@ -11,7 +11,7 @@ mrfSampler( interactions, thresholds, variable_type = "ordinal", - reference_category, + baseline_category, iter = 1000 ) } @@ -43,8 +43,8 @@ for each variable separately. Currently, bgm supports ``ordinal'' and ``blume-capel''. Binary variables are automatically treated as ``ordinal’’. Defaults to \code{variable_type = "ordinal"}.} -\item{reference_category}{An integer vector of length \code{no_variables} specifying the -reference_category category that is used for the Blume-Capel model (details below). +\item{baseline_category}{An integer vector of length \code{no_variables} specifying the +baseline_category category that is used for the Blume-Capel model (details below). Can be any integer value between \code{0} and \code{no_categories} (or \code{no_categories[i]}).} @@ -71,7 +71,7 @@ useful for any type of ordinal variable and gives the user the most freedom in specifying their model. The Blume-Capel option is specifically designed for ordinal variables that -have a special type of reference_category category, such as the neutral +have a special type of baseline_category category, such as the neutral category in a Likert scale. The Blume-Capel model specifies the following quadratic model for the threshold parameters: \deqn{\mu_{\text{c}} = \alpha \times \text{c} + \beta \times (\text{c} - \text{r})^2,}{{\mu_{\text{c}} = \alpha \times \text{c} + \beta \times (\text{c} - \text{r})^2,}} @@ -81,8 +81,8 @@ across categories (increasing threshold values if \eqn{\alpha > 0}{\alpha > 0} and decreasing threshold values if \eqn{\alpha <0}{\alpha <0}), if \eqn{\beta < 0}{\beta < 0}, it offers an increasing penalty for responding in a category further away from the -reference_category category r, while \eqn{\beta > 0}{\beta > 0} suggests a -preference for responding in the reference_category category. +baseline_category category r, while \eqn{\beta > 0}{\beta > 0} suggests a +preference for responding in the baseline_category category. } \examples{ # Generate responses from a network of five binary and ordinal variables. @@ -125,7 +125,7 @@ x = mrfSampler( interactions = Interactions, thresholds = Thresholds, variable_type = c("b", "b", "o", "b", "o"), - reference_category = 2 + baseline_category = 2 ) } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index f3fc966..2df4004 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -150,8 +150,8 @@ BEGIN_RCPP END_RCPP } // sample_bcomrf_gibbs -IntegerMatrix sample_bcomrf_gibbs(int no_states, int no_variables, IntegerVector no_categories, NumericMatrix interactions, NumericMatrix thresholds, StringVector variable_type, IntegerVector reference_category, int iter); -RcppExport SEXP _bgms_sample_bcomrf_gibbs(SEXP no_statesSEXP, SEXP no_variablesSEXP, SEXP no_categoriesSEXP, SEXP interactionsSEXP, SEXP thresholdsSEXP, SEXP variable_typeSEXP, SEXP reference_categorySEXP, SEXP iterSEXP) { +IntegerMatrix sample_bcomrf_gibbs(int no_states, int no_variables, IntegerVector no_categories, NumericMatrix interactions, NumericMatrix thresholds, StringVector variable_type, IntegerVector baseline_category, int iter); +RcppExport SEXP _bgms_sample_bcomrf_gibbs(SEXP no_statesSEXP, SEXP no_variablesSEXP, SEXP no_categoriesSEXP, SEXP interactionsSEXP, SEXP thresholdsSEXP, SEXP variable_typeSEXP, SEXP baseline_categorySEXP, SEXP iterSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; @@ -161,9 +161,9 @@ BEGIN_RCPP Rcpp::traits::input_parameter< NumericMatrix >::type interactions(interactionsSEXP); Rcpp::traits::input_parameter< NumericMatrix >::type thresholds(thresholdsSEXP); Rcpp::traits::input_parameter< StringVector >::type variable_type(variable_typeSEXP); - Rcpp::traits::input_parameter< IntegerVector >::type reference_category(reference_categorySEXP); + Rcpp::traits::input_parameter< IntegerVector >::type baseline_category(baseline_categorySEXP); Rcpp::traits::input_parameter< int >::type iter(iterSEXP); - rcpp_result_gen = Rcpp::wrap(sample_bcomrf_gibbs(no_states, no_variables, no_categories, interactions, thresholds, variable_type, reference_category, iter)); + rcpp_result_gen = Rcpp::wrap(sample_bcomrf_gibbs(no_states, no_variables, no_categories, interactions, thresholds, variable_type, baseline_category, iter)); return rcpp_result_gen; END_RCPP } diff --git a/src/bgmCompare_logp_and_grad.cpp b/src/bgmCompare_logp_and_grad.cpp index 789c6af..4807ff6 100644 --- a/src/bgmCompare_logp_and_grad.cpp +++ b/src/bgmCompare_logp_and_grad.cpp @@ -4,6 +4,7 @@ #include #include "explog_switch.h" #include "common_helpers.h" +#include "variable_helpers.h" @@ -91,7 +92,7 @@ double log_pseudoposterior( const arma::vec proj_g = projection.row(group).t(); // length = num_groups-1 // ---- build group-specific main & pairwise effects ---- - for (int v = 0; v < num_variables; ++v) { + for (int v = 0; v < num_variables; v++) { arma::vec me = compute_group_main_effects( v, num_groups, main_effects, main_effect_indices, proj_g ); @@ -100,7 +101,7 @@ double log_pseudoposterior( main_group(v, arma::span(0, me.n_elem - 1)) = me.t(); // upper triangle incl. base value; mirror to keep symmetry - for (int u = v; u < num_variables; ++u) { // Combines with loop over v + for (int u = v; u < num_variables; u++) { // Combines with loop over v if(u == v) continue; double w = compute_group_pairwise_effects( v, u, num_groups, pairwise_effects, pairwise_effect_indices, @@ -114,7 +115,7 @@ double log_pseudoposterior( const int num_cats = num_categories(v); if (is_ordinal_variable(v)) { // use group-specific main_effects - for (int c = 0; c < num_cats; ++c) { + for (int c = 0; c < num_cats; c++) { const double val = main_group(v, c); log_pp += static_cast(counts_per_category(c, v)) * val; } @@ -141,32 +142,25 @@ double log_pseudoposterior( // bound to stabilize exp; use group-specific params consistently arma::vec bound = num_cats * rest_score; - bound = arma::clamp(bound, 0.0, arma::datum::inf); - arma::vec denom(rest_score.n_elem, arma::fill::zeros); if (is_ordinal_variable(v)) { - // base term exp(-bound) - denom = ARMA_MY_EXP(-bound); - // main_effects from main_group - for (int c = 0; c < num_cats; ++c) { - const double th = main_group(v, c); - const arma::vec exponent = th + (c + 1) * rest_score - bound; - denom += ARMA_MY_EXP(exponent); - } + arma::vec main_eff = main_group.row(v).cols(0, num_cats - 1).t(); + denom = compute_denom_ordinal( + rest_score, main_eff, bound + ); } else { // linear/quadratic main effects from main_group const double lin_effect = main_group(v, 0); const double quad_effect = main_group(v, 1); const int ref = baseline_category(v); - for (int c = 0; c <= num_cats; ++c) { - const int centered = c - ref; - const double quad = quad_effect * centered * centered; - const double lin = lin_effect * c; - const arma::vec exponent = lin + quad + c * rest_score - bound; - denom += ARMA_MY_EXP(exponent); - } + + denom = compute_denom_blume_capel( + rest_score, lin_effect, quad_effect, ref, num_cats, + /*updated in place:*/bound + ); } + // - sum_i [ bound_i + log denom_i ] log_pp -= arma::accu(bound + ARMA_MY_LOG(denom)); } @@ -178,27 +172,27 @@ double log_pseudoposterior( }; // Main effects prior - for (int v = 0; v < num_variables; ++v) { + for (int v = 0; v < num_variables; v++) { const int row0 = main_effect_indices(v, 0); const int row1 = main_effect_indices(v, 1); - for (int r = row0; r <= row1; ++r) { + for (int r = row0; r <= row1; r++) { log_pp += log_beta_prior(main_effects(r, 0)); if (inclusion_indicator(v, v) == 0) continue; - for (int eff = 1; eff < num_groups; ++eff) { + for (int eff = 1; eff < num_groups; eff++) { log_pp += R::dcauchy(main_effects(r, eff), 0.0, difference_scale, true); } } } // Pairwise effects prior - for (int v1 = 0; v1 < num_variables - 1; ++v1) { - for (int v2 = v1 + 1; v2 < num_variables; ++v2) { + for (int v1 = 0; v1 < num_variables - 1; v1++) { + for (int v2 = v1 + 1; v2 < num_variables; v2++) { const int idx = pairwise_effect_indices(v1, v2); log_pp += R::dcauchy(pairwise_effects(idx, 0), 0.0, interaction_scale, true); if (inclusion_indicator(v1, v2) == 0) continue; - for (int eff = 1; eff < num_groups; ++eff) { + for (int eff = 1; eff < num_groups; eff++) { log_pp += R::dcauchy(pairwise_effects(idx, eff), 0.0, difference_scale, true); } } @@ -344,19 +338,19 @@ arma::vec gradient_observed_active( // ------------------------------- // Observed sufficient statistics // ------------------------------- - for (int g = 0; g < num_groups; ++g) { + for (int g = 0; g < num_groups; g++) { // list access arma::imat counts_per_category = counts_per_category_group[g]; arma::imat blume_capel_stats = blume_capel_stats_group[g]; const arma::vec proj_g = projection.row(g).t(); // length = num_groups-1 // Main effects - for (int v = 0; v < num_variables; ++v) { - const int base = main_effect_indices(v, 0); + for (int v = 0; v < num_variables; v++) { + const int base = main_effect_indices(v, 0); const int num_cats = num_categories(v); if (is_ordinal_variable(v)) { - for (int c = 0; c < num_cats; ++c) { + for (int c = 0; c < num_cats; c++) { const int count = counts_per_category(c, v); // overall off = main_index(base + c, 0); @@ -364,7 +358,7 @@ arma::vec gradient_observed_active( // diffs if(inclusion_indicator(v, v) != 0) { - for (int k = 1; k < num_groups; ++k) { + for (int k = 1; k < num_groups; k++) { off = main_index(base + c, k); grad_obs(off) += count * proj_g(k-1); } @@ -383,7 +377,7 @@ arma::vec gradient_observed_active( // diffs if(inclusion_indicator(v, v) != 0) { - for (int k = 1; k < num_groups; ++k) { + for (int k = 1; k < num_groups; k++) { off = main_index(base, k); grad_obs(off) += bc_0 * proj_g(k-1); @@ -396,8 +390,8 @@ arma::vec gradient_observed_active( // Pairwise (observed) arma::mat pairwise_stats = pairwise_stats_group[g]; - for (int v1 = 0; v1 < num_variables - 1; ++v1) { - for (int v2 = v1 + 1; v2 < num_variables; ++v2) { + for (int v1 = 0; v1 < num_variables - 1; v1++) { + for (int v2 = v1 + 1; v2 < num_variables; v2++) { const int row = pairwise_effect_indices(v1, v2); const double pw_stats = 2.0 * pairwise_stats(v1, v2); @@ -405,7 +399,7 @@ arma::vec gradient_observed_active( grad_obs(off) += pw_stats; // upper tri counted once if(inclusion_indicator(v1, v2) != 0){ - for (int k = 1; k < num_groups; ++k) { + for (int k = 1; k < num_groups; k++) { off = pair_index(row, k); grad_obs(off) += pw_stats * proj_g(k-1); } @@ -548,39 +542,30 @@ arma::vec gradient( const int num_group_obs = obs.n_rows; for (int v = 0; v < num_variables; ++v) { - const int K = num_categories(v); + const int K = num_categories(v); const int ref = baseline_category(v); arma::vec rest_score = residual_matrix.col(v); - arma::vec bound = K * rest_score; - bound.clamp(0.0, arma::datum::inf); - - arma::mat exponents(num_group_obs, K + 1, arma::fill::none); + arma::vec bound = K * rest_score; + arma::mat probs; if (is_ordinal_variable(v)) { - exponents.col(0) = -bound; - for (int j = 0; j < K; ++j) { - exponents.col(j + 1) = main_group(v, j) + (j + 1) * rest_score - bound; - } + arma::vec main_param = main_group.row(v).cols(0, K - 1).t(); + probs = compute_probs_ordinal( + main_param, rest_score, bound, K + ); } else { - const double lin_effect = main_group(v, 0); + const double lin_effect = main_group(v, 0); const double quad_effect = main_group(v, 1); - for (int s = 0; s <= K; ++s) { - const int centered = s - ref; - const double lin = lin_effect * s; - const double quad = quad_effect * centered * centered; - exponents.col(s) = lin + quad + s * rest_score - bound; - } + probs = compute_probs_blume_capel( + rest_score, lin_effect, quad_effect, ref, K, bound + ); } - arma::mat probs = ARMA_MY_EXP(exponents); - arma::vec denom = arma::sum(probs, 1); // base term - probs.each_col() /= denom; - // ---- MAIN expected ---- const int base = main_effect_indices(v, 0); if (is_ordinal_variable(v)) { - for (int s = 1; s <= K; ++s) { + for (int s = 1; s <= K; s++) { const int j = s - 1; double sum_col_s = arma::accu(probs.col(s)); @@ -588,14 +573,14 @@ arma::vec gradient( grad(off) -= sum_col_s; if (inclusion_indicator(v, v) == 0) continue; - for (int k = 1; k < num_groups; ++k) { + for (int k = 1; k < num_groups; k++) { off = main_index(base + j, k); grad(off) -= proj_g(k - 1) * sum_col_s; } } } else { - arma::vec lin_score = arma::regspace(0, K); // length K+1 - arma::vec quad_score = arma::square(lin_score - ref); + arma::vec lin_score = arma::regspace(0 - ref, K - ref); // length K+1 + arma::vec quad_score = arma::square(lin_score); double sum_lin = arma::accu(probs * lin_score); double sum_quad = arma::accu(probs * quad_score); @@ -606,7 +591,7 @@ arma::vec gradient( grad(off) -= sum_quad; if (inclusion_indicator(v, v) == 0) continue; - for (int k = 1; k < num_groups; ++k) { + for (int k = 1; k < num_groups; k++) { off = main_index(base, k); grad(off) -= proj_g(k - 1) * sum_lin; off = main_index(base + 1, k); @@ -615,12 +600,19 @@ arma::vec gradient( } // ---- PAIRWISE expected ---- - for (int v2 = 0; v2 < num_variables; ++v2) { + for (int v2 = 0; v2 < num_variables; v2++) { if (v == v2) continue; arma::vec expected_value(num_group_obs, arma::fill::zeros); - for (int s = 1; s <= K; ++s) { - expected_value += s * probs.col(s) % obs.col(v2); + if (is_ordinal_variable(v)) { + for (int s = 1; s <= K; ++s) { + expected_value += s * probs.col(s) % obs.col(v2); + } + } else { + for (int s = 0; s <= K; s++) { + int score = s - ref; + expected_value += score * probs.col(s) % obs.col(v2); + } } double sum_expectation = arma::accu(expected_value); @@ -631,7 +623,7 @@ arma::vec gradient( grad(off) -= sum_expectation; if (inclusion_indicator(v, v2) == 0) continue; - for (int k = 1; k < num_groups; ++k) { + for (int k = 1; k < num_groups; k++) { off = pair_index(row, k); grad(off) -= proj_g(k - 1) * sum_expectation; @@ -782,7 +774,7 @@ double log_pseudoposterior_main_component( int variable, int category, // for ordinal variables only int par, // for Blume-Capel variables only - int h // Overall = 0, differences are 1, .... + int h // Overall = 0, differences are 1,2,... ) { if(h > 0 && inclusion_indicator(variable, variable) == 0) { return 0.0; // No contribution if differences not included @@ -807,7 +799,7 @@ double log_pseudoposterior_main_component( variable, num_groups, main_effects, main_effect_indices, proj_g ); - // store into row v (padded with zeros if variable has < max_num_categories params) + // store into row v main_group(variable, arma::span(0, me.n_elem - 1)) = me.t(); // upper triangle incl. base value; mirror to keep symmetry @@ -824,8 +816,7 @@ double log_pseudoposterior_main_component( // ---- data contribution pseudolikelihood (linear terms) ---- if (is_ordinal_variable(variable)) { const double val = main_group(variable, category); - log_pp += static_cast(counts_per_category(category, variable)) * - val; + log_pp += static_cast(counts_per_category(category, variable)) * val; } else { log_pp += static_cast(blume_capel_stats(par, variable)) * main_group(variable, par); @@ -842,31 +833,25 @@ double log_pseudoposterior_main_component( // bound to stabilize exp; use group-specific params consistently arma::vec bound = num_cats * rest_score; - bound = arma::clamp(bound, 0.0, arma::datum::inf); - arma::vec denom(rest_score.n_elem, arma::fill::zeros); + if (is_ordinal_variable(variable)) { - // base term exp(-bound) - denom = ARMA_MY_EXP(-bound); - // main_effects from main_group - for (int cat = 0; cat < num_cats; cat++) { - const double th = main_group(variable, cat); - const arma::vec exponent = th + (cat + 1) * rest_score - bound; - denom += ARMA_MY_EXP(exponent); - } + arma::vec main_eff = main_group.row(variable).cols(0, num_cats - 1).t(); + denom = compute_denom_ordinal( + rest_score, main_eff, bound + ); } else { // linear/quadratic main effects from main_group const double lin_effect = main_group(variable, 0); const double quad_effect = main_group(variable, 1); const int ref = baseline_category(variable); - for (int cat = 0; cat <= num_cats; cat++) { - const int centered = cat - ref; - const double quad = quad_effect * centered * centered; - const double lin = lin_effect * cat; - const arma::vec exponent = lin + quad + cat * rest_score - bound; - denom += ARMA_MY_EXP(exponent); - } + + denom = compute_denom_blume_capel( + rest_score, lin_effect, quad_effect, ref, num_cats, + /*updated in place:*/bound + ); } + // - sum_i [ bound_i + log denom_i ] log_pp -= arma::accu(bound + ARMA_MY_LOG(denom)); } @@ -1025,32 +1010,25 @@ double log_pseudoposterior_pair_component( // bound to stabilize exp; use group-specific params consistently arma::vec bound = num_cats * rest_score; - bound = arma::clamp(bound, 0.0, arma::datum::inf); - arma::vec denom(rest_score.n_elem, arma::fill::zeros); if (is_ordinal_variable(v)) { - // base term exp(-bound) - denom = ARMA_MY_EXP(-bound); - // main_effects from main_group - for (int c = 0; c < num_cats; ++c) { - const double th = main_group(v, c); - const arma::vec exponent = th + (c + 1) * rest_score - bound; - denom += ARMA_MY_EXP(exponent); - } + arma::vec main_eff = main_group.row(v).cols(0, num_cats - 1).t(); + denom = compute_denom_ordinal( + rest_score, main_eff, bound + ); } else { // linear/quadratic main effects from main_group const double lin_effect = main_group(v, 0); const double quad_effect = main_group(v, 1); const int ref = baseline_category(v); - for (int c = 0; c <= num_cats; ++c) { - const int centered = c - ref; - const double quad = quad_effect * centered * centered; - const double lin = lin_effect * c; - const arma::vec exponent = lin + quad + c * rest_score - bound; - denom += ARMA_MY_EXP(exponent); - } + + denom = compute_denom_blume_capel( + rest_score, lin_effect, quad_effect, ref, num_cats, + /*updated in place:*/bound + ); } + // - sum_i [ bound_i + log denom_i ] log_pp -= arma::accu(bound + ARMA_MY_LOG(denom)); } @@ -1173,40 +1151,31 @@ double log_ratio_pseudolikelihood_constant_variable( arma::vec denom_current(rest_current.n_elem, arma::fill::zeros); arma::vec denom_proposed(rest_proposed.n_elem, arma::fill::zeros); - if (is_ordinal_variable(variable)) { - // regular ordinal/binary - bound_current = num_cats * arma::clamp(rest_current, 0.0, arma::datum::inf); - bound_proposed = num_cats * arma::clamp(rest_proposed, 0.0, arma::datum::inf); - - denom_current = ARMA_MY_EXP(-bound_current); - denom_proposed = ARMA_MY_EXP(-bound_proposed); + if (is_ordinal_variable (variable)) { + bound_current = rest_current * num_cats; + bound_proposed = rest_proposed * num_cats; - for (int c = 0; c < num_cats; ++c) { - denom_current += ARMA_MY_EXP(main_current(c) + (c + 1) * rest_current - bound_current); - denom_proposed += ARMA_MY_EXP(main_proposed(c) + (c + 1) * rest_proposed - bound_proposed); - } + denom_current += compute_denom_ordinal( + rest_current, main_current, bound_current + ); + denom_proposed += compute_denom_ordinal( + rest_proposed, main_proposed, bound_proposed + ); } else { - // Blume-Capel: linear + quadratic - const int ref = baseline_category(variable); - - arma::vec const_current(num_cats + 1, arma::fill::zeros); - arma::vec const_proposed(num_cats + 1, arma::fill::zeros); - for (int s = 0; s <= num_cats; ++s) { - const int centered = s - ref; - const_current(s) = main_current(0) * s + main_current(1) * centered * centered; - const_proposed(s) = main_proposed(0) * s + main_proposed(1) * centered * centered; - } - - double lbound = std::max(const_current.max(), const_proposed.max()); - if (lbound < 0.0) lbound = 0.0; - - bound_current = lbound + num_cats * arma::clamp(rest_current, 0.0, arma::datum::inf); - bound_proposed = lbound + num_cats * arma::clamp(rest_proposed, 0.0, arma::datum::inf); + // Binary or categorical variable: linear + quadratic score + const int ref_cat = baseline_category (variable); + bound_current = rest_current * num_cats; + bound_proposed = rest_proposed * num_cats; + + denom_current = compute_denom_blume_capel( + rest_current, main_current (0), main_current (1), + ref_cat, num_cats, /*Updated in place:*/bound_current + ); - for (int s = 0; s <= num_cats; ++s) { - denom_current += ARMA_MY_EXP(const_current(s) + s * rest_current - bound_current); - denom_proposed += ARMA_MY_EXP(const_proposed(s) + s * rest_proposed - bound_proposed); - } + denom_proposed = compute_denom_blume_capel( + rest_proposed, main_proposed (0), main_proposed (1), + ref_cat, num_cats, /*Updated in place:*/bound_proposed + ); } // --- accumulate contribution --- diff --git a/src/bgmCompare_sampler.cpp b/src/bgmCompare_sampler.cpp index cf6b91d..1485346 100644 --- a/src/bgmCompare_sampler.cpp +++ b/src/bgmCompare_sampler.cpp @@ -17,7 +17,6 @@ - /** * Imputes missing observations for the bgmCompare model. * @@ -89,7 +88,7 @@ void impute_missing_bgmcompare( arma::vec category_response_probabilities(max_num_categories + 1); double exponent, cumsum, u; - int score, person, variable, new_observation, old_observation, group; + int score, person, variable, new_value, old_value, group; //Impute missing data for(int missing = 0; missing < num_missings; missing++) { @@ -132,12 +131,12 @@ void impute_missing_bgmcompare( } else { // For Blume-Capel variables cumsum = 0.0; + const int ref = baseline_category[variable]; for(int category = 0; category <= num_categories(variable); category++) { - exponent = group_main_effects[0] * category; - exponent += group_main_effects[1] * - (category - baseline_category[variable]) * - (category - baseline_category[variable]); - exponent += category * rest_score; + score = category - ref; + exponent = group_main_effects[0] * score; + exponent += group_main_effects[1] * score * score; + exponent += rest_score * score; cumsum += MY_EXP(exponent); category_response_probabilities[category] = cumsum; } @@ -149,31 +148,30 @@ void impute_missing_bgmcompare( while (u > category_response_probabilities[score]) { score++; } - new_observation = score; - old_observation = observations(person, variable); - if(old_observation != new_observation) { + new_value = score; + if(!is_ordinal_variable[variable]) + new_value -= baseline_category[variable]; + old_value = observations(person, variable); + + if(old_value != new_value) { // Update raw observations - observations(person, variable) = new_observation; + observations(person, variable) = new_value; // Update sufficient statistics for main effects if(is_ordinal_variable[variable] == true) { arma::imat counts_per_category_group = counts_per_category[group]; - if(old_observation > 0) - counts_per_category_group(old_observation-1, variable)--; - if(new_observation > 0) - counts_per_category_group(new_observation-1, variable)++; + if(old_value > 0) + counts_per_category_group(old_value-1, variable)--; + if(new_value > 0) + counts_per_category_group(new_value-1, variable)++; counts_per_category[group] = counts_per_category_group; } else { arma::imat blume_capel_stats_group = blume_capel_stats[group]; - blume_capel_stats_group(0, variable) -= old_observation; - blume_capel_stats_group(0, variable) += new_observation; - blume_capel_stats_group(1, variable) -= - (old_observation - baseline_category[variable]) * - (old_observation - baseline_category[variable]); - blume_capel_stats_group(1, variable) += - (new_observation - baseline_category[variable]) * - (new_observation - baseline_category[variable]); + blume_capel_stats_group(0, variable) -= old_value; + blume_capel_stats_group(0, variable) += new_value; + blume_capel_stats_group(1, variable) -= old_value * old_value; + blume_capel_stats_group(1, variable) += new_value * new_value; blume_capel_stats[group] = blume_capel_stats_group; } diff --git a/src/bgm_logp_and_grad.cpp b/src/bgm_logp_and_grad.cpp index a6c21b6..a574b75 100644 --- a/src/bgm_logp_and_grad.cpp +++ b/src/bgm_logp_and_grad.cpp @@ -3,6 +3,7 @@ #include "bgm_logp_and_grad.h" #include "common_helpers.h" #include "explog_switch.h" +#include "variable_helpers.h" @@ -61,6 +62,7 @@ double log_pseudoposterior_main_effects_component ( }; const int num_cats = num_categories(variable); + arma::vec bound = num_cats * residual_matrix.col(variable); // numerical bound vector if (is_ordinal_variable(variable)) { // Prior contribution + sufficient statistic @@ -68,22 +70,12 @@ double log_pseudoposterior_main_effects_component ( log_posterior += value * counts_per_category(category + 1, variable); log_posterior += log_beta_prior (value); - // Vectorized likelihood contribution - // For each person, we compute the unnormalized log-likelihood denominator: - // denom = exp (-bound) + sum_c exp (main_effect_param_c + (c+1) * residual_score - bound) - // Where: - // - residual_score is the summed interaction score excluding the variable itself - // - bound = num_cats * residual_score (for numerical stability) - // - main_effect_param_c is the main_effect parameter for category c (0-based) - arma::vec residual_score = residual_matrix.col (variable); // rest scores for all persons - arma::vec bound = num_cats * residual_score; // numerical bound vector - arma::vec denom = ARMA_MY_EXP (-bound); // initialize with base term + arma::vec residual_score = residual_matrix.col (variable); // rest scores for all persons arma::vec main_effect_param = main_effects.row (variable).cols (0, num_cats - 1).t (); // main_effect parameters - for (int cat = 0; cat < num_cats; cat++) { - arma::vec exponent = main_effect_param(cat) + (cat + 1) * residual_score - bound; // exponent per person - denom += ARMA_MY_EXP (exponent); // accumulate exp terms - } + arma::vec denom = compute_denom_ordinal( + residual_score, main_effect_param, bound + ); // We then compute the total log-likelihood contribution as: // log_posterior -= bound + log (denom), summed over all persons @@ -98,24 +90,14 @@ double log_pseudoposterior_main_effects_component ( log_posterior += value * blume_capel_stats(parameter, variable); log_posterior += log_beta_prior(value); - // Vectorized likelihood contribution - // For each person, we compute the unnormalized log-likelihood denominator: - // denom = sum_c exp (θ_lin * c + θ_quad * (c - ref)^2 + c * residual_score - bound) - // Where: - // - θ_lin, θ_quad are linear and quadratic main_effects - // - ref is the reference category (used for centering) - // - bound = num_cats * residual_score (stabilizes exponentials) arma::vec residual_score = residual_matrix.col(variable); // rest scores for all persons - arma::vec bound = num_cats * residual_score; // numerical bound vector arma::vec denom(num_persons, arma::fill::zeros); // initialize denominator - for (int cat = 0; cat <= num_cats; cat++) { - int centered = cat - ref; // centered category - double quad_term = quadratic_main_effect * centered * centered; // precompute quadratic term - double lin_term = linear_main_effect * cat; // precompute linear term - arma::vec exponent = lin_term + quad_term + cat * residual_score - bound; - denom += ARMA_MY_EXP (exponent); // accumulate over categories - } + denom = compute_denom_blume_capel( + residual_score, linear_main_effect, quadratic_main_effect, ref, + num_cats, bound + ); + // The final log-likelihood contribution is then: // log_posterior -= bound + log (denom), summed over all persons @@ -175,37 +157,33 @@ double log_pseudoposterior_interactions_component ( double log_pseudo_posterior = 2.0 * pairwise_effects(var1, var2) * pairwise_stats(var1, var2); for (int var : {var1, var2}) { - int num_categories_var = num_categories (var); + int num_cats = num_categories (var); // Compute rest score: contribution from other variables - arma::vec residual_scores = observations * pairwise_effects.col (var); - arma::vec bounds = arma::max (residual_scores, arma::zeros (num_observations)) * num_categories_var; + arma::vec residual_score = observations * pairwise_effects.col (var); arma::vec denominator = arma::zeros (num_observations); + arma::vec bound = num_cats * residual_score; // numerical bound vector if (is_ordinal_variable (var)) { - // Ordinal variable: denominator includes exp (-bounds) + arma::vec main_effect_param = main_effects.row (var).cols (0, num_cats - 1).t (); // main_effect parameters - denominator += ARMA_MY_EXP (-bounds); - for (int category = 0; category < num_categories_var; category++) { - arma::vec exponent = main_effects (var, category) + (category + 1) * residual_scores - bounds; - denominator += ARMA_MY_EXP(exponent); - } + denominator += compute_denom_ordinal( + residual_score, main_effect_param, bound + ); } else { - // Binary/categorical variable: quadratic + linear term - const int ref_cat = baseline_category (var); - for (int category = 0; category <= num_categories_var; category++) { - int centered_cat = category - ref_cat; - double lin_term = main_effects (var, 0) * category; - double quad_term = main_effects (var, 1) * centered_cat * centered_cat; - arma::vec exponent = lin_term + quad_term + category * residual_scores - bounds; - denominator += ARMA_MY_EXP (exponent); - } + const int ref = baseline_category (var); + + denominator = compute_denom_blume_capel( + residual_score, main_effects (var, 0), main_effects (var, 1), ref, + num_cats, bound + ); + } // Subtract log partition function and bounds adjustment log_pseudo_posterior -= arma::accu (ARMA_MY_LOG (denominator)); - log_pseudo_posterior -= arma::accu (bounds); + log_pseudo_posterior -= arma::accu (bound); } // Add Cauchy prior terms for included pairwise effects @@ -311,34 +289,26 @@ double log_pseudoposterior ( // Calculate the log denominators for (int variable = 0; variable < num_variables; variable++) { const int num_cats = num_categories(variable); - arma::vec residual_score = residual_matrix.col (variable); // rest scores for all persons - arma::vec bound = num_cats * residual_score; // numerical bound vector - bound = arma::clamp(bound, 0.0, arma::datum::inf); // only positive bounds to prevent overflow + arma::vec residual_score = residual_matrix.col (variable); // rest scores for all persons + arma::vec bound = num_cats * residual_score; // numerical bound vector - arma::vec denom; + arma::vec denom(num_persons, arma::fill::zeros); if (is_ordinal_variable(variable)) { - denom = ARMA_MY_EXP (-bound); // initialize with base term arma::vec main_effect_param = main_effects.row (variable).cols (0, num_cats - 1).t (); // main_effect parameters for variable - for (int cat = 0; cat < num_cats; cat++) { - arma::vec exponent = main_effect_param(cat) + (cat + 1) * residual_score - bound; // exponent per person - denom += ARMA_MY_EXP (exponent); // accumulate exp terms - } + denom += compute_denom_ordinal( + residual_score, main_effect_param, bound + ); } else { + const int ref = baseline_category(variable); const double lin_effect = main_effects(variable, 0); const double quad_effect = main_effects(variable, 1); - const int ref = baseline_category(variable); - denom.zeros(num_persons); - for (int cat = 0; cat <= num_cats; cat++) { - int centered = cat - ref; // centered category - double quad = quad_effect * centered * centered; // precompute quadratic term - double lin = lin_effect * cat; // precompute linear term - arma::vec exponent = lin + quad + cat * residual_score - bound; - denom += ARMA_MY_EXP (exponent); // accumulate over categories - } + //This updates bound + denom = compute_denom_blume_capel( + residual_score, lin_effect, quad_effect, ref, num_cats, bound + ); } - - log_pseudoposterior -= arma::accu (bound + ARMA_MY_LOG (denom)); // total contribution + log_pseudoposterior -= arma::accu (bound + ARMA_MY_LOG (denom)); // total contribution } return log_pseudoposterior; @@ -346,39 +316,7 @@ double log_pseudoposterior ( -/** - * Computes the gradient of the log-pseudoposterior for main and active pairwise parameters. - * - * Gradient components: - * - Observed sufficient statistics (from counts_per_category, blume_capel_stats, pairwise_stats). - * - Minus expected sufficient statistics (computed via probabilities over categories). - * - Plus gradient contributions from priors: - * * Beta priors on main effects. - * * Cauchy priors on active pairwise effects. - * - * Inputs: - * - main_effects: Matrix of main-effect parameters (variables × categories). - * - pairwise_effects: Symmetric matrix of pairwise interaction strengths. - * - inclusion_indicator: Symmetric binary matrix of active pairwise effects. - * - observations: Matrix of categorical observations (persons × variables). - * - num_categories: Number of categories per variable. - * - counts_per_category: Category counts per variable (for ordinal variables). - * - blume_capel_stats: Sufficient statistics for Blume–Capel variables. - * - baseline_category: Reference categories for Blume–Capel variables. - * - is_ordinal_variable: Indicator (1 = ordinal, 0 = Blume–Capel). - * - main_alpha, main_beta: Hyperparameters for Beta priors. - * - pairwise_scale: Scale parameter of the Cauchy prior on interactions. - * - pairwise_stats: Sufficient statistics for pairwise effects. - * - residual_matrix: Matrix of residual scores (persons × variables). - * - * Returns: - * - A vector containing the gradient of the log-pseudoposterior with respect to - * all main and active pairwise parameters, in the same order as - * `vectorize_model_parameters_bgm()`. - */ -arma::vec gradient_log_pseudoposterior( - const arma::mat& main_effects, - const arma::mat& pairwise_effects, +std::pair gradient_observed_active( const arma::imat& inclusion_indicator, const arma::imat& observations, const arma::ivec& num_categories, @@ -386,15 +324,10 @@ arma::vec gradient_log_pseudoposterior( const arma::imat& blume_capel_stats, const arma::ivec& baseline_category, const arma::uvec& is_ordinal_variable, - const double main_alpha, - const double main_beta, - const double pairwise_scale, - const arma::imat& pairwise_stats, - const arma::mat& residual_matrix + const arma::imat& pairwise_stats ) { const int num_variables = observations.n_cols; - const int num_persons = observations.n_rows; - const int num_main = count_num_main_effects(num_categories, is_ordinal_variable); + const int num_main = count_num_main_effects(num_categories, is_ordinal_variable); arma::imat index_matrix(num_variables, num_variables, arma::fill::zeros); // Count active pairwise effects + Index map for pairwise parameters @@ -434,38 +367,86 @@ arma::vec gradient_log_pseudoposterior( } } + return {gradient, index_matrix}; +} + + + +/** + * Computes the gradient of the log-pseudoposterior for main and active pairwise parameters. + * + * Gradient components: + * - Observed sufficient statistics (from counts_per_category, blume_capel_stats, pairwise_stats). + * - Minus expected sufficient statistics (computed via probabilities over categories). + * - Plus gradient contributions from priors: + * * Beta priors on main effects. + * * Cauchy priors on active pairwise effects. + * + * Inputs: + * - main_effects: Matrix of main-effect parameters (variables × categories). + * - pairwise_effects: Symmetric matrix of pairwise interaction strengths. + * - inclusion_indicator: Symmetric binary matrix of active pairwise effects. + * - observations: Matrix of categorical observations (persons × variables). + * - num_categories: Number of categories per variable. + * - counts_per_category: Category counts per variable (for ordinal variables). + * - blume_capel_stats: Sufficient statistics for Blume–Capel variables. + * - baseline_category: Reference categories for Blume–Capel variables. + * - is_ordinal_variable: Indicator (1 = ordinal, 0 = Blume–Capel). + * - main_alpha, main_beta: Hyperparameters for Beta priors. + * - pairwise_scale: Scale parameter of the Cauchy prior on interactions. + * - pairwise_stats: Sufficient statistics for pairwise effects. + * - residual_matrix: Matrix of residual scores (persons × variables). + * + * Returns: + * - A vector containing the gradient of the log-pseudoposterior with respect to + * all main and active pairwise parameters, in the same order as + * `vectorize_model_parameters_bgm()`. + */ +arma::vec gradient_log_pseudoposterior( + const arma::mat& main_effects, + const arma::mat& pairwise_effects, + const arma::imat& inclusion_indicator, + const arma::imat& observations, + const arma::ivec& num_categories, + const arma::ivec& baseline_category, + const arma::uvec& is_ordinal_variable, + const double main_alpha, + const double main_beta, + const double pairwise_scale, + const arma::mat& residual_matrix, + const arma::imat index_matrix, + const arma::vec grad_obs +) { + const int num_variables = observations.n_cols; + const int num_persons = observations.n_rows; + + // Allocate gradient vector (main + active pairwise only) + arma::vec gradient = grad_obs; + // ---- STEP 2: Expected statistics ---- - offset = 0; + int offset = 0; for (int variable = 0; variable < num_variables; variable++) { const int num_cats = num_categories(variable); arma::vec residual_score = residual_matrix.col(variable); arma::vec bound = num_cats * residual_score; - bound = arma::clamp(bound, 0.0, arma::datum::inf); if (is_ordinal_variable(variable)) { arma::vec main_param = main_effects.row(variable).cols(0, num_cats - 1).t(); - bound += main_param.max(); - - arma::mat exponents(num_persons, num_cats); - for (int cat = 0; cat < num_cats; cat++) { - exponents.col(cat) = main_param(cat) + (cat + 1) * residual_score - bound; - } - - arma::mat probs = ARMA_MY_EXP(exponents); - arma::vec denom = arma::sum(probs, 1) + ARMA_MY_EXP(-bound); - probs.each_col() /= denom; + arma::mat probs = compute_probs_ordinal( + main_param, residual_score, bound, num_cats + ); // main effects for (int cat = 0; cat < num_cats; cat++) { - gradient(offset + cat) -= arma::accu(probs.col(cat)); + gradient(offset + cat) -= arma::accu(probs.col(cat + 1)); } // pairwise effects for (int j = 0; j < num_variables; j++) { if (inclusion_indicator(variable, j) == 0 || variable == j) continue; arma::vec expected_value = arma::zeros(num_persons); - for (int cat = 0; cat < num_cats; cat++) { - expected_value += (cat + 1) * probs.col(cat) % observations.col(j); + for (int cat = 1; cat <= num_cats; cat++) { + expected_value += cat * probs.col(cat) % observations.col(j); } int location = (variable < j) ? index_matrix(variable, j) : index_matrix(j, variable); gradient(location) -= arma::accu(expected_value); @@ -476,33 +457,30 @@ arma::vec gradient_log_pseudoposterior( const double lin_eff = main_effects(variable, 0); const double quad_eff = main_effects(variable, 1); - arma::mat exponents(num_persons, num_cats + 1); - for (int cat = 0; cat <= num_cats; cat++) { - int score = cat; - int centered = score - ref; - double lin = lin_eff * score; - double quad = quad_eff * centered * centered; - exponents.col(cat) = lin + quad + score * residual_score - bound; - } - arma::mat probs = ARMA_MY_EXP(exponents); - arma::vec denom = arma::sum(probs, 1); - probs.each_col() /= denom; + arma::mat probs = compute_probs_blume_capel( + residual_score, lin_eff, quad_eff, ref, num_cats, bound + ); - arma::ivec lin_score = arma::regspace(0, num_cats); - arma::ivec quad_score = arma::square(lin_score - ref); + arma::vec score = arma::regspace(0, num_cats) - double(ref); + arma::vec sq_score = arma::square(score); // main effects - gradient(offset) -= arma::accu(probs * lin_score); - gradient(offset + 1) -= arma::accu(probs * quad_score); + gradient(offset) -= arma::accu(probs * score); + gradient(offset + 1) -= arma::accu(probs * sq_score); // pairwise effects for (int j = 0; j < num_variables; j++) { if (inclusion_indicator(variable, j) == 0 || variable == j) continue; arma::vec expected_value = arma::zeros(num_persons); - for (int cat = 0; cat < num_cats; cat++) { - expected_value += (cat + 1) * probs.col(cat + 1) % observations.col(j); + for (int cat = 0; cat <= num_cats; cat++) { + int s = score(cat); + expected_value += s * probs.col(cat) % observations.col(j); } - int location = (variable < j) ? index_matrix(variable, j) : index_matrix(j, variable); + + int location = (variable < j) + ? index_matrix(variable, j) + : index_matrix(j, variable); + gradient(location) -= arma::accu(expected_value); } offset += 2; @@ -589,48 +567,43 @@ double compute_log_likelihood_ratio_for_variable ( arma::vec interaction = arma::conv_to::from (interacting_score); const int num_persons = residual_matrix.n_rows; - const int num_categories_var = num_categories (variable); + const int num_cats = num_categories (variable); // Compute adjusted linear predictors without the current interaction - arma::vec residual_scores = residual_matrix.col (variable) - interaction * current_state; - - // Stability bound for softmax (scaled by number of categories) - arma::vec bounds = arma::max (residual_scores, arma::zeros (num_persons)) * num_categories_var; + arma::vec residual_score = residual_matrix.col (variable) - interaction * current_state; + arma::vec bounds = residual_score * num_cats; arma::vec denom_current = arma::zeros (num_persons); arma::vec denom_proposed = arma::zeros (num_persons); if (is_ordinal_variable (variable)) { - denom_current += ARMA_MY_EXP(-bounds); - denom_proposed += ARMA_MY_EXP(-bounds); - - for (int category = 0; category < num_categories_var; category++) { - const double main = main_effects(variable, category); - const int score = category + 1; + arma::vec main_param = main_effects.row(variable).cols(0, num_cats - 1).t(); - for (int person = 0; person < num_persons; person++) { - const double base = main + score * residual_scores[person] - bounds[person]; + // ---- main change: use safe helper ---- + denom_current += compute_denom_ordinal( + residual_score + interaction * current_state, main_param, bounds + ); + denom_proposed += compute_denom_ordinal( + residual_score + interaction * proposed_state, main_param, bounds + ); - const double exp_current = MY_EXP(base + score * interaction[person] * current_state); - const double exp_proposed = MY_EXP(base + score * interaction[person] * proposed_state); - - denom_current[person] += exp_current; - denom_proposed[person] += exp_proposed; - } - } } else { // Binary or categorical variable: linear + quadratic score const int ref_cat = baseline_category (variable); - for (int category = 0; category <= num_categories_var; category++) { - int centered = category - ref_cat; - double lin_term = main_effects (variable, 0) * category; - double quad_term = main_effects (variable, 1) * centered * centered; - arma::vec exponent = lin_term + quad_term + category * residual_scores - bounds; + denom_current = compute_denom_blume_capel( + residual_score + interaction * current_state, main_effects (variable, 0), + main_effects (variable, 1), ref_cat, num_cats, bounds + ); + double log_ratio = arma::accu(ARMA_MY_LOG (denom_current) + bounds); - denom_current += ARMA_MY_EXP (exponent + category * interaction * current_state); - denom_proposed += ARMA_MY_EXP (exponent + category * interaction * proposed_state); - } + denom_proposed = compute_denom_blume_capel( + residual_score + interaction * proposed_state, main_effects (variable, 0), + main_effects (variable, 1), ref_cat, num_cats, bounds + ); + log_ratio -= arma::accu(ARMA_MY_LOG (denom_proposed) + bounds); + + return log_ratio; } // Accumulated log-likelihood difference across persons diff --git a/src/bgm_logp_and_grad.h b/src/bgm_logp_and_grad.h index 6e72812..2affd1a 100644 --- a/src/bgm_logp_and_grad.h +++ b/src/bgm_logp_and_grad.h @@ -51,21 +51,31 @@ double log_pseudoposterior ( const arma::mat& residual_matrix ); +std::pair gradient_observed_active( + const arma::imat& inclusion_indicator, + const arma::imat& observations, + const arma::ivec& num_categories, + const arma::imat& counts_per_category, + const arma::imat& blume_capel_stats, + const arma::ivec& baseline_category, + const arma::uvec& is_ordinal_variable, + const arma::imat& pairwise_stats +); + arma::vec gradient_log_pseudoposterior( const arma::mat& main_effects, const arma::mat& pairwise_effects, const arma::imat& inclusion_indicator, const arma::imat& observations, const arma::ivec& num_categories, - const arma::imat& counts_per_category, - const arma::imat& blume_capel_stats, const arma::ivec& baseline_category, const arma::uvec& is_ordinal_variable, const double main_alpha, const double main_beta, const double pairwise_scale, - const arma::imat& pairwise_stats, - const arma::mat& residual_matrix + const arma::mat& residual_matrix, + const arma::imat index_matrix, + const arma::vec grad_obs ); // Pseudolikelihood ratio for a single variable diff --git a/src/bgm_sampler.cpp b/src/bgm_sampler.cpp index b1397f2..683bbf3 100644 --- a/src/bgm_sampler.cpp +++ b/src/bgm_sampler.cpp @@ -100,18 +100,19 @@ void impute_missing_bgm ( // Compute probabilities for Blume-Capel variable const int ref = baseline_category (variable); - cumsum = MY_EXP (main_effects (variable, 1) * ref * ref); + cumsum = MY_EXP ( + main_effects (variable, 0) * ref + main_effects (variable, 1) * ref * ref + ); category_probabilities[0] = cumsum; - for (int cat = 0; cat < num_cats; cat++) { - const int score = cat + 1; - const int centered = score - ref; + for (int cat = 0; cat <= num_cats; cat++) { + const int score = cat - ref; const double exponent = main_effects (variable, 0) * score + - main_effects (variable, 1) * centered * centered + + main_effects (variable, 1) * score * score + score * residual_score; cumsum += MY_EXP (exponent); - category_probabilities[score] = cumsum; + category_probabilities[cat] = cumsum; } } @@ -122,7 +123,9 @@ void impute_missing_bgm ( sampled_score++; } - const int new_value = sampled_score; + int new_value = sampled_score; + if(!is_ordinal) + new_value -= baseline_category (variable); const int old_value = observations(person, variable); if (new_value != old_value) { @@ -133,11 +136,8 @@ void impute_missing_bgm ( counts_per_category(old_value, variable)--; counts_per_category(new_value, variable)++; } else { - const int ref = baseline_category(variable); const int delta = new_value - old_value; - const int delta_sq = - (new_value - ref) * (new_value - ref) - - (old_value - ref) * (old_value - ref); + const int delta_sq = new_value * new_value - old_value * old_value; blume_capel_stats(0, variable) += delta; blume_capel_stats(1, variable) += delta_sq; @@ -212,27 +212,35 @@ double find_initial_stepsize_bgm( num_categories, is_ordinal_variable ); + arma::vec grad_obs; + arma::imat index_matrix; + + std::tie(grad_obs, index_matrix) = gradient_observed_active( + inclusion_indicator, observations, num_categories, counts_per_category, + blume_capel_stats, baseline_category, is_ordinal_variable, pairwise_stats + ); + arma::mat current_main = main_effects; arma::mat current_pair = pairwise_effects; - auto log_post = [&](const arma::vec& theta_vec) { - unvectorize_model_parameters_bgm(theta_vec, current_main, current_pair, - inclusion_indicator, - num_categories, is_ordinal_variable); + auto grad = [&](const arma::vec& theta_vec) { + unvectorize_model_parameters_bgm(theta_vec, current_main, current_pair, inclusion_indicator, + num_categories, is_ordinal_variable); arma::mat rm = observations * current_pair; - return log_pseudoposterior( - current_main, current_pair, inclusion_indicator, observations, - num_categories, counts_per_category, blume_capel_stats, - baseline_category, is_ordinal_variable, main_alpha, main_beta, - pairwise_scale, pairwise_stats, rm + + return gradient_log_pseudoposterior ( + current_main, current_pair, inclusion_indicator, observations, + num_categories, baseline_category, is_ordinal_variable, main_alpha, + main_beta, pairwise_scale, rm, index_matrix, grad_obs ); }; - auto grad = [&](const arma::vec& theta_vec) { - unvectorize_model_parameters_bgm(theta_vec, current_main, current_pair, inclusion_indicator, - num_categories, is_ordinal_variable); + auto log_post = [&](const arma::vec& theta_vec) { + unvectorize_model_parameters_bgm(theta_vec, current_main, current_pair, + inclusion_indicator, + num_categories, is_ordinal_variable); arma::mat rm = observations * current_pair; - return gradient_log_pseudoposterior( + return log_pseudoposterior( current_main, current_pair, inclusion_indicator, observations, num_categories, counts_per_category, blume_capel_stats, baseline_category, is_ordinal_variable, main_alpha, main_beta, @@ -521,6 +529,14 @@ void update_hmc_bgm( num_categories, is_ordinal_variable ); + arma::vec grad_obs; + arma::imat index_matrix; + + std::tie(grad_obs, index_matrix) = gradient_observed_active( + inclusion_indicator, observations, num_categories, counts_per_category, + blume_capel_stats, baseline_category, is_ordinal_variable, pairwise_stats + ); + arma::mat current_main = main_effects; arma::mat current_pair = pairwise_effects; @@ -531,9 +547,8 @@ void update_hmc_bgm( return gradient_log_pseudoposterior ( current_main, current_pair, inclusion_indicator, observations, - num_categories, counts_per_category, blume_capel_stats, - baseline_category, is_ordinal_variable, main_alpha, - main_beta, pairwise_scale, pairwise_stats, rm + num_categories, baseline_category, is_ordinal_variable, main_alpha, + main_beta, pairwise_scale, rm, index_matrix, grad_obs ); }; @@ -641,20 +656,26 @@ SamplerResult update_nuts_bgm( num_categories, is_ordinal_variable ); + arma::vec grad_obs; + arma::imat index_matrix; + + std::tie(grad_obs, index_matrix) = gradient_observed_active( + inclusion_indicator, observations, num_categories, counts_per_category, + blume_capel_stats, baseline_category, is_ordinal_variable, pairwise_stats + ); + arma::mat current_main = main_effects; arma::mat current_pair = pairwise_effects; auto grad = [&](const arma::vec& theta_vec) { - unvectorize_model_parameters_bgm(theta_vec, current_main, current_pair, - inclusion_indicator, num_categories, - is_ordinal_variable); + unvectorize_model_parameters_bgm(theta_vec, current_main, current_pair, inclusion_indicator, + num_categories, is_ordinal_variable); arma::mat rm = observations * current_pair; - return gradient_log_pseudoposterior( - current_main, current_pair, inclusion_indicator, observations, - num_categories, counts_per_category, blume_capel_stats, - baseline_category, is_ordinal_variable, main_alpha, - main_beta, pairwise_scale, pairwise_stats, rm + return gradient_log_pseudoposterior ( + current_main, current_pair, inclusion_indicator, observations, + num_categories, baseline_category, is_ordinal_variable, main_alpha, + main_beta, pairwise_scale, rm, index_matrix, grad_obs ); }; diff --git a/src/data_simulation.cpp b/src/data_simulation.cpp index 2e50c82..55639a2 100644 --- a/src/data_simulation.cpp +++ b/src/data_simulation.cpp @@ -80,7 +80,7 @@ IntegerMatrix sample_bcomrf_gibbs(int no_states, NumericMatrix interactions, NumericMatrix thresholds, StringVector variable_type, - IntegerVector reference_category, + IntegerVector baseline_category, int iter) { IntegerMatrix observations(no_states, no_variables); @@ -118,21 +118,26 @@ IntegerMatrix sample_bcomrf_gibbs(int no_states, for(int person = 0; person < no_states; person++) { rest_score = 0.0; for(int vertex = 0; vertex < no_variables; vertex++) { - rest_score += observations(person, vertex) * - interactions(vertex, variable); + if(variable_type[vertex] != "blume-capel") { + rest_score += observations(person, vertex) * interactions(vertex, variable); + } else { + int ref = baseline_category[vertex]; + int obs = observations(person, vertex); + rest_score += (obs - ref) * interactions(vertex, variable); + } } if(variable_type[variable] == "blume-capel") { cumsum = 0.0; + int ref = baseline_category[variable]; for(int category = 0; category < no_categories[variable] + 1; category++) { + const int score = category - ref; //The linear term of the Blume-Capel variable - exponent = thresholds(variable, 0) * category; + exponent = thresholds(variable, 0) * score; //The quadratic term of the Blume-Capel variable - exponent += thresholds(variable, 1) * - (category - reference_category[variable]) * - (category - reference_category[variable]); + exponent += thresholds(variable, 1) * score * score; //The pairwise interactions - exponent += category * rest_score; + exponent += rest_score * score; cumsum += MY_EXP(exponent); probabilities[category] = cumsum; } diff --git a/src/mcmc_leapfrog.cpp b/src/mcmc_leapfrog.cpp index 6d5fb4d..49c7d10 100644 --- a/src/mcmc_leapfrog.cpp +++ b/src/mcmc_leapfrog.cpp @@ -4,6 +4,7 @@ #include "mcmc_memoization.h" + /** * Function: leapfrog * diff --git a/src/mcmc_utils.cpp b/src/mcmc_utils.cpp index e210496..4c2328c 100644 --- a/src/mcmc_utils.cpp +++ b/src/mcmc_utils.cpp @@ -6,6 +6,7 @@ #include "rng_utils.h" + /** * Function: kinetic_energy * diff --git a/src/print_mutex.h b/src/print_mutex.h index bde2729..12f9f21 100644 --- a/src/print_mutex.h +++ b/src/print_mutex.h @@ -9,3 +9,13 @@ inline tbb::mutex& get_print_mutex() { } #endif // PRINT_MUTEX_H + +// Add this header to the parallel code you wish to print from +// + the below code to print in parallel code: +// +// { +// tbb::mutex::scoped_lock lock(get_print_mutex()); +// std::cout +// << "print " +// << std::endl; +// } \ No newline at end of file diff --git a/src/variable_helpers.h b/src/variable_helpers.h new file mode 100644 index 0000000..79becda --- /dev/null +++ b/src/variable_helpers.h @@ -0,0 +1,395 @@ +#include +#include "explog_switch.h" + + + +// ----------------------------------------------------------------------------- +// Compute a numerically stable sum of the form: +// +// denom = exp(-bound) + sum_{cat=0}^{K-1} exp(main_effect_param(cat) +// + (cat + 1) * residual_score - bound) +// +// but evaluated efficiently using precomputed exponentials: +// +// exp_r = exp(residual_score) +// exp_m = exp(main_effect_param) +// denom = exp(-bound) * ( 1 + sum_c exp_m[c] * exp_r^(c+1) ) +// +// If non-finite values arise (overflow, underflow, NaN), a safe fallback +// recomputes the naive version using direct exponentials. +// ---------------------------------------------------------------------------- +inline arma::vec compute_denom_ordinal(const arma::vec& residual, + const arma::vec& main_eff, + const arma::vec& bound) +{ + constexpr double EXP_BOUND = 709.0; + const int K = static_cast(main_eff.n_elem); + + // --- Binary shortcut (K == 1) --------------------------------------------- + if (K == 1) { + return ARMA_MY_EXP(-bound) + ARMA_MY_EXP(main_eff[0] + residual - bound); + } + + const arma::uword N = bound.n_elem; + arma::vec denom(N, arma::fill::none); + const arma::vec eM = ARMA_MY_EXP(main_eff); + + // Fast block: uses eB inside the loop (avoids intermediate overflow) + auto do_fast_block = [&](arma::uword i0, arma::uword i1) { + arma::vec r = residual.rows(i0, i1); + arma::vec b = bound.rows(i0, i1); + arma::vec eR = ARMA_MY_EXP(r); + arma::vec eB = ARMA_MY_EXP(-b); + arma::vec pow = eR; + + arma::vec d = eB; + for (int c = 0; c < K; ++c) { + d += eM[c] * pow % eB; + pow %= eR; + } + denom.rows(i0, i1) = d; + }; + + // Safe block: stabilized exponent; NO clamp here by design + auto do_safe_block = [&](arma::uword i0, arma::uword i1) { + arma::vec r = residual.rows(i0, i1); + arma::vec b = bound.rows(i0, i1); + + arma::vec d = ARMA_MY_EXP(-b); + for (int c = 0; c < K; ++c) { + arma::vec ex = main_eff[c] + (c + 1) * r - b; + d += ARMA_MY_EXP(ex); + } + denom.rows(i0, i1) = d; + }; + + // Single linear scan over contiguous runs + const double* bp = bound.memptr(); + arma::uword i = 0; + while (i < N) { + const bool fast = !(bp[i] < -EXP_BOUND || bp[i] > EXP_BOUND); + arma::uword j = i + 1; + while (j < N) { + const bool fast_j = !(bp[j] < -EXP_BOUND || bp[j] > EXP_BOUND); + if (fast_j != fast) break; + ++j; + } + if (fast) do_fast_block(i, j - 1); + else do_safe_block(i, j - 1); + i = j; + } + + return denom; +} + +// ----------------------------------------------------------------------------- +// Compute denom = Σ_c exp( θ(c) + c*r - b ), with +// θ(c) = lin_eff*(c-ref) + quad_eff*(c-ref)^2 +// b = max_c( θ(c) + c*r ) (vectorized) +// +// Two modes: +// +// FAST (preexp + power-chain): +// denom = Σ_c exp_theta[c] * exp(-b) * exp(r)^c +// Used only when all exponent terms are safe: +// |b| ≤ EXP_BOUND, +// underflow_bound ≥ -EXP_BOUND, +// num_cats*r - b ≤ EXP_BOUND. +// This guarantees the recursive pow-chain stays finite. +// +// SAFE (direct evaluation): +// denom = Σ_c exp(θ(c) + c*r - b) +// Used whenever any FAST-condition fails. Slower but always stable. +// +// FAST gives identical results when safe, otherwise SAFE is used. +// ----------------------------------------------------------------------------- +inline arma::vec compute_denom_blume_capel( + const arma::vec& residual, + const double lin_eff, + const double quad_eff, + const int ref, + const int num_cats, + arma::vec& b // update in place: per-person bound b[i] +) { + + constexpr double EXP_BOUND = 709.0; + const arma::uword N = residual.n_elem; + arma::vec denom(N); + + // ---- 1. Precompute theta_part[cat] and exp(theta_part) ---- + arma::vec cat = arma::regspace(0, num_cats); + arma::vec centered = cat - double(ref); + arma::vec theta = lin_eff * centered + quad_eff * arma::square(centered); + arma::vec exp_theta = ARMA_MY_EXP(theta); + + // ---- 2. Numerical bounds [b] ---- + b.set_size(N); + b.fill(theta[0]); + for (int c = 1; c <= num_cats; c++) + b = arma::max(b, theta[c] + double(c) * residual); + + // ---- 3. Bounds for the FAST power chain: c*r - b ---- + // For fixed i, c*r[i] - b[i] ranges between -b[i] and num_cats*r[i] - b[i]. + // We need max_c (c*r[i] - b[i]) <= EXP_BOUND to avoid overflow in pow. + arma::vec pow_bound = double(num_cats) * residual - b; + + // ---- 4. FAST BLOCK: Preexp + bounded power chain ---- + auto do_fast_block = [&](arma::uword i0, arma::uword i1) { + arma::vec r = residual.rows(i0, i1); + arma::vec bb = b.rows(i0, i1); + + arma::vec eR = ARMA_MY_EXP(r); // exp(r) + arma::vec pow = ARMA_MY_EXP(-bb); // start at cat=0 term: exp(0*r - b) + arma::vec d = exp_theta[0] * pow; + + for (int c = 1; c <= num_cats; c++) { + pow %= eR; // exp(c*r - b) + d += exp_theta[c] * pow; + } + denom.rows(i0, i1) = d; + }; + + // ---- 5. SAFE BLOCK: direct exp(theta[c] + c*r - b) ---- + auto do_safe_block = [&](arma::uword i0, arma::uword i1) { + arma::vec r = residual.rows(i0, i1); + arma::vec bb = b.rows(i0, i1); + + arma::vec d(bb.n_elem, arma::fill::zeros); + for (int c = 0; c <= num_cats; c++) { + arma::vec ex = theta[c] + double(c) * r - bb; + d += ARMA_MY_EXP(ex); + } + + + + denom.rows(i0, i1) = d; + }; + + // ---- 6. BLOCK SCAN: decide FAST vs SAFE per contiguous run ---- + const double* bp = b.memptr(); + const double* pp = pow_bound.memptr(); + + arma::uword i = 0; + while (i < N) { + const bool fast_i = (std::abs(bp[i]) <= EXP_BOUND) && (std::abs(pp[i]) <= EXP_BOUND); + + arma::uword j = i + 1; + while (j < N) { + const bool fast_j = (std::abs(bp[j]) <= EXP_BOUND) && (std::abs(pp[j]) <= EXP_BOUND); + if (fast_j != fast_i) break; + ++j; + } + + if (fast_i) do_fast_block(i, j - 1); + else do_safe_block(i, j - 1); + + i = j; + } + + return denom; +} + + + +/** + * Compute category probabilities in a numerically stable manner. + * + * Uses pre-exp or bounded formulations depending on the magnitude of `bound`. + * - If |bound| < 700: uses cheaper direct pre-exp computation + * - Else: clips bound at zero and applies stabilized scaling + * + * Empirical tests (see R/compare_prob_ratios.R) showed: + * - Clipping necessary for bound < -700 + * - Bounds improve stability when large + * + * Returns: + * probs: num_persons × (num_cats + 1) matrix of probabilities (row-normalized) + */ +inline arma::mat compute_probs_ordinal(const arma::vec& main_param, + const arma::vec& residual_score, + const arma::vec& bound, + int num_cats) +{ + constexpr double EXP_BOUND = 709.0; + const arma::uword N = bound.n_elem; + + if (num_cats == 1) { + arma::vec b = arma::clamp(bound, 0.0, arma::datum::inf); + arma::vec ex = main_param(0) + residual_score - b; + arma::vec t = ARMA_MY_EXP(ex); + arma::vec den = ARMA_MY_EXP(-b) + t; + arma::mat probs(N, 2, arma::fill::none); + probs.col(1) = t / den; + probs.col(0) = 1.0 - probs.col(1); + return probs; + } + + arma::mat probs(N, num_cats + 1, arma::fill::none); + const arma::vec eM = ARMA_MY_EXP(main_param); + + auto do_fast_block = [&](arma::uword i0, arma::uword i1) { + auto P = probs.rows(i0, i1).cols(1, num_cats); + arma::vec r = residual_score.rows(i0, i1); + arma::vec eR = ARMA_MY_EXP(r); + arma::vec pow = eR; + arma::vec den(P.n_rows, arma::fill::ones); + for (int c = 0; c < num_cats; c++) { + arma::vec term = eM[c] * pow; + P.col(c) = term; + den += term; + pow %= eR; + } + P.each_col() /= den; + }; + + auto do_safe_block = [&](arma::uword i0, arma::uword i1) { + auto P = probs.rows(i0, i1).cols(1, num_cats); + arma::vec r = residual_score.rows(i0, i1); + arma::vec b = arma::clamp(bound.rows(i0, i1), 0.0, arma::datum::inf); + arma::vec den = ARMA_MY_EXP(-b); + for (int c = 0; c < num_cats; c++) { + arma::vec ex = main_param(c) + (c + 1) * r - b; + arma::vec t = ARMA_MY_EXP(ex); + P.col(c) = t; + den += t; + } + P.each_col() /= den; + }; + + // Single linear scan; no std::abs + const double* bp = bound.memptr(); + arma::uword i = 0; + while (i < N) { + const bool fast = !(bp[i] < -EXP_BOUND || bp[i] > EXP_BOUND); + arma::uword j = i + 1; + while (j < N) { + const bool fast_j = !(bp[j] < -EXP_BOUND || bp[j] > EXP_BOUND); + if (fast_j != fast) break; + j++; + } + if (fast) do_fast_block(i, j - 1); + else do_safe_block(i, j - 1); + i = j; + } + + probs.col(0) = 1.0 - arma::sum(probs.cols(1, num_cats), 1); + return probs; +} + + + +// ----------------------------------------------------------------------------- +// Blume–Capel probabilities, numerically stable via FAST/SAFE split. +// +// Model: +// θ(c) = lin_eff * (c - ref) + quad_eff * (c - ref)^2, c = 0..num_cats +// exps_i(c) = θ(c) + c * r_i +// b_i = max_c exps_i(c) +// +// Probabilities: +// p_i(c) ∝ exp( exps_i(c) - b_i ) +// +// FAST (preexp + power-chain, same bounds as compute_denom_blume_capel): +// used when |b_i| ≤ EXP_BOUND and pow_bound_i = num_cats * r_i - b_i ≤ EXP_BOUND +// +// SAFE (direct): +// used otherwise: direct exp(θ(c) + c * r_i - b_i) +// +// Under these conditions, denom is finite and > 0, so no one-hot fallback. +// ----------------------------------------------------------------------------- +inline arma::mat compute_probs_blume_capel(const arma::vec& residual, + const double lin_eff, + const double quad_eff, + const int ref, + const int num_cats, + arma::vec& b) // updated in place +{ + constexpr double EXP_BOUND = 709.0; + + const arma::uword N = residual.n_elem; + arma::mat probs(N, num_cats + 1, arma::fill::none); + + // 1. Precompute θ(c) and exp(θ(c)) + arma::vec cat = arma::regspace(0, num_cats); + arma::vec centered = cat - double(ref); + arma::vec theta = lin_eff * centered + quad_eff * arma::square(centered); + arma::vec exp_theta = ARMA_MY_EXP(theta); + + // 2. Compute bounds b[i] = max_c (θ(c) + c * r_i) + b.set_size(N); + b.fill(theta[0]); + for (int c = 1; c <= num_cats; ++c) { + b = arma::max(b, theta[c] + double(c) * residual); + } + + // 3. Bound for the power chain: max_c (c * r_i - b_i) = num_cats * r_i - b_i + arma::vec pow_bound = double(num_cats) * residual - b; + + // FAST block: preexp + bounded power chain + auto do_fast_block = [&](arma::uword i0, arma::uword i1) { + auto P = probs.rows(i0, i1); + arma::vec r = residual.rows(i0, i1); + arma::vec bb = b.rows(i0, i1); + const arma::uword B = bb.n_elem; + + arma::vec eR = ARMA_MY_EXP(r); // exp(r_i) + arma::vec pow = ARMA_MY_EXP(-bb); // exp(0 * r_i - b_i) + arma::vec denom(B, arma::fill::zeros); + + // c = 0 + arma::vec col0 = exp_theta[0] * pow; + P.col(0) = col0; + denom += col0; + + // c = 1..num_cats + for (int c = 1; c <= num_cats; ++c) { + pow %= eR; // exp(c * r_i - b_i) + arma::vec col = exp_theta[c] * pow; + P.col(c) = col; + denom += col; + } + + P.each_col() /= denom; + }; + + // SAFE block: direct exp(θ(c) + c * r_i - b_i) + auto do_safe_block = [&](arma::uword i0, arma::uword i1) { + auto P = probs.rows(i0, i1); + arma::vec r = residual.rows(i0, i1); + arma::vec bb = b.rows(i0, i1); + const arma::uword B = bb.n_elem; + arma::vec denom(B, arma::fill::zeros); + + for (int c = 0; c <= num_cats; ++c) { + arma::vec ex = theta[c] + double(c) * r - bb; + arma::vec col = ARMA_MY_EXP(ex); + P.col(c) = col; + denom += col; + } + P.each_col() /= denom; + }; + + // 4. Single linear scan over contiguous FAST/SAFE runs (same as denom) + const double* bp = b.memptr(); + const double* pp = pow_bound.memptr(); + arma::uword i = 0; + while (i < N) { + const bool fast_i = + (std::abs(bp[i]) <= EXP_BOUND) && (std::abs(pp[i]) <= EXP_BOUND); + + arma::uword j = i + 1; + while (j < N) { + const bool fast_j = + (std::abs(bp[j]) <= EXP_BOUND) && (std::abs(pp[j]) <= EXP_BOUND); + if (fast_j != fast_i) break; + j++; + } + + if (fast_i) do_fast_block(i, j - 1); + else do_safe_block(i, j - 1); + + i = j; + } + + return probs; +} \ No newline at end of file