diff --git a/.gitignore b/.gitignore index c99070b..8149067 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ .RHistory escape.Rproj .Rproj* -.RData \ No newline at end of file +.RData +**/.tmp.driveupload \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index ca6626a..4533bed 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: escape Title: Easy single cell analysis platform for enrichment -Version: 2.3.1 +Version: 2.5.0 Authors@R: c( person(given = "Nick", family = "Borcherding", role = c("aut", "cre"), email = "ncborch@gmail.com"), person(given = "Jared", family = "Andrews", role = c("aut"), email = "jared.andrews07@gmail.com"), @@ -13,42 +13,43 @@ LazyData: false RoxygenNote: 7.3.2 biocViews: Software, SingleCell, Classification, Annotation, GeneSetEnrichment, Sequencing, GeneSignaling, Pathways Depends: R (>= 4.1) -Imports: +Imports: + ggdist, + ggplot2 (>= 3.5.0), + grDevices, + Matrix, + MatrixGenerics, + methods, + stats, + SummarizedExperiment, + utils +Suggests: AUCell, BiocParallel, - grDevices, + BiocStyle, dplyr, - ggdist, - ggplot2, - ggpointdensity, + fgsea, GSEABase, - GSVA, - SingleCellExperiment, + ggraph, ggridges, - msigdb, - stats, - reshape2, - patchwork, - MatrixGenerics, - utils, - SummarizedExperiment, - UCell, - stringr, - methods, - SeuratObject, - Matrix -Suggests: - Seurat, + ggpointdensity, + GSVA, hexbin, - scran, + igraph, + irlba, knitr, + msigdb, + patchwork, rmarkdown, - markdown, - BiocStyle, - RColorBrewer, rlang, + scran, + SeuratObject, + Seurat, + SingleCellExperiment, spelling, + stringr, testthat (>= 3.0.0), - vdiffr + UCell VignetteBuilder: knitr Language: en-US +BugReports: https://github.com/BorchLab/escape/issues diff --git a/NAMESPACE b/NAMESPACE index d59ea16..abc7840 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,12 @@ # Generated by roxygen2: do not edit by hand export(densityEnrichment) +export(enrichIt) +export(enrichItPlot) export(escape.matrix) export(getGeneSets) export(geyserEnrichment) +export(gseaEnrichment) export(heatmapEnrichment) export(pcaEnrichment) export(performNormalization) @@ -13,58 +16,17 @@ export(runEscape) export(scatterEnrichment) export(splitEnrichment) import(ggplot2) -import(patchwork) -importFrom(AUCell,AUCell_buildRankings) -importFrom(AUCell,AUCell_calcAUC) -importFrom(BiocParallel,BatchtoolsParam) -importFrom(BiocParallel,MulticoreParam) -importFrom(BiocParallel,SerialParam) -importFrom(GSEABase,GeneSet) -importFrom(GSEABase,GeneSetCollection) -importFrom(GSEABase,geneIds) -importFrom(GSVA,gsva) -importFrom(GSVA,gsvaParam) -importFrom(GSVA,ssgseaParam) -importFrom(Matrix,colSums) -importFrom(Matrix,t) +importFrom(MatrixGenerics,rowMeans2) importFrom(MatrixGenerics,rowSds) -importFrom(MatrixGenerics,rowSums2) -importFrom(SeuratObject,Assays) -importFrom(SeuratObject,CreateAssay5Object) -importFrom(SeuratObject,CreateAssayObject) -importFrom(SeuratObject,CreateDimReducObject) -importFrom(SeuratObject,Idents) -importFrom(SingleCellExperiment,"altExp<-") -importFrom(SingleCellExperiment,"reducedDim<-") -importFrom(SingleCellExperiment,altExp) -importFrom(SingleCellExperiment,altExps) -importFrom(SingleCellExperiment,colData) -importFrom(SingleCellExperiment,reducedDim) -importFrom(SummarizedExperiment,"assays<-") -importFrom(SummarizedExperiment,SummarizedExperiment) -importFrom(SummarizedExperiment,assay) -importFrom(SummarizedExperiment,assays) -importFrom(UCell,ScoreSignatures_UCell) -importFrom(dplyr,"%>%") -importFrom(dplyr,across) -importFrom(dplyr,group_by) -importFrom(dplyr,slice_max) -importFrom(dplyr,summarise) -importFrom(dplyr,summarise_at) +importFrom(SummarizedExperiment,colData) importFrom(ggdist,stat_pointinterval) -importFrom(ggpointdensity,geom_pointdensity) -importFrom(ggridges,geom_density_ridges) -importFrom(ggridges,geom_density_ridges2) -importFrom(ggridges,geom_density_ridges_gradient) -importFrom(ggridges,position_points_jitter) -importFrom(grDevices,hcl.colors) -importFrom(methods,slot) -importFrom(msigdb,appendKEGG) -importFrom(msigdb,getMsigdb) -importFrom(reshape2,melt) +importFrom(grDevices,hcl.pals) +importFrom(stats,aggregate) +importFrom(stats,as.formula) importFrom(stats,dist) importFrom(stats,hclust) -importFrom(stats,prcomp) -importFrom(stringr,str_replace_all) -importFrom(stringr,str_sort) -importFrom(utils,getFromNamespace) +importFrom(stats,median) +importFrom(stats,na.omit) +importFrom(stats,p.adjust) +importFrom(stats,setNames) +importFrom(utils,head) diff --git a/NEWS.md b/NEWS.md index 69d8ab3..ad5e618 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,78 +1,98 @@ -# escape VERSION 2.4.1 - -Version bump to be consistent with Bioconductor release - -## UNDERLYING CHANGES - -* Automatically remove gene sets with 0 in ```escape.matrix()``` - -# escape VERSION 2.2.4 - -## UNDERLYING CHANGES - -* moved dependency from msigdbr to msigdb -* ```getGeneSets()``` now locally caches the gene sets to improve speed of repeated use -* ```getGeneSets()``` now only supports Mouse or Human - -# escape VERSION 2.2.3 - -## UNDERLYING CHANGES - -* fixed handling of *groups* parameter and data splitting in ```escape.matrix()``` -* improved efficiency of internal ```.split_data.matrix()``` - -# escape VERSION 2.2.2 - -## UNDERLYING CHANGES - -* fix ```performNormalization()``` conditional statements -* fix ```performNormalization()``` rescaling for per gene set calculations - -# escape VERSION 2.2.1 - -#VERSION BUMP FOR BIOCONDUCTOR - -# escape VERSION 2.1.5 (2024-10-23) - -* update handling of v5 Seurat versus 250 expectations) now ships with the package. + +## New & enhanced functionality +| Area | Function(s) | What changed | +|------|-------------|--------------| +| **Visualisation** | `ridgeEnrichment()` | *True gradient* coloring mode for numeric `color.by`; optional per-cell rugs; quantile median line; fixed grey-fill bug | +| | `densityEnrichment()` | accepts new `rug.height`; ~4× faster ranking routine using `MatrixGenerics::rowMeans2`; cleaner two-panel layout via **patchwork** | +| | `gseaEnrichment()` | new `rug.height`; clearer legend showing ES/NES/ *p*; internal vectorised ES calculation | +| | `splitEnrichment()` | rewritten: split violins when `split.by` has 2 levels, dodged violins otherwise; inline boxplots; auto Z-scaling; palette helper | +| | `scatterEnrichment()` | density-aware points (via **ggpointdensity**), hex-bin alternative, optional Pearson/Spearman overlay, continuous or discrete color mapping | +| **Dimensionality reduction** | `performPCA()` / `pcaEnrichment()` | uses `irlba::prcomp_irlba()` automatically for large matrices; stores eigen-values/contribution in `misc`; `add.percent.contribution` now always respected | +| **Scoring backend** | `escape.matrix()` / `.compute_enrichment()` | lazy loading of heavy back-ends (*GSVA*, *UCell*, *AUCell*); unified `.build_gsva_param()`; drops empty gene-sets up-front | +| **Normalization** | `performNormalization()` | chunk-wise expressed-gene scaling (memory-friendly); accepts external `scale.factor`; optional signed log-transform; returns object with assay `_normalized` | +| **Gene-set retrieval** | `getGeneSets()` | downloads now cached under `tools::R_user_dir("escape", "cache")`; graceful KEGG append; clearer error for non-human/mouse requests | + +## Performance & dependency reductions +* Replaced *plyr*, *stringr*, *rlang* usage with base-R helpers; these packages +are now **Suggests** only. +* Common color and label utilities (`.colorizer()`, `.colorby()`, `.orderFunction()`) +removed redundant tidyverse imports. +* Internal matrices split/chunked with new `.split_*` helpers to cap memory +during parallel scoring/normalization. + +## Bug fixes +* Gradient mode in `ridgeEnrichment()` no longer produces grey fills when the +chosen gene-set is mapped to `color.by`. +* `pcaEnrichment()` axis labels correctly include variance contribution +when `display.factors = FALSE`. +* `.grabDimRed()` handles both Seurat v5 and + .filterFeatures() - lapply(seq_len(length(groups)), function(x) { - tmp <- cnts.filter[,which(grouping == groups[x])] - density <- suppressWarnings(compute.gene.cdf(tmp, seq_len(ncol(tmp)), TRUE, FALSE)) - rank.scores <- rep(0, nrow(tmp)) - sort.sgn.idxs <- apply(density, 2, order, decreasing=TRUE) - gsva_rnk2 <- apply(sort.sgn.idxs, 2, compute_rank_score.mod, nrow(cnts)) - means <- rowMeans(gsva_rnk2) - rank <- round(order(means)/2) - rank - }) -> ranks + meta <- .grabMeta(input.data) + groups <- na.omit(unique(meta[[group.by]])) - output <- do.call(cbind, ranks) - output <- as.data.frame(output) - colnames(output) <- paste0(group.by, ".", groups) - rownames(output) <- rownames(cnts.filter) + ## -------- 2 Fast rank computation per group ------------------------------ + n.genes <- nrow(cnts) + weights <- abs(seq(n.genes, 1) - n.genes/2) # fixed triangular weight + rank.mat <- matrix(NA_integer_, n.genes, length(groups), + dimnames = list(rownames(cnts), + paste0(group.by, ".", groups))) - mapped.gset.idx.list <- na.omit(match(gene.set, rownames(cnts.filter))) + compute.cdf <- utils::getFromNamespace("compute.gene.cdf", "GSVA") - output$gene.set.query <- NA - output$gene.set.query[mapped.gset.idx.list] <- "yes" - melted.data.frame <- suppressMessages(melt(output)) - col <- .colorizer(palette, length(groups)) + for (i in seq_along(groups)) { + cols <- which(meta[[group.by]] == groups[i]) + tmp <- cnts[, cols, drop = FALSE] + + dens <- suppressWarnings( + compute.cdf(tmp, seq_len(ncol(tmp)), TRUE, FALSE) + ) + ord <- apply(dens, 2, order, decreasing = TRUE) # genes × cells + scores <- vapply(seq_len(ncol(ord)), + function(j) weights[ord[, j]], + numeric(n.genes)) + + mean.score <- rowMeans2(scores) + rank.mat[, i] <- round(rank(mean.score, ties.method = "average") / 2) + } + ## -------- 3 Long data.frame w/o extra deps ------------------------------- + in.set <- rownames(rank.mat) %in% gene.sets[[gene.set.use]] + long.df <- data.frame( + value = as.vector(rank.mat), + variable = rep(colnames(rank.mat), each = n.genes), + gene.set.query = rep(ifelse(in.set, "yes", NA_character_), times = length(groups)), + stringsAsFactors = FALSE + ) - plot1 <- ggplot(melted.data.frame, aes(x = value)) + - geom_density(data = subset(melted.data.frame, gene.set.query == "yes"), - #linetype="dashed", - aes(fill = variable), - alpha = 0.4, - color = "black") + - theme_classic() + - scale_fill_manual(values = col) + - labs(fill = "Group") + - ylab("Rank Density") + + ## -------- 4 Plots --------------------------------------------------------- + cols <- .colorizer(palette, length(groups)) + plot.df <- subset(long.df, gene.set.query == "yes" & is.finite(value)) + + p1 <- ggplot(plot.df, + aes(x = value, fill = variable)) + + geom_density(alpha = 0.4, colour = "black") + + scale_fill_manual(values = cols, name = "Group") + + labs(y = "Rank density") + + theme_classic() + theme(axis.title.x = element_blank(), - axis.ticks.x = element_blank(), - axis.text.x = element_blank()) - melted.data.frame$segmenty <- NA - melted.data.frame$segmenty2 <- NA - ymax <- 0.2 - for (i in seq_along(groups)) { - melted.data.frame$segmenty <- ifelse(melted.data.frame$variable == paste0(group.by, ".", groups[i]), -(i*ymax-ymax), melted.data.frame$segmenty) - melted.data.frame$segmenty2 <- ifelse(melted.data.frame$variable == paste0(group.by, ".", groups[i]), -(i*ymax), melted.data.frame$segmenty2) - } - plot2 <- ggplot(subset(melted.data.frame, gene.set.query == "yes")) + - geom_segment(aes(x = value,y=segmenty,yend=segmenty2,xend=value, color = variable), - lwd = 1) + - guides(color = "none") + - xlab("Mean Rank Order") + - scale_color_manual(values = col) + - theme(axis.title.y = element_blank(), - axis.ticks.y = element_blank(), - axis.text.y = element_blank(), - panel.background = element_rect(fill = NA, colour = "black")) - EnPlot <- plot1 + plot2 + plot_layout(ncol=1, heights = c(3, 1)) - return(EnPlot) -} + axis.text.x = element_blank(), + axis.ticks.x = element_blank()) -# Internal function from GSVA -compute_rank_score.mod <- function(sort_idx_vec, p){ - tmp <- rep(0, p) - tmp[sort_idx_vec] <- abs(seq(from=p,to=1) - p/2) - return (tmp) -} - -# Modified from GSVA -#' @importFrom MatrixGenerics rowSds -.filterFeatures <- function(expr) { - sdGenes <- rowSds(expr) - sdGenes[sdGenes < 1e-10] <- 0 - if (any(sdGenes == 0) || any(is.na(sdGenes))) { - expr <- expr[sdGenes > 0 & !is.na(sdGenes), ] - } + ## simple segment plot for mean-rank positions + offset <- rug.height + seg.df <- within(plot.df, { + ord <- match(variable, unique(variable)) + y <- -(ord * offset - offset) + yend <- y - offset + }) - if (nrow(expr) < 2) - stop("Less than two genes in the input assay object\n") + p2 <- ggplot(seg.df, aes(x = value, xend = value, + y = y, yend = yend, + colour = variable)) + + geom_segment(linewidth = 1) + + scale_colour_manual(values = cols, guide = "none") + + labs(x = "Mean rank order") + + theme_classic() + + theme(axis.title.y = element_blank(), + axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + panel.border = element_rect(fill = NA, colour = "black")) - if(is.null(rownames(expr))) - stop("The input assay object doesn't have rownames\n") - expr -} + patchwork::wrap_plots(p1, p2, ncol = 1, heights = c(3,1)) +} \ No newline at end of file diff --git a/R/enrichIt.R b/R/enrichIt.R new file mode 100644 index 0000000..e000390 --- /dev/null +++ b/R/enrichIt.R @@ -0,0 +1,150 @@ +#' Flexible GSEA for Precomputed Gene Lists +#' +#' @description +#' A convenience front-end to **fgsea** that lets you point at the +#' `avg_log2FC` and `p_val_adj` columns coming out of Seurat / DESeq2 / +#' edgeR etc. It converts them to a signed -log10(*p*) ranking, filters on +#' significance / effect size, and then runs fgsea. +#' +#' @param input.data Either +#' • a named numeric vector **already ranked**, *or* +#' • a data.frame/tibble with one row per gene and columns containing +#' log-fold-change and *p*-value. If the gene ID is not in `rownames(data)`, +#' supply `gene_col`. +#' @param gene.sets AA named list of character vectors, the result of +#' [getGeneSets()], or the built-in data object [escape.gene.sets]. +#' @param gene_col Name of the column holding gene identifiers (ignored when +#' they are row-names). Default `NULL`. +#' @param logFC_col,pval_col Column names for logFC and *p* (or adj.*p*) +#' – defaults match Seurat’s `FindMarkers()`. +#' @param minSize,maxSize Integer. Minimum / maximum pathway size passed to +#' *fgsea* (default 5 / 500). +#' @param ranking_fun How to build the ranking: `"signed_log10_p"` (default) +#' or `"logFC"`. +#' @param pval_cutoff,logFC_cutoff Filters applied **before** ranking. +#' @param padjust_method Multiple-testing correction; any method accepted by +#' [stats::p.adjust()] (default `"BH"`). +#' @param nproc Passed to **fgsea** (`0` = multithread if OpenMP available). +#' +#' +#' @seealso [fgsea::fgsea()], [getGeneSets()], [gseaEnrichment()] +#' +#' @examples +#' pbmc_small <- SeuratObject::pbmc_small +#' +#' Seurat::Idents(pbmc_small) <- "groups" +#' markers <- Seurat::FindMarkers(pbmc_small, +#' ident.1 = "g1", +#' ident.2 = "g2") +#' +#' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +#' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) +#' +#' gsea <- enrichIt(markers, +#' gene.sets = gs) +#' +#' @return `data.frame` with the usual fgsea columns plus a convenient +#' `leadingEdge` character column collapsed with \";\". +#' @importFrom stats p.adjust +#' @export +enrichIt <- function(input.data, + gene.sets, + gene_col = NULL, + logFC_col = "avg_log2FC", + pval_col = c("p_val_adj", "p_val"), + ranking_fun = c("signed_log10_p", "logFC"), + pval_cutoff = 1, + logFC_cutoff = 0, + minSize = 5, + maxSize = 500, + padjust_method = "BH", + nproc = 0) { + + if (!requireNamespace("fgsea", quietly = TRUE)) + stop("Package 'fgsea' is required.") + + ranking_fun <- match.arg(ranking_fun) + + ## ------------------------------------------------------------------------ + ## 1. Build/validate the STATISTIC vector + ## ------------------------------------------------------------------------ + if (is.numeric(input.data) && !is.null(names(input.data))) { + stats <- sort(input.data[!is.na(input.data)], decreasing = TRUE) + + } else if (is.data.frame(input.data)) { + + df <- input.data + + ## decide which p-value column to use ------------------------ + pval_col <- match.arg(pval_col[ pval_col %in% names(df) ], + choices = pval_col) + + ## pull gene IDs -------------------------------------------- + if (is.null(gene_col)) { + if (is.null(rownames(df))) + stop("Gene IDs must be in row.names or specify 'gene_col'.") + gene_ids <- rownames(df) + } else { + if (!gene_col %in% names(df)) + stop("'gene_col' not found in data.") + gene_ids <- df[[gene_col]] + } + + ## sanity ---------------------------------------------------- + if (!all(c(logFC_col, pval_col) %in% names(df))) + stop("Specified 'logFC_col' or 'pval_col' not in data.") + + ## filter ---------------------------------------------------- + keep <- !is.na(df[[logFC_col]]) & + !is.na(df[[pval_col]]) & + df[[pval_col]] <= pval_cutoff & + abs(df[[logFC_col]]) >= logFC_cutoff + df <- df[keep, ] + gene_ids <- gene_ids[keep] + + if (nrow(df) == 0) + stop("No genes left after filtering (check cut-offs).") + + ## build ranking -------------------------------------------- + stat_vec <- switch(ranking_fun, + signed_log10_p = sign(df[[logFC_col]]) * -log10(df[[pval_col]]), + logFC = df[[logFC_col]] + ) + stats <- setNames(stat_vec, gene_ids) + stats <- sort(stats, decreasing = TRUE) + + } else { + stop("'data' must be a named numeric vector or a data.frame.") + } + + ## ------------------------------------------------------------------------ + ## 2. Harmonise gene-sets (escape utility) & run fgsea + ## ------------------------------------------------------------------------ + gene.sets <- .GS.check(gene.sets) + + ## Decide scoreType automatically ---------------------------------------- + score_type <- if (all(stats >= 0)) { + "pos" # every value ≥0 + } else if (all(stats <= 0)) { + "neg" # every value ≤0 + } else { + "std" # mixture of positive and negative + } + + res <- fgsea::fgsea( + pathways = gene.sets, + stats = stats, + minSize = minSize, + maxSize = maxSize, + nproc = nproc, + scoreType = score_type) + + ## tidy -------------------------------------------------------- + res$geneRatio <- vapply(res$leadingEdge, length, integer(1L)) / res$size + res$leadingEdge <- vapply(res$leadingEdge, + paste, collapse = ";", character(1)) + res$padj <- p.adjust(res$pval, method = padjust_method) + res <- res[order(res$padj, res$pval), ] + rownames(res) <- NULL + res +} \ No newline at end of file diff --git a/R/enrichItPlot.R b/R/enrichItPlot.R new file mode 100644 index 0000000..856b865 --- /dev/null +++ b/R/enrichItPlot.R @@ -0,0 +1,139 @@ +#' Adaptive visualisation of enrichIt results +#' +#' @param res `data.frame` returned by [enrichIt()]. +#' @param plot.type `"bar"`, `"dot"`, or `"cnet"`. +#' @param top Integer. Keep the top *n* terms **per database** +#' (ranked by adjusted *p*). Set to `Inf` to keep all. +#' @param x.measure A column in `res` mapped to the *x*-axis +#' (ignored for `"cnet"`). Default `"-log10(padj)"`. +#' @param color.measure Column mapped to color (dot plot only). +#' Default same as `x.measure`. +#' @param show.counts Logical. Annotate bar plot with the `Count` (number of genes). +#' @param palette palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. +#' @param ... Further arguments passed to **ggplot2** geoms (e.g. +#' `alpha`, `linewidth`). +#' +#' @return A **patchwork** object (bar / dot) or **ggraph** object (cnet). +#' @export +#' +#' @examples +#' \dontrun{ +#' ranks <- setNames(markers$avg_log2FC, rownames(markers)) +#' gs <- getGeneSets("Homo sapiens", library = c("H", "C2")) +#' res <- enrichIt(ranks, gs) +#' +#' enrichItPlot(res) +#' enrichItPlot(res, "dot", top=10) +#' enrichItPlot(res, "cnet", top=5) +#' } +enrichItPlot <- function(res, + plot.type = c("bar", "dot", "cnet"), + top = 20, + x.measure = "-log10(padj)", + color.measure = x.measure, + show.counts = TRUE, + palette = "inferno", + ...) { + + stopifnot(is.data.frame(res)) + plot.type <- match.arg(plot.type) + + if (!requireNamespace("ggplot2", quietly = TRUE)) + stop("Please install 'ggplot2'.") + + ## 0 housekeeping ---------------------------------------------------- + res <- res[order(res$padj, res$pval), , drop = FALSE] + ## use Count if present, otherwise fall back on leadingEdge length + if (!"Count" %in% names(res)) + res$Count <- vapply(strsplit(res$leadingEdge, ";"), length, integer(1)) + + # Convert Database to factor + if ("Database" %in% names(res)) { + res$Database[is.na(res$Database)] <- "Unknown" + } else { + res$Database <- "Unknown" + } + res$Database <- factor(res$Database) + res$Term <- with(res, reorder(pathway, -padj)) + + res$`-log10(padj)` <- -log10(res$padj + 1e-300) + + ## top-n per library ------------------------------------------------------- + if (is.finite(top)) { + res <- do.call(rbind, lapply(split(res, res$Database), head, n = top)) + res$Term <- factor(res$Term, levels = unique(res$Term)) + } + + ## Bar Plot + if (plot.type == "bar") { + p <- ggplot2::ggplot(res, + ggplot2::aes(x = .data[[x.measure]], y = .data$Term)) + + ggplot2::geom_col(fill = .colorizer(palette, n = 1), ...) + + ggplot2::facet_wrap(~ Database, scales = "free_y") + + ggplot2::labs(x = x.measure, y = NULL) + + ggplot2::theme_classic() + + if (isTRUE(show.counts)) { + p <- p + ggplot2::geom_text( + ggplot2::aes(label = .data$Count, + x = .data[[x.measure]] + max(.data[[x.measure]])*0.02), + hjust = 0, size = 3) + } + p <- p + ggplot2::coord_cartesian(clip = "off") + return(patchwork::wrap_plots(p)) + + ## Dot Plot + } else if (plot.type == "dot") { + if (!requireNamespace("patchwork", quietly = TRUE)) + stop("Install 'patchwork' for facetted output.") + + p <- ggplot2::ggplot(res, + ggplot2::aes(x = .data$geneRatio, y = .data$Term, + color = .data[[color.measure]], + size = .data$size*.data$geneRatio)) + + ggplot2::geom_point(...) + + ggplot2::facet_wrap(~ Database, scales = "free_y") + + ggplot2::scale_size_continuous(name = "Core Count") + + ggplot2::labs(x = "geneRatio", y = NULL, + color = color.measure) + + ggplot2::theme_classic() + + ggplot2::theme(legend.box = "vertical") + + if (!is.null(palette)) + p <- p + ggplot2::scale_color_gradientn(colors = .colorizer(palette, 11)) + return(patchwork::wrap_plots(p)) + + # Network Plot + } else { + if (!requireNamespace("ggraph", quietly = TRUE)) + stop("Install 'ggraph' for the cnet option.") + if (!requireNamespace("igraph", quietly = TRUE)) + stop("Install 'igraph' for the cnet option.") + + # keep leading-edge genes only -> explode rows + le_df <- res[seq_len(top), c("Database", "pathway", "leadingEdge")] + le_df <- within(le_df, { + leadingEdge <- strsplit(leadingEdge, ";") + }) + edges <- do.call(rbind, lapply(1:nrow(le_df), function(i) { + data.frame(pathway = le_df$pathway[i], + gene = le_df$leadingEdge[[i]], + Database = le_df$Database[i], + stringsAsFactors = FALSE) + })) + + g <- igraph::graph_from_data_frame(edges, directed = FALSE) + igraph::V(g)$type <- ifelse(igraph::V(g)$name %in% res$pathway, "pathway", "gene") + igraph::V(g)$size <- ifelse(igraph::V(g)$type == "pathway", 8, 3) + + ggraph::ggraph(g, layout = "fr") + + ggraph::geom_edge_link(aes(alpha = after_stat(index)), show.legend = FALSE) + + ggraph::geom_node_point(aes(size = .data$size, + color = .data$type)) + + ggraph::geom_node_text(aes(label = .data$name), + repel = TRUE, size = 3, + vjust = 1.5, check_overlap = TRUE) + + ggplot2::scale_color_manual(values = .colorizer(palette, n = 2)) + + ggplot2::theme_void() + } +} diff --git a/R/escape.gene.sets.R b/R/escape.gene.sets.R index 7a7f484..c81737b 100644 --- a/R/escape.gene.sets.R +++ b/R/escape.gene.sets.R @@ -1,9 +1,24 @@ #' Built-In Gene Sets for escape -#' -#' A list of gene sets derived from Azizi, et al 2018 -#' \href{https://pubmed.ncbi.nlm.nih.gov/29961579/}{PMID: 29961579}) -#' relating to tumor immunity. +#' +#' `escape.gene.sets` ships with **escape** and provides a convenient set of +#' cell-type and pathway signatures from the scRNA-seq tumor micro-environment +#' study by Azizi *et al.* (2018, Cell \doi{10.1016/j.cell.2018.06.021}). These +#' signatures capture major immune and stromal populations observed across +#' breast-cancer samples and serve as a lightweight default for quick testing or +#' exploratory analyses. +# +#' @details +#' The original paper defined cell-type signatures as the top differentially +#' expressed genes per cluster (Azizi *et al.*, Supplementary Table S3). +#' +#' @usage data("escape.gene.sets") +#' @seealso [runEscape()], [escape.matrix()], [getGeneSets()] +#' @keywords datasets +#' @source Supplementary Table S3 in Azizi *et al.* (2018) +#' +#' @references +#' Azizi E, *et al.* **Single-cell map of diverse immune phenotypes in the +#' breast tumor microenvironment.** *Cell* 173(5):1293-1308 (2018). #' @docType data #' @name escape.gene.sets -#' -NULL +NULL \ No newline at end of file diff --git a/R/getGeneSets.R b/R/getGeneSets.R index ae0f9a2..2f05b04 100644 --- a/R/getGeneSets.R +++ b/R/getGeneSets.R @@ -1,22 +1,3 @@ -# create a cache environment. -.msigdb_cache <- new.env(parent = emptyenv()) - -# Helper function: Retrieve (or download and cache) the msigdb object. -getMsigdbCached <- function(org, id, version) { - cache_key <- paste(org, id, version, sep = "_") - - if (exists(cache_key, envir = .msigdb_cache)) { - message("Loading msigdb object from cache") - msigdb_obj <- get(cache_key, envir = .msigdb_cache) - } else { - message("Downloading msigdb object") - msigdb_obj <- suppressMessages(getMsigdb(org = org, id = id, version = version)) - msigdb_obj <- suppressMessages(suppressWarnings(appendKEGG(msigdb_obj))) - assign(cache_key, msigdb_obj, envir = .msigdb_cache) - } - return(msigdb_obj) -} - #' Get a collection of gene sets from the msigdb #' #' This function retrieves gene sets from msigdb and caches the downloaded object @@ -24,18 +5,14 @@ getMsigdbCached <- function(org, id, version) { #' subcollection, or specific gene sets, and only supports human #' ("Homo sapiens") and mouse ("Mus musculus"). #' -#' @param species The scientific name of the species of interest; only -#' "Homo sapiens" or "Mus musculus" are supported. -#' @param library A character vector of main collections (e.g. "H", "C5"). -#' If provided, only gene sets in these collections are returned. -#' @param subcategory A character vector specifying sub-collection abbreviations -#' (e.g. "CGP", "CP:REACTOME") to further subset the gene sets. -#' @param gene.sets A character vector of specific gene set names to select. -#' This filter is applied after other subsetting. -#' @param version The version of MSigDB to use (default "7.4"). -#' @param id The gene identifier type to use (default "SYM" for gene symbols). -#' -#' @return A named list of gene identifiers for each gene set. +#' @param species `"Homo sapiens"` (default) or `"Mus musculus"`. +#' @param library Character. Optional vector of main collection codes +#' (e.g. `"H"`, `"C5"`). +#' @param subcategory Character. Optional vector of sub-collection codes +#' (e.g. `"GO:BP"`). +#' @param gene.sets Character. Optional vector of specific gene-set names. +#' @param version MSigDB version (character, default `"7.4"`). +#' @param id Identifier type (default `"SYM"` for symbols). #' #' @examples #' \dontrun{ @@ -48,66 +25,99 @@ getMsigdbCached <- function(org, id, version) { #' library = c("C2", "C5"), #' subcategory = "GO:BP") #' } -#' @importFrom GSEABase GeneSet GeneSetCollection geneIds -#' @importFrom msigdb getMsigdb appendKEGG -#' @importFrom stringr str_replace_all +#' +#' @return A named `list` of character vectors (gene IDs). #' @export -getGeneSets <- function(species = "Homo sapiens", - library = NULL, - subcategory = NULL, - gene.sets = NULL, - version = "7.4", - id = "SYM") { - # Only support human and mouse. - if (!(species %in% c("Homo sapiens", "Mus musculus"))) { - stop("Supported species are only 'Homo sapiens' and 'Mus musculus'.") - } +getGeneSets <- function(species = c("Homo sapiens", "Mus musculus"), + library = NULL, + subcategory = NULL, + gene.sets = NULL, + version = "7.4", + id = "SYM") +{ + species <- match.arg(species) + org <- if (species == "Homo sapiens") "hs" else "mm" - # Map species name to the organism code used by msigdb. - org <- ifelse(species == "Homo sapiens", "hs", "mm") + ## download or fetch from cache ------------------------------------------------ + msig <- .msigdb_cached(org, id, version) - # Retrieve the msigdb object, from cache if available. - msigdb_obj <- getMsigdbCached(org = org, id = id, version = version) + ## helper to interrogate S4 slots without formal import ------------------------ + .get_slot_nested <- function(x, outer_slot, inner_slot) { + outer <- methods::slot(x, outer_slot) + methods::slot(outer, inner_slot) + } + + ## apply successive filters in one pass --------------------------------------- + keep <- rep(TRUE, length(msig)) - # Filter by main collection using the S4 slot: if (!is.null(library)) { - msigdb_obj <- msigdb_obj[sapply(msigdb_obj, function(x) - toupper(x@collectionType@category) %in% toupper(library))] + keep <- keep & vapply(msig, + \(x) toupper(.get_slot_nested(x, "collectionType", "category")), + "", USE.NAMES = FALSE) %in% toupper(library) } - # Filter by subcollection using the S4 slot: if (!is.null(subcategory)) { - msigdb_obj <- msigdb_obj[sapply(msigdb_obj, function(x) - x@collectionType@subCategory %in% toupper(subcategory))] + keep <- keep & vapply(msig, + function(x) { + ct <- methods::slot(x, "collectionType") + toupper(methods::slot(ct, "subCategory")) + }, + "", USE.NAMES = FALSE) %in% toupper(subcategory) } - # Optional filtering by specific gene set names. if (!is.null(gene.sets)) { - msigdb_obj <- msigdb_obj[sapply(msigdb_obj, function(x) x@setName %in% gene.sets)] + keep <- keep & vapply(msig, \(x) x@setName, "", USE.NAMES = FALSE) %in% gene.sets } - if (length(msigdb_obj) == 0) { - warning("No gene sets found for the specified parameters.") + msig <- msig[keep] + if (!length(msig)) { + warning("No gene sets matched the requested filters.") return(NULL) } - # Build the gene set list. - gs_names <- unique(sapply(msigdb_obj, function(x) x@setName)) - gene_set_list <- vector("list", length(gs_names)) - for (i in seq_along(gs_names)) { - genes <- unique(unlist(lapply(msigdb_obj, function(x) { - if (x@setName == gs_names[i]) { - return(x@geneIds) - } - }))) - gene_set_list[[i]] <- GSEABase::GeneSet(genes, setName = gs_names[i]) - } + ## build simple list ----------------------------------------------------------- + g.list <- lapply(msig, function(x) x@geneIds) + names(g.list) <- vapply(msig, function(x) x@setName, "", USE.NAMES = FALSE) + names(g.list) <- gsub("_", "-", names(g.list), fixed = TRUE) - # Create a GeneSetCollection and return as a named list. - gsc <- GSEABase::GeneSetCollection(gene_set_list) - mod.names <- stringr::str_replace_all(names(gsc), "_", "-") - gene_list <- GSEABase::geneIds(gsc) - names(gene_list) <- mod.names + ## optionally attach GeneSetCollection invisibly ------------------------------ + if (requireNamespace("GSEABase", quietly = TRUE)) { + gsc <- GSEABase::GeneSetCollection( + Map(GSEABase::GeneSet, g.list, setName = names(g.list)) + ) + invisible(gsc) + } - return(gene_list) + g.list } + +# Setting up cache system +.msigdb_cache_dir <- tools::R_user_dir("escape", "cache") +dir.create(.msigdb_cache_dir, showWarnings = FALSE, recursive = TRUE) + +# Function to cache and retrieve MSigDB gene sets +.msigdb_cached <- function(org, id = "SYM", version = "7.4") { + key <- paste(org, id, version, sep = "_") + file_path <- file.path(.msigdb_cache_dir, paste0(key, ".rds")) + + if (file.exists(file_path)) { + gs <- readRDS(file_path) + } else { + if (!requireNamespace("msigdb", quietly = TRUE)) + stop("Package 'msigdb' must be installed to download MSigDB resources") + + gs <- suppressMessages( + msigdb::getMsigdb(org = org, id = id, version = version) + ) + + # Optionally append KEGG pathways, but fail gracefully + gs <- tryCatch( + suppressWarnings(msigdb::appendKEGG(gs)), + error = function(e) gs + ) + + saveRDS(gs, file_path) + } + + gs +} \ No newline at end of file diff --git a/R/geyserEnrichment.R b/R/geyserEnrichment.R index 194bdde..2ab033c 100644 --- a/R/geyserEnrichment.R +++ b/R/geyserEnrichment.R @@ -1,120 +1,108 @@ -#' Generate a ridge plot to examine enrichment distributions +#' Visualize Enrichment Distributions Using Geyser Plots #' #' This function allows to the user to examine the distribution of -#' enrichment across groups by generating a ridge plot. +#' enrichment across groups by generating a geyser plot. #' -#' @param input.data Enrichment output from \code{\link{escape.matrix}} or -#' \code{\link{runEscape}}. -#' @param assay Name of the assay to plot if data is a single-cell object. -#' @param group.by Categorical parameter to plot along the x.axis. If input is -#' a single-cell object the default will be cluster. -#' @param gene.set Gene set to plot (on y-axis). -#' @param color.by How the color palette applies to the graph - can -#' be \strong{"group"} for a categorical color palette based on the -#' \strong{group.by} parameter or use the \strong{gene.set} name if wanting to -#' apply a gradient palette. -#' @param order.by Method to organize the x-axis: \strong{"mean"} will arrange -#' the x-axis by the mean of the gene.set, while \strong{"group"} will arrange -#' the x-axis by in alphanumerical order. Using \strong{NULL} will not reorder -#' the x-axis. -#' @param facet.by Variable to facet the plot into n distinct graphs. -#' @param scale Visualize raw values \strong{FALSE} or Z-transform -#' enrichment values \strong{TRUE}. -#' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' @param input.data Output of \code{\link{escape.matrix}} or a single‑cell +#' object previously processed by \code{\link{runEscape}}. +#' @param assay Name of the assay holding enrichment scores when +#' `input.data` is a single‑cell object. Ignored otherwise. +#' @param group.by Metadata column plotted on the *x*‑axis. Defaults to the +#' Seurat/SCE `ident` slot when `NULL`. +#' @param gene.set Character(1). Gene‑set to plot (must exist in the +#' enrichment matrix). +#' @param color.by Aesthetic mapped to point color. Use either +#' *"group"* (default = `group.by`) for categorical coloring or the +#' *name of a gene‑set* (e.g. same as `gene.set`) to obtain a numeric +# gradient. Any other metadata or column present in the data is also +#' accepted. +#' @param order.by How to arrange the x‑axis: +#' *`"mean"`* – groups ordered by decreasing group mean; +#' *`"group"`* – natural sort of group labels; +#' *`NULL`* – keep original ordering. +#' @param facet.by Optional metadata column used to facet the plot. +#' @param scale Logical; if `TRUE` scores are centered/scaled (Z‑score) prior +#' to plotting. +#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. #' -#' @import ggplot2 -#' @importFrom ggdist stat_pointinterval -#' #' @examples -#' GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +#' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), #' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -#' pbmc_small <- SeuratObject::pbmc_small -#' pbmc_small <- runEscape(pbmc_small, -#' gene.sets = GS, -#' min.size = NULL) -#' -#' geyserEnrichment(pbmc_small, -#' assay = "escape", +#' +#' pbmc <- SeuratObject::pbmc_small |> +#' runEscape(gene.sets = gs, +#' min.size = NULL) +#' +#' geyserEnrichment(pbmc, +#' assay = "escape", #' gene.set = "Tcells") #' +#' @import ggplot2 +#' @importFrom ggdist stat_pointinterval +#' @importFrom stats as.formula +#' @return A \pkg{ggplot2} object. #' @export -#' -#' @return ggplot2 object with geyser-based distributions of selected gene.set -geyserEnrichment <- function(input.data, - assay = NULL, - group.by =NULL, - gene.set = NULL, - color.by = "group", - order.by = NULL, - scale = FALSE, - facet.by = NULL, - palette = "inferno") { +geyserEnrichment <- function(input.data, + assay = NULL, + group.by = NULL, + gene.set, + color.by = "group", + order.by = NULL, + scale = FALSE, + facet.by = NULL, + palette = "inferno") { + ## ---- 0) Sanity checks ----------------------------------------------------- + if (missing(gene.set) || length(gene.set) != 1L) + stop("Please supply exactly one 'gene.set' to plot.") - if(is.null(group.by)) { + if (is.null(group.by)) group.by <- "ident" - } - if(color.by == "group") { + if (identical(color.by, "group")) color.by <- group.by - } - - enriched <- .prepData(input.data, assay, gene.set, group.by, NULL, facet.by) - if(!is.null(order.by) && !is.null(group.by)) { - enriched <- .orderFunction(enriched, order.by, group.by) - } + ## ---- 1) Build tidy data.frame ------------------------------------------- + enriched <- .prepData(input.data, assay, gene.set, group.by, + split.by = NULL, facet.by = facet.by) - if(scale) { - enriched[,gene.set] <- as.numeric(scale(enriched[,gene.set])) - } + ## Optionally Z‑transform ---------------------------------------------------- + if (scale) + enriched[[gene.set]] <- as.numeric(scale(enriched[[gene.set]])) - if(inherits(enriched[,color.by], "numeric") && gene.set == color.by) { - gradient.format <- TRUE - } else { - gradient.format <- FALSE - } - - plot <- ggplot(data = enriched, - mapping = aes(x = enriched[,group.by], - y = enriched[,gene.set], - color = enriched[,color.by])) + ## Optionally reorder groups ------------------------------------------------- + if (!is.null(order.by)) + enriched <- .orderFunction(enriched, order.by, group.by) - plot <- plot + - geom_jitter(size = 2, - na.rm = TRUE) + - stat_pointinterval(interval_size_range = c(2, 3), - fatten_point = 1.5, - interval_color = "white", - point_color = "white", - position = position_dodge(width = 1), - na.rm = TRUE, - show.legend = FALSE) + - stat_pointinterval(interval_size_range = c(1, 2), - interval_color = "black", - point_color = "black", - position = position_dodge(width = 1), - na.rm = TRUE, - show.legend = FALSE) + ## ---- 2) Plot -------------------------------------------------------------- + plt <- ggplot(enriched, aes(x = .data[[group.by]], + y = .data[[gene.set]], + colour = .data[[color.by]])) + + # Raw points -------------------------------------------------------------- + geom_jitter(width = 0.25, size = 1.5, alpha = 0.6, na.rm = TRUE) + + + # White base interval + median point ------------------------------------- + stat_pointinterval(interval_size_range = c(2, 3), fatten_point = 1.4, + interval_colour = "white", point_colour = "white", + position = position_dodge(width = 0.6), show.legend = FALSE) + + + # Black outline for clarity ---------------------------------------------- + stat_pointinterval(interval_size_range = c(1, 2), fatten_point = 1.4, + interval_colour = "black", point_colour = "black", + position = position_dodge(width = 0.6), show.legend = FALSE) + + + labs(x = group.by, + y = paste0(gene.set, "\nEnrichment Score"), + colour = color.by) + + theme_classic() + + theme(legend.direction = "horizontal", + legend.position = "bottom") - plot <- plot + - xlab(group.by) + - ylab(paste0(gene.set, "\n Enrichment Score")) + - theme_classic() + - guides(fill = "none") + ## ---- 3) Colour scale ------------------------------------------------------ + plt <- .colorby(enriched, plt, color.by, palette, type = "color") - plot <- .colorby(enriched, - plot, - color.by, - palette, - type = "color") + ## ---- 4) Facetting --------------------------------------------------------- + if (!is.null(facet.by)) + plt <- plt + facet_grid(as.formula(paste(".~", facet.by))) - if (!is.null(facet.by)) { - plot <- plot + - facet_grid(as.formula(paste('. ~', facet.by))) - } - plot <- plot + - theme(legend.direction = "horizontal", - legend.position = "bottom") - return(plot) + plt } diff --git a/R/global.R b/R/global.R deleted file mode 100644 index 82d3abc..0000000 --- a/R/global.R +++ /dev/null @@ -1,25 +0,0 @@ -.onLoad <- function (libname, pkgname) -{ - - utils::globalVariables ("model.matrix") - utils::globalVariables ("t.test") - utils::globalVariables ("p.adjust") - utils::globalVariables ("aov") - utils::globalVariables ("as.formula") - utils::globalVariables ("factors.x") - utils::globalVariables ("factors.y") - utils::globalVariables ("slot") - utils::globalVariables ("GS") - utils::globalVariables ("na.omit") - utils::globalVariables ("segmenty") - utils::globalVariables ("segmenty2") - utils::globalVariables ("value") - utils::globalVariables ("variable") - utils::globalVariables (".SD") - utils::globalVariables ("gene.set.query") - utils::globalVariables (".") - utils::globalVariables ("x") - utils::globalVariables ("median") - invisible () - -} \ No newline at end of file diff --git a/R/gseaEnrichment.R b/R/gseaEnrichment.R new file mode 100644 index 0000000..a466f6d --- /dev/null +++ b/R/gseaEnrichment.R @@ -0,0 +1,200 @@ +#' Classical GSEA-style Running-Enrichment Plot +#' +#' Produces the familiar two-panel GSEA graphic—running enrichment score +#' (RES) plus a “hit” rug—for a **single gene-set** evaluated across +#' multiple biological groups (clusters, conditions, samples, ...). +#' +#' **Algorithm (Subramanian _et al._, PNAS 2005)** +#' 1. Within every group, library-size-normalise counts to CPM. +#' 2. Collapse gene expression with `summary.fun` (mean/median/…). +#' 3. Rank genes (descending) to obtain one ordered list per group. +#' 4. Compute the weighted Kolmogorov–Smirnov running score +#' (weight = \|stat\|^*p*). +#' 5. ES = maximum signed deviation of the curve. +#' +#' @param input.data A \link[SeuratObject]{Seurat} object or a +#' \link[SingleCellExperiment]{SingleCellExperiment}. +#' @param gene.set.use Character(1). Name of the gene set to display. +#' @param gene.sets A named list of character vectors, the result of +#' [getGeneSets()], or the built-in data object [escape.gene.sets]. +#' @param group.by Metadata column. Defaults to the Seurat/SCE `ident` +#' slot when `NULL`. +#' @param summary.fun Method used to collapse expression within each +#' group **before** ranking: one of `"mean"` (default), `"median"`, `"max"`, +#'`"sum"`, or `"geometric"`. +#' @param p Weighting exponent in the KS statistic (classical GSEA uses `p = 1`). +#' @param nperm Integer ≥ 0. Gene-label permutations per group (default 1000). +#' `0` value will skip NES/*p* calculation. +#' @param rug.height Vertical spacing of the hit rug as a fraction of the +#' y-axis (default `0.02`). +#' @param digits Number of decimal places displayed for ES in the +#' legend (default `2`). +#' @param BPPARAM A \pkg{BiocParallel} parameter object describing the +#' parallel backend. +#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. +#' +#' @examples +#' pbmc_small <- SeuratObject::pbmc_small +#' +#' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +#' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) +#' +#' gseaEnrichment(pbmc_small, +#' gene.set.use = "Bcells", +#' gene.sets = gs, +#' group.by = "groups", +#' summary.fun = "mean", +#' digits = 3) +#' +#' @seealso \code{\link{escape.matrix}}, \code{\link{densityEnrichment}} +#' @importFrom stats na.omit +#' @return A single `patchwork`/`ggplot2` object +#' @export +gseaEnrichment <- function(input.data, + gene.set.use, + gene.sets, + group.by = NULL, + summary.fun = "mean", + p = 1, + nperm = 1000, + rug.height = 0.02, + digits = 2, + BPPARAM = NULL, + palette = "inferno") { + + ## ---- 0. Checks ---------------------------------------------------------- + gene.sets <- .GS.check(gene.sets) + if (length(gene.set.use) != 1L) + stop("'gene.set.use' must be length 1") + if (!gene.set.use %in% names(gene.sets)) + stop("Unknown gene-set") + + if (is.null(group.by)) group.by <- "ident" + meta <- .grabMeta(input.data) + if (!group.by %in% colnames(meta)) + stop("'", group.by, "' not found in metadata") + + groups <- stats::na.omit(unique(meta[[group.by]])) + if (length(groups) < 2) + stop("Need 2 or more groups") + + summary.fun <- .match_summary_fun(summary.fun) + + ## ---- 1. Expression matrix & rankings ------------------------------------ + cnts <- .cntEval(input.data, assay = "RNA", type = "counts") |> + .filterFeatures() + + gs.genes <- intersect(gene.sets[[gene.set.use]], rownames(cnts)) + if (!length(gs.genes)) + stop("Gene-set has no overlap with the matrix") + + getStats <- function(mat) { + keyword <- attr(summary.fun, "keyword") + switch(keyword, + mean = MatrixGenerics::rowMeans2(mat), + median = MatrixGenerics::rowMedians(mat), + max = MatrixGenerics::rowMaxs(mat), + sum = MatrixGenerics::rowSums2(mat), + geometric = exp(MatrixGenerics::rowMeans2(log1p(mat)))) # log1p is sparse-safe + } + + ranking.list <- lapply(groups, function(g) { + idx <- which(meta[[group.by]] == g) + lib <- Matrix::colSums(cnts[, idx, drop = FALSE]) / 1e6 # CPM scale + sub <- cnts[, idx, drop = FALSE] + + # Sparse-safe column normalization using Diagonal + norm <- sub %*% Matrix::Diagonal(x = 1 / lib) + + stat <- getStats(norm) + sort(stat, decreasing = TRUE) + }) + names(ranking.list) <- groups + n.genes <- length(ranking.list[[1L]]) + + ## ---- 2. ES, NES, p-value per group -------------------------------------- + es <- nes <- pval <- numeric(length(groups)) + curves <- vector("list", length(groups)) + + for (i in seq_along(groups)) { + rvec <- ranking.list[[i]] + weight <- abs(rvec[gs.genes])^p + curves[[i]] <- .computeRunningES(names(rvec), gs.genes, weight) + es[i] <- ifelse(max(abs(curves[[i]])) == abs(max(curves[[i]])), + max(curves[[i]]), min(curves[[i]])) + + ## ---- permutation null -------------------------------------------------- + if (nperm > 0) { + nullES <- .plapply( + seq_len(nperm), + function(xx) { + hits <- sample.int(n.genes, length(gs.genes)) + weight <- abs(rvec[hits])^p + cur <- .computeRunningES(names(rvec), names(rvec)[hits], weight) + ifelse(max(abs(cur)) == abs(max(cur)), max(cur), min(cur)) + }, + BPPARAM = BPPARAM, # will be ignored in serial mode + parallel = TRUE # set FALSE to force serial execution + ) + nullES <- unlist(nullES, use.names = FALSE) + + nes[i] <- es[i] / mean(abs(nullES)) + pval[i] <- (sum(abs(nullES) >= abs(es[i])) + 1) / (nperm + 1) + } else { + nes[i] <- NA_real_ + pval[i] <- NA_real_ + } + } + + ## ---- 3. Legend labels ---------------------------------------------------- + labES <- formatC(es, digits = digits, format = "f") + labNES <- formatC(nes, digits = digits, format = "f") + labP <- ifelse(is.na(pval), "NA", + formatC(pval, digits = 2, format = "e")) + pretty.grp <- paste0(groups, + " (NES = ", labNES, + ", p = ", labP, ")") + + ## ---- 4. Data frames for ggplot ------------------------------------------ + running.df <- data.frame( + rank = rep(seq_len(n.genes), times = length(groups)), + ES = unlist(curves, use.names = FALSE), + grp = factor(rep(pretty.grp, each = n.genes), levels = pretty.grp) + ) + + rug.df <- do.call(rbind, lapply(seq_along(groups), function(i) { + data.frame( + x = which(names(ranking.list[[i]]) %in% gs.genes), + y = -(i-1)*rug.height, + xend = which(names(ranking.list[[i]]) %in% gs.genes), + yend = -(i)*rug.height, + grp = pretty.grp[i]) + })) + + ## ---- 5. Plot ------------------------------------------------------------- + cols <- .colorizer(palette, length(groups)) + + p_top <- ggplot2::ggplot(running.df, ggplot2::aes(rank, ES, colour = grp)) + + ggplot2::geom_step(linewidth = 0.8) + + ggplot2::geom_hline(yintercept = 0) + + ggplot2::scale_colour_manual(values = cols, name = NULL) + + ggplot2::labs(y = "Running Enrichment Score") + + ggplot2::theme_classic() + + ggplot2::theme(axis.title.x = element_blank(), + axis.text.x = element_blank(), + axis.ticks.x = element_blank()) + + p_mid <- ggplot2::ggplot(rug.df) + + ggplot2::geom_segment(ggplot2::aes(x, y, xend = xend, yend = yend, + colour = grp)) + + ggplot2::scale_colour_manual(values = cols, guide = "none") + + theme_classic() + + ggplot2::ylim(-length(groups)*rug.height, 0) + + theme(axis.title = element_blank(), + axis.text.y = element_blank(), + axis.ticks.y = element_blank(), + panel.border = element_rect(fill = NA, colour = "black", linewidth = 0.5)) + + p_top / p_mid + patchwork::plot_layout(heights = c(3, 0.4)) +} + diff --git a/R/heatmapEnrichment.R b/R/heatmapEnrichment.R index 78347ab..be80a04 100644 --- a/R/heatmapEnrichment.R +++ b/R/heatmapEnrichment.R @@ -1,127 +1,135 @@ -#' Generate a heatmap to visualize enrichment values +#' Visualize Enrichment Value Summaries Using Heatmaps #' #' This function allows to the user to examine the heatmap with the mean #' enrichment values by group. The heatmap will have the gene sets as rows #' and columns will be the grouping variable. #' -#' @param input.data Enrichment output from \code{\link{escape.matrix}} or -#' \code{\link{runEscape}}. -#' @param assay Name of the assay to plot if data is a single-cell object. -#' @param group.by Categorical parameter to plot along the x.axis. If input is -#' a single-cell object the default will be cluster. -#' @param gene.set.use Selected gene sets to visualize. If \strong{"all"}, the -#' heatmap will be generated across all gene sets. -#' @param cluster.rows Use Euclidean distance to order the row values. -#' @param cluster.columns Use Euclidean distance to order the column values. -#' @param facet.by Variable to facet the plot into n distinct graphs. -#' @param scale Visualize raw values \strong{FALSE} or Z-transform -#' enrichment values \strong{TRUE}. -#' @param summary.stat Use \strong{'median'} or \strong{'mean'} values -#' to display. -#' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. -#' -#' @import ggplot2 -#' @importFrom stats dist hclust -#' @importFrom dplyr %>% group_by summarise across -#' @importFrom reshape2 melt -#' -#' @examples -#' GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), -#' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -#' pbmc_small <- SeuratObject::pbmc_small -#' pbmc_small <- runEscape(pbmc_small, -#' gene.sets = GS, -#' min.size = NULL) -#' -#' heatmapEnrichment(pbmc_small, -#' assay = "escape") +#' @param input.data Output of \code{\link{escape.matrix}} or a single‑cell +#' object previously processed by \code{\link{runEscape}}. +#' @param assay Name of the assay holding enrichment scores when +#' `input.data` is a single‑cell object. Ignored otherwise. +#' @param group.by Metadata column plotted on the *x*‑axis. Defaults to the +#' Seurat/SCE `ident` slot when `NULL`. +#' @param gene.set.use Vector of gene‑set names to plot, or \code{"all"} +#' (default) to show every available gene set. +#' @param cluster.rows,cluster.columns Logical; if \code{TRUE}, rows/columns +#' are ordered by Ward‑linkage hierarchical clustering (Euclidean distance). +#' @param facet.by Optional metadata column used to facet the plot. +#' @param scale If \code{TRUE}, Z‑transforms each gene‑set column **after** +#' summarization. +#' @param summary.stat Method used to summarize expression within each +#* group: one of `"mean"` (default), `"median"`, `"max"`, +#*`"sum"`, or `"geometric"` +#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. #' +#' @return A \code{ggplot2} object. +#' @importFrom stats aggregate dist hclust #' @export #' -#' @return ggplot2 object with heatmap of mean enrichment values - -heatmapEnrichment <- function(input.data, - assay = NULL, - group.by = NULL, - gene.set.use = "all", - cluster.rows = FALSE, - cluster.columns = FALSE, - facet.by = NULL, - scale = FALSE, - summary.stat = "mean", - palette = "inferno") { - - options(dplyr.summarise.inform = FALSE) - if(is.null(group.by)) { - group.by <- "ident" +#' @examples +#' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +#' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) +#' +#' pbmc <- SeuratObject::pbmc_small |> +#' runEscape(gene.sets = gs, min.size = NULL) +#' +#' heatmapEnrichment(pbmc, assay = "escape", palette = "viridis") +#' +heatmapEnrichment <- function(input.data, + assay = NULL, + group.by = NULL, + gene.set.use = "all", + cluster.rows = FALSE, + cluster.columns= FALSE, + facet.by = NULL, + scale = FALSE, + summary.stat = "mean", + palette = "inferno") +{ + # ---------- 1. helper to match summary function ------------------------- + .match_summary_fun <- function(fun) { + if (is.function(fun)) return(fun) + if (!is.character(fun) || length(fun) != 1) + stop("'summary.stat' must be a single character keyword or a function") + kw <- tolower(fun) + fn <- switch(kw, + mean = base::mean, + median = stats::median, + sum = base::sum, + sd = stats::sd, + max = base::max, + min = base::min, + geometric = function(x) exp(mean(log(x + 1e-6))), + stop("Unsupported summary keyword: ", fun)) + fn } + summary_fun <- .match_summary_fun(summary.stat) - enriched <- .prepData(input.data, assay, gene.set.use, group.by, NULL, facet.by) + # ---------- 2. pull / tidy data ----------------------------------------- + if (is.null(group.by)) group.by <- "ident" + df <- .prepData(input.data, assay, gene.set.use, + group.by = group.by, + split.by = NULL, + facet.by = facet.by) - if(length(gene.set.use) == 1 && gene.set.use == "all") { - gene.set <- colnames(enriched)[colnames(enriched) %!in% c(group.by, facet.by)] - } else { + # Which columns contain gene-set scores? + if (identical(gene.set.use, "all")) + gene.set <- setdiff(colnames(df), c(group.by, facet.by)) + else gene.set <- gene.set.use - } - - if(summary.stat %!in% c("median", "mean")) { - message("Please select 'median' or 'mean' for the summary.stat argument. Using mean as a default") - summary_func <- mean - } - # Select the appropriate summary function - summary_func <- if (summary.stat == "median") median else mean + if (!length(gene.set)) + stop("No gene-set columns found to plot.") - if(!is.null(facet.by)) { - enriched.summary <- enriched %>% - group_by(.data[[group.by]], .data[[facet.by]]) %>% - summarise(across(which(colnames(enriched) %in% gene.set), mean)) %>% - as.data.frame() - } else { - enriched.summary <- enriched %>% - group_by(.data[[group.by]]) %>% - summarise(across(which(colnames(enriched) %in% gene.set), mean)) %>% - as.data.frame() - } + # ---------- 3. summarise with **base aggregate()** ---------------------- + grp_cols <- c(group.by, facet.by) # one or two columns + agg <- aggregate(df[gene.set], + by = df[grp_cols], + FUN = summary_fun, + SIMPLIFY = FALSE) + # aggregate() keeps grouping columns first; ensure correct names + names(agg)[seq_along(grp_cols)] <- grp_cols - if(scale) { - enriched.summary[,gene.set] <- apply(enriched.summary[,gene.set], 2, scale) - } + # Optional Z-transform AFTER summary + if (scale) + agg[gene.set] <- lapply(agg[gene.set], scale) - reformated.enriched <- suppressMessages(melt(enriched.summary)) + # ---------- 4. long format for ggplot (base-R) -------------------------- + long <- data.frame( + variable = rep(gene.set, each = nrow(agg)), + value = as.vector(t(agg[gene.set])), + group = rep(agg[[group.by]], times = length(gene.set)), + stringsAsFactors = FALSE + ) + if (!is.null(facet.by)) + long[[facet.by]] <- rep(agg[[facet.by]], times = length(gene.set)) - if(cluster.rows) { - row.order <- gene.set[hclust(dist(t(enriched.summary[,gene.set]), method = "euclidean"), method = "ward.D2")$order] - reformated.enriched[,"variable"] <- factor(reformated.enriched[,"variable"], levels = row.order) + # ---------- 5. optional clustering -------------------------------------- + if (cluster.rows) { + ord <- hclust(dist(t(agg[gene.set])), method = "ward.D2")$order + long$variable <- factor(long$variable, levels = gene.set[ord]) } - - if(cluster.columns) { - column.order <- unique(enriched.summary[,group.by][hclust(dist(enriched.summary[,gene.set], method = "euclidean"), method = "ward.D2")$order]) - reformated.enriched[,group.by] <- factor(reformated.enriched[,group.by], levels = as.vector(column.order)) + if (cluster.columns) { + ord <- hclust(dist(agg[gene.set]), method = "ward.D2")$order + long$group <- factor(long$group, levels = agg[[group.by]][ord]) } - - plot <- ggplot(reformated.enriched, - mapping = aes(x = reformated.enriched[,group.by], - y = variable, - fill = value)) + - geom_tile(color = "black", linewidth = 0.5) + - scale_y_discrete(expand = c(0, 0)) + - scale_x_discrete(expand = c(0, 0)) + - labs(fill = "Enrichment Score") + - guides(fill = guide_colorbar(title.position = "top", - title.hjust = 0.5)) + - coord_equal() + - scale_fill_gradientn(colors = .colorizer(palette, 11)) + - theme_classic() + - theme(axis.title = element_blank(), - axis.ticks = element_blank(), - legend.direction = "horizontal", - legend.position = "bottom") - - if (!is.null(facet.by)) { - plot <- plot + - facet_grid(as.formula(paste('. ~', facet.by))) - } - return(plot) + # ---------- 6. draw ------------------------------------------------------ + p <- ggplot2::ggplot(long, + ggplot2::aes(x = group, y = variable, fill = value)) + + ggplot2::geom_tile(colour = "black", linewidth = 0.4) + + ggplot2::scale_fill_gradientn(colours = .colorizer(palette, 11), + name = "Enrichment") + + ggplot2::scale_x_discrete(expand = c(0, 0)) + + ggplot2::scale_y_discrete(expand = c(0, 0)) + + ggplot2::coord_equal() + + ggplot2::theme_classic() + + ggplot2::theme(axis.title = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), + legend.position = "bottom", + legend.direction= "horizontal") + + if (!is.null(facet.by)) + p <- p + ggplot2::facet_grid(stats::as.formula(paste(". ~", facet.by))) + + p } diff --git a/R/pcaEnrichment.R b/R/pcaEnrichment.R index 8ecd102..3064e33 100644 --- a/R/pcaEnrichment.R +++ b/R/pcaEnrichment.R @@ -1,26 +1,23 @@ -#' Visualize the PCA of enrichment values +#' Visualize the PCA of Enrichment Values #' #' This function allows to the user to examine the distribution #' of principal components run on the enrichment values. #' -#' @param input.data PCA from \code{\link{performPCA}}. -#' @param dimRed Name of the dimensional reduction to plot if data is a single-cell object. -#' @param x.axis Component to plot on the x.axis. -#' @param y.axis Component set to plot on the y.axis. -#' @param facet.by Variable to facet the plot into n distinct graphs. -#' @param style Return a \strong{"hex"} bin plot or a \strong{"point"}-based plot. -#' @param add.percent.contribution Add the relative percent of contribution of the -#' selected components to the axis labels. -#' @param display.factors Add an arrow overlay to show the direction and magnitude of individual -#' gene sets on the PCA dimensions. -#' @param number.of.factors The number of gene.sets to display on the overlay. -#' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. -#' -#' @import ggplot2 -#' @importFrom dplyr slice_max %>% +#' @param input.data Single‑cell object (Seurat / SCE) **or** the raw list +#' returned by [`performPCA()`]. +#' @param dimRed Name of the dimensional‑reduction slot to pull from a +#' single‑cell object. Ignored when `input.data` is the list output. +#' @param x.axis,y.axis Character vectors naming the PCs to display (e.g. "PC1"). +#' @param facet.by Metadata column to facet plot. +#' @param style "point" (default) or "hex". +#' @param add.percent.contribution Include percent variance explained in axis +#' labels. +#' @param display.factors Draw arrows for the top gene‑set loadings. +#' @param number.of.factors Integer; how many loadings to display if +#' `display.factors = TRUE`. +#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. #' -#' @examples +#' #' @examples #' GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), #' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) #' pbmc_small <- SeuratObject::pbmc_small @@ -36,128 +33,117 @@ #' y.axis = "PC2", #' dimRed = "escape.PCA") #' +#' @return A **ggplot2** object. +#' @importFrom utils head #' @export -#' -#' @return ggplot2 object with PCA distribution -pcaEnrichment <- function(input.data, +pcaEnrichment <- function(input.data, dimRed = NULL, - x.axis = "PC1", + x.axis = "PC1", y.axis = "PC2", facet.by = NULL, - style = "point", + style = c("point", "hex"), add.percent.contribution = TRUE, display.factors = FALSE, number.of.factors = 10, palette = "inferno") { + style <- match.arg(style) - if (is_seurat_or_se_object(input.data)) { - pca.values <- .grabDimRed(input.data, dimRed) - } else if (inherits(input.data, "list") & length(input.data) == 4) { + # ------------------------------------------------------------------------ + # 1. Extract PCA slots ---------------------------------------------------- + # ------------------------------------------------------------------------ + if (.is_seurat_or_sce(input.data)) { + pca.values <- .grabDimRed(input.data, dimRed) + } else if (is.list(input.data) && length(input.data) == 4) { pca.values <- input.data - if(!is.null(facet.by)) { - stop("group.by parameter requires input.data to be a single-cell object.") - } + if (!is.null(facet.by)) + stop("facet.by is only valid with a single-cell object.") } else { - stop("input.data does not seem to be a single-cell object or a product of performPCA().") + stop("'input.data' must be a Seurat / SCE object or the list from performPCA().") } - x.axis.dim <- as.numeric(substring(x.axis, 3, nchar(x.axis))) - y.axis.dim <- as.numeric(substring(y.axis, 3, nchar(y.axis))) + # Helper to convert PCX to X + pc_idx <- function(pc) as.integer(sub("PC", "", pc, ignore.case = TRUE)) + x.idx <- pc_idx(x.axis) + y.idx <- pc_idx(y.axis) - if(add.percent.contribution & length(pca.values) == 4) { - x.axis.title <- paste0(x.axis, "\n (", pca.values[[3]][x.axis.dim],"%)") - y.axis.title <- paste0(y.axis, "\n (", pca.values[[3]][y.axis.dim],"%)") + # Axis labels with % variance ------------------------------------------------ + if (add.percent.contribution && "contribution" %in% names(pca.values)) { + pc.var <- pca.values$contribution + x.title <- sprintf("%s (%.1f%%)", x.axis, pc.var[x.idx]) + y.title <- sprintf("%s (%.1f%%)", y.axis, pc.var[y.idx]) } else { - x.axis.title <- x.axis - y.axis.title <- y.axis + x.title <- x.axis + y.title <- y.axis } + # ------------------------------------------------------------------------ + # 2. Build plotting data.frame ------------------------------------------- + # ------------------------------------------------------------------------ plot.df <- as.data.frame(pca.values[[1]]) - if(!is.null(facet.by)) { + if (!is.null(facet.by)) { meta <- .grabMeta(input.data) - if(facet.by %!in% colnames(meta)) { - stop("Please select a variable in your meta data to use for facet.by.") - } - col.pos <- ncol(plot.df) - plot.df <- cbind.data.frame(plot.df, meta[,facet.by]) - colnames(plot.df)[col.pos+1] <- facet.by + if (!facet.by %in% colnames(meta)) + stop("'", facet.by, "' not found in the single-cell object metadata.") + plot.df[[facet.by]] <- meta[[facet.by]] } - plot <- ggplot(data = plot.df, - mapping = aes(x = plot.df[,x.axis.dim], - y = plot.df[,y.axis.dim])) + # ------------------------------------------------------------------------ + # 3. Base ggplot ---------------------------------------------------------- + # ------------------------------------------------------------------------ + aes.map <- ggplot2::aes(x = plot.df[,x.idx], y = plot.df[,y.idx]) + g <- ggplot2::ggplot(plot.df, aes.map) - if(style == "point") { - plot <- plot + - geom_pointdensity() + - scale_color_gradientn(colors = .colorizer(palette, 11)) + - labs(color = "Relative Density") - } else if (style == "hex") { - plot <- plot + - stat_binhex() + - scale_fill_gradientn(colors = .colorizer(palette, 11)) - labs(fill = "Relative Density") + if (style == "point") { + if (!requireNamespace("ggpointdensity", quietly = TRUE)) { + warning("Package `ggpointdensity` not installed, falling back to alpha-blended points") + g <- g + ggplot2::geom_point(alpha = 0.4, size = 0.6) + } else { + g <- g + ggpointdensity::geom_pointdensity() + + ggplot2::scale_color_gradientn(colors = grDevices::hcl.colors(11, palette)) + + ggplot2::labs(color = "Density") + } + } else { # hex‑bin + if (!requireNamespace("hexbin", quietly = TRUE)) + stop("'hexbin' package required for style = 'hex'.") + g <- g + ggplot2::stat_binhex() + + ggplot2::scale_fill_gradientn(colors = grDevices::hcl.colors(11, palette)) + + ggplot2::labs(fill = "Count") } - plot <- plot + - ylab(y.axis.title) + - xlab(x.axis.title) + - theme_classic() + g <- g + ggplot2::labs(x = x.title, y = y.title) + ggplot2::theme_classic() - if (!is.null(facet.by)) { - plot <- plot + - facet_grid(as.formula(paste('. ~', facet.by))) - } + if (!is.null(facet.by)) + g <- g + ggplot2::facet_grid(stats::as.formula(paste(".~", facet.by))) - if(display.factors) { - x.range <- range(plot.df[,x.axis.dim]) - - y.range <- range(plot.df[,y.axis.dim]) - - tbl <- data.frame(names = row.names(pca.values[[4]]), - factors.y = pca.values[[4]][,y.axis.dim]^2/sum(pca.values[[4]][,y.axis.dim]^2), - factors.x = pca.values[[4]][,x.axis.dim]^2/sum(pca.values[[4]][,x.axis.dim]^2)) %>% - slice_max(n = number.of.factors, - order_by = (factors.x + factors.y)/2) - names <- tbl$names + # ------------------------------------------------------------------------ + # 4. Biplot arrows -------------------------------------------------------- + # ------------------------------------------------------------------------ + if (display.factors) { + loadings <- as.data.frame(pca.values$rotation) + sel.score <- (loadings[[x.idx]]^2 + loadings[[y.idx]]^2) / 2 + sel <- head(order(sel.score, decreasing = TRUE), number.of.factors) + loadings <- loadings[sel, ] + loadings$names <- rownames(loadings) - df <- as.data.frame(pca.values[[4]]) - df <- df[rownames(df) %in% names,] - df$names <- rownames(df) - if(!is.null(facet.by)) { - facets <- sort(unique(plot.df[,facet.by])) - df[,facet.by] <- facets[1] - } + # Rescale onto existing plot range (80 % of extents) + rng.x <- range(plot.df[[x.idx]]) * 0.8 + rng.y <- range(plot.df[[y.idx]]) * 0.8 + rescale <- function(v, to) (v - min(v)) / diff(range(v)) * diff(to) + min(to) + loadings$xend <- rescale(loadings[[x.idx]], rng.x) + loadings$yend <- rescale(loadings[[y.idx]], rng.y) - plot <- plot + - geom_hline(yintercept = 0, lty=2) + - geom_vline(xintercept = 0, lty=2) + - geom_segment(data = df, - aes(x = 0, - y = 0, - xend = .scale.variable(df[,x.axis.dim], x.range), - yend = .scale.variable(df[,y.axis.dim], y.range)), - arrow = arrow(length = unit(0.25, "cm"))) + - geom_label(data = df, aes(label = names, - x = .scale.variable(df[,x.axis.dim], x.range), - y = .scale.variable(df[,y.axis.dim], y.range)), - size=2, - hjust = 0.5, - nudge_y = -0.01, - label.padding = unit(0.1, "lines")) + g <- g + + ggplot2::geom_hline(yintercept = 0, linetype = 2) + + ggplot2::geom_vline(xintercept = 0, linetype = 2) + + ggplot2::geom_segment(data = loadings, + ggplot2::aes(x = 0, y = 0, xend = xend, yend = yend), + arrow = ggplot2::arrow(length = grid::unit(0.25, "cm"))) + + ggplot2::geom_text(data = loadings, + ggplot2::aes(x = xend, y = yend, label = names), + size = 2, vjust = 1.1) } - - return(plot) -} - -# Function to scale the new variable -.scale.variable <- function(new_var, existing_range) { - new_range <- range(new_var) - existing_range <- existing_range* 0.8 - normalized <- (new_var - min(new_range)) / (max(new_range) - min(new_range)) - scaled <- normalized * (max(existing_range) - min(existing_range)) + min(existing_range) - return(scaled) + g } diff --git a/R/performNormalization.R b/R/performNormalization.R index dad9797..c47bd55 100644 --- a/R/performNormalization.R +++ b/R/performNormalization.R @@ -1,127 +1,127 @@ #' Perform Normalization on Enrichment Data #' -#' This function allows users to normalize the enrichment calculations -#' by accounting for single-cell dropout and producing positive -#' values for downstream differential enrichment analyses. Default calculation -#' uses will scale the enrichment values by the number of genes present from -#' the gene set and then use a natural log transformation. A positive range -#' values is useful for several downstream analyses, like differential -#' evaluation for log2-fold change, but will alter the original -#' enrichment values. -#' -#' @param sc.data Single-cell object or matrix used in the gene set enrichment calculation in -#' \code{\link{escape.matrix}} or \code{\link{runEscape}}. -#' @param enrichment.data The enrichment results from \code{\link{escape.matrix}} -#' or \code{\link{runEscape}} (optional) -#' @param assay Name of the assay to normalize if using a single-cell object -#' @param gene.sets The gene set library to use to extract -#' the individual gene set information from -#' @param scale.factor A vector to use for normalizing enrichment scores per cell. -#' @param make.positive Shift enrichment values to a positive range \strong{TRUE} -#' for downstream analysis or not \strong{TRUE} (default). -#' @param groups the number of cells to calculate normalization on at once. -#' chunks matrix into groups sized chunks. Useful in case of memory issues. -#' @importFrom stringr str_replace_all -#' @importFrom SeuratObject Assays -#' @importFrom SummarizedExperiment assays -#' @importFrom Matrix colSums - -#' @examples -#' GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +#' @description +#' Scales each enrichment value by the **number of genes from the set that are +#' expressed** in that cell (non‑zero counts). Optionally shifts results into a +#' positive range and/or applies a natural‑log transform for compatibility with +#' log‑based differential tests. +#' +#' @param input.data raw‐counts matrix (`genes × cells`), a +#' \link[SeuratObject]{Seurat} object, or a +#' \link[SingleCellExperiment]{SingleCellExperiment}. Gene identifiers must +#' match those in `gene.sets`. +#' @param enrichment.data Output of \code{\link{escape.matrix}} or a single‑cell +#' object previously processed by \code{\link{runEscape}}. +#' @param assay Name of the assay holding enrichment scores when +#' `input.data` is a single‑cell object. Ignored otherwise. +#' @param gene.sets A named list of character vectors, the result of +#' [getGeneSets()], or the built-in data object [escape.gene.sets]. +#' List names become column names in the result. +#' @param make.positive Logical; if `TRUE` shifts each column so its minimum is +#' zero. +#' @param scale.factor Optional numeric vector overriding gene‑count scaling +#' (length = #cells). Use when you want external per‑cell normalization factors. +#' @param groups Integer ≥ 1. Number of cells per processing chunk. +#' Larger values reduce overhead but increase memory usage. Default **1000**. +#' +#' @examples +#' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), #' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -#' pbmc_small <- SeuratObject::pbmc_small -#' pbmc_small <- runEscape(pbmc_small, -#' gene.sets = GS, -#' min.size = NULL) +#' +#' pbmc <- SeuratObject::pbmc_small |> +#' runEscape(gene.sets = gs, +#' min.size = NULL) #' -#' pbmc_small <- performNormalization(pbmc_small, -#' assay = "escape", -#' gene.sets = GS) +#' pbmc <- performNormalization(pbmc, +#' assay = "escape", +#' gene.sets = gs) #' +#' @return If `input.data` is an object, the same object with a new assay +#' "_normalized". Otherwise a matrix of normalized scores. #' @export -#' @return Single-cell object or matrix of normalized enrichment scores -performNormalization <- function(sc.data, +performNormalization <- function(input.data, enrichment.data = NULL, - assay = "escape", - gene.sets = NULL, - make.positive = FALSE, - scale.factor = NULL, - groups = NULL) { - if(!is.null(assay)) { - if(is_seurat_object(sc.data)) { - assay.present <- assay %in% Assays(sc.data) - } else if (is_se_object(sc.data)) { - assay.present <- assay %in% assays(sc.data) + assay = "escape", + gene.sets = NULL, + make.positive = FALSE, + scale.factor = NULL, + groups = NULL) { + + ## 1. Retrieve enrichment matrix --------------------------------------- + assay.present <- FALSE + if (!is.null(assay) && .is_seurat_or_sce(input.data)) { + if (.is_seurat(input.data)) { + if (requireNamespace("SeuratObject", quietly = TRUE)) { + assay.present <- assay %in% SeuratObject::Assays(input.data) + } else { + warning("SeuratObject package is required but not installed.") + } + } else if (.is_sce(input.data)) { + if (requireNamespace("SingleCellExperiment", quietly = TRUE)) { + assay.present <- assay %in% names(SingleCellExperiment::altExps(input.data)) + } else { + warning("SingleCellExperiment package is required but not installed.") + } } - } else { - assay.present <- FALSE } + - if(is_seurat_or_se_object(sc.data) & !is.null(assay) & assay.present) { - enriched <- .pull.Enrich(sc.data, assay) - } else { - enriched <- enrichment.data - } - - if(!is.null(scale.factor) & length(scale.factor) != dim(sc.data)[2]) { - stop("If using a vector as a scale factor, please ensure the length matches the number of cells.") + enriched <- if (assay.present) .pull.Enrich(input.data, assay) else enrichment.data + if (is.null(enriched)) { + stop("Could not obtain enrichment matrix, please set `assay` or supply `enrichment.data`.") } - #Getting the gene sets that passed filters - egc <- .GS.check(gene.sets) - names(egc) <- str_replace_all(names(egc), "_", "-") - egc <- egc[names(egc) %in% colnames(enriched)] - - #Isolating the number of genes per cell expressed - if(is.null(groups)){ - chunks <- dim(enriched)[[1]] - } else{ - chunks <- min(groups, dim(enriched)[[1]]) - } + ## 2. Validate / derive scale factors ---------------------------------- + if (!is.null(scale.factor) && length(scale.factor) != nrow(enriched)) + stop("Length of 'scale.factor' must match number of cells.") if (is.null(scale.factor)) { - cnts <- .cntEval(sc.data, assay = "RNA", type = "counts") - print("Calculating features per cell...") - egc.sizes <- lapply(egc, function(x){ - scales<-unname(Matrix::colSums(cnts[which(rownames(cnts) %in% x),]!=0)) - scales[scales==0] <- 1 - scales - }) - egc.sizes <- split_rows(do.call(cbind,egc.sizes), chunk.size=chunks) + egc <- .GS.check(gene.sets) + names(egc) <- gsub("_", "-", names(egc), fixed = TRUE) + egc <- egc[names(egc) %in% colnames(enriched)] + if (!length(egc)) stop("None of the supplied gene sets match enrichment columns.") + + ## counts matrix (genes × cells) – drop after use to save RAM + cnts <- .cntEval(input.data, assay = "RNA", type = "counts") + message("Computing expressed-gene counts per cell...") + scale.mat <- do.call(cbind, lapply(egc, function(gs) { + vec <- Matrix::colSums(cnts[rownames(cnts) %in% gs, , drop = FALSE] != 0) + vec[vec == 0] <- 1L # avoid /0 + vec + })) rm(cnts) - } else{ - egc.sizes <- split_vector(scale.factor, chunk.size=chunks) + ## optionally split large matrices to spare memory + chunksize <- if (is.null(groups)) nrow(enriched) else min(groups, nrow(enriched)) + sf.split <- .split_rows(scale.mat, chunk.size = chunksize) + } else { + sf.split <- .split_vector(scale.factor, chunk.size = if (is.null(groups)) length(scale.factor) else min(groups, length(scale.factor))) } - enriched <- split_rows(enriched, chunk.size=chunks) - print("Normalizing enrichment scores per cell...") - #Dividing the enrichment score by number of genes expressed - - enriched<-mapply(function(scores, scales){ - scores/scales - }, enriched, egc.sizes, SIMPLIFY = FALSE) - enriched <- do.call(rbind, enriched) - if(make.positive){ - enriched <- apply(enriched, 2, function(x){ - x+max(0, -min(x)) - }) + ## 3. Chunked normalization -------------------------------------------- + message("Normalizing enrichment scores...") + en.split <- .split_rows(enriched, chunk.size = if (is.null(groups)) nrow(enriched) else min(groups, nrow(enriched))) + norm.lst <- Map(function(sco, fac) sco / fac, en.split, sf.split) + normalized <- do.call(rbind, norm.lst) + + ## 4. Optional positive shift ------------------------------------------ + if (make.positive) { + shift <- pmax(0, -apply(normalized, 2L, min)) + normalized <- sweep(normalized, 2L, shift, `+`) } - #Default Scaling using natural log - if(is.null(scale.factor)) { - enriched <- suppressWarnings(ifelse(enriched >= 0, - log1p(enriched + 1e-6), - -log1p(abs(enriched) + 1e-6))) + + ## 5. Log transform (only when scale.factor derived internally) --------- + if (is.null(scale.factor)) { + neg <- normalized < 0 + normalized[!neg] <- log1p(normalized[!neg] + 1e-6) + normalized[neg] <- -log1p(abs(normalized[neg]) + 1e-6) } - if(is_seurat_or_se_object(sc.data)) { - if(is.null(assay)) { - assay <- "escape" - } - sc.data <- .adding.Enrich(sc.data, enriched, paste0(assay, "_normalized")) - return(sc.data) + ## 6. Return ------------------------------------------------------------ + if (.is_seurat_or_sce(input.data)) { + .adding.Enrich(input.data, normalized, paste0(assay %||% "escape", "_normalized")) } else { - return(enriched) + normalized } -} +} \ No newline at end of file diff --git a/R/performPCA.R b/R/performPCA.R index 564e8d0..4af1270 100644 --- a/R/performPCA.R +++ b/R/performPCA.R @@ -8,84 +8,101 @@ #' workflow in lieu of using \code{\link{performPCA}}, but will not be #' compatible with downstream \code{\link{pcaEnrichment}} visualization. #' -#' @param input.data Enrichment output from \code{\link{escape.matrix}} or -#' \code{\link{runEscape}}. -#' @param assay Name of the assay to plot if data is a single-cell object. -#' @param scale Standardize the enrichment value (\strong{TRUE}) or -#' not (\strong{FALSE}) -#' @param n.dim The number of components to calculate. -#' @param reduction.name Name of the reduced dimensions object to add if -#' data is a single-cell object. -#' @param reduction.key Name of the key to use with the components. -#' -#' @importFrom stats prcomp -#' @importFrom SeuratObject CreateDimReducObject -#' @importFrom SingleCellExperiment reducedDim reducedDim<- -#' +#' @param input.data Output of \code{\link{escape.matrix}} or a single‑cell +#' object previously processed by \code{\link{runEscape}}. +#' @param assay Name of the assay holding enrichment scores when +#' `input.data` is a single‑cell object. Ignored otherwise. +#' @param scale Logical; if `TRUE` standardises each gene-set column +#' before PCA. +#' @param n.dim Integer ≥1 or vector; the **largest** value sets the +#' number of principal components to compute / keep. +#' @param reduction.name,reduction.key Names used when writing back to a +#' Seurat / SCE object. +#' #' @examples -#' GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +#' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), #' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -#' pbmc_small <- SeuratObject::pbmc_small -#' pbmc_small <- runEscape(pbmc_small, -#' gene.sets = GS, -#' min.size = NULL) +#' +#' pbmc <- SeuratObject::pbmc_small |> +#' runEscape(gene.sets = gs, +#' min.size = NULL) #' -#' pbmc_small <- performPCA(pbmc_small, -#' assay = "escape") +#' pbmc <- performPCA(pbmc, +#' assay = "escape") #' +#' @return *If* `input.data` is a single-cell object, the same object with a +#' new dimensional-reduction slot. *Otherwise* a list with +#' `PCA`, `eigen_values`, `contribution`, and `rotation`. #' @export -#' -#' @return single-cell object or list with PCA components to plot. performPCA <- function(input.data, - assay = NULL, - scale = TRUE, - n.dim = 1:10, - reduction.name = "escape.PCA", - reduction.key = "PCA") { + assay = "escape", + scale = TRUE, + n.dim = 10, + reduction.name = "escape.PCA", + reduction.key = "escPC_") { - if(is_seurat_or_se_object(input.data)) { - enriched <- .pull.Enrich(input.data, assay) + ## ------------ 1 Get enrichment matrix ------------------------------------ + if (.is_seurat_or_sce(input.data)) { + mat <- .pull.Enrich(input.data, assay) + } else if (is.matrix(input.data) || is.data.frame(input.data)) { + mat <- as.matrix(input.data) } else { - enriched <- input.data + stop("`input.data` must be a matrix/data.frame or a Seurat/SCE object.") } + if (!is.numeric(mat)) stop("Enrichment matrix must be numeric.") - PCA <- prcomp(enriched, - scale. = scale, - rank. = max(n.dim)) - rotation <- PCA$rotation - eigen.values <- PCA$sdev^2 - percent.contribution <- round((eigen.values/sum(eigen.values))*100,1) - PCA <- PCA$x - colnames(PCA) <- paste0(reduction.key, "_", seq_len(ncol(PCA))) + ## ------------ 2 Choose PCA backend --------------------------------------- + ndim <- max(as.integer(n.dim)) + use_irlba <- requireNamespace("irlba", quietly = TRUE) && + min(dim(mat)) > 50 # heuristic - additional.data <- list(eigen_values = eigen.values, - contribution = percent.contribution, - rotation = rotation) - if(is_seurat_or_se_object(input.data)) { - if (inherits(input.data, "Seurat")) { - DR <- suppressWarnings(CreateDimReducObject( - embeddings = PCA, - stdev = rep(0, ncol(PCA)), - key = reduction.key, - jackstraw = NULL, - misc = additional.data)) - input.data[[reduction.name]] <- DR - } else if (inherits(input.data, "SingleCellExperiment")) { - reducedDim(input.data, reduction.name) <- PCA - if(length(input.data@metadata) == 0) { - input.data@metadata <- additional.data - } else { - input.data@metadata <- c(input.data@metadata, additional.data) - } + pca_obj <- if (use_irlba) { + irlba::prcomp_irlba(mat, n = ndim, center = TRUE, scale. = scale) + } else { + stats::prcomp(mat, rank. = ndim, center = TRUE, scale. = scale) + } + + ## ------------ 3 Post-process --------------------------------------------- + eig <- pca_obj$sdev ^ 2 + pct <- round(eig / sum(eig) * 100, 1) + colnames(pca_obj$x) <- paste0(reduction.key, seq_len(ncol(pca_obj$x))) + + misc <- list(eigen_values = eig, + contribution = pct, + rotation = pca_obj$rotation) + + ## ------------ 4 Return / write-back -------------------------------------- + if (.is_seurat_or_sce(input.data)) { + if (.is_seurat(input.data)) { + if (!requireNamespace("SeuratObject", quietly = TRUE)) { + stop("Package 'SeuratObject' is required to write PCA results into a Seurat object.") + } + + input.data[[reduction.name]] <- SeuratObject::CreateDimReducObject( + embeddings = pca_obj$x, + loadings = pca_obj$rotation, + stdev = pca_obj$sdev, + key = reduction.key, + misc = misc, + assay = assay + ) + + } else if (.is_sce(input.data)) { + if (!requireNamespace("SingleCellExperiment", quietly = TRUE)) { + stop("Package 'SingleCellExperiment' is required to write PCA results into a SingleCellExperiment object.") + } + + SingleCellExperiment::reducedDim(input.data, reduction.name) <- pca_obj$x + input.data@metadata <- c(input.data@metadata, misc) + } return(input.data) + } else { - PCA.results <- list(PCA = PCA, - eigen_values = eigen.values, - contribution = percent.contribution, - rotation = rotation) - return(PCA.results) + list(PCA = pca_obj$x, + eigen_values = eig, + contribution = pct, + rotation = pca_obj$rotation) } - } diff --git a/R/ridgeEnrichment.R b/R/ridgeEnrichment.R index eafe047..9f38826 100644 --- a/R/ridgeEnrichment.R +++ b/R/ridgeEnrichment.R @@ -1,155 +1,133 @@ -#' Visualize enrichment results with a ridge plot +#' Visualize Enrichment Distributions Using Ridge Plots #' #' This function allows to the user to examine the distribution of #' enrichment across groups by generating a ridge plot. #' -#' @param input.data Enrichment output from \code{\link{escape.matrix}} or -#' \code{\link{runEscape}}. -#' @param assay Name of the assay to plot if data is a single-cell object. -#' @param group.by Categorical parameter to plot along the x.axis. If input is -#' a single-cell object the default will be cluster. -#' @param gene.set Gene set to plot (on y-axis). -#' @param color.by How the color palette applies to the graph - can -#' be \strong{"group"} for a categorical color palette based on the -#' \strong{group.by} parameter or use the \strong{gene.set} name if wanting to -#' apply a gradient palette. -#' @param order.by Method to organize the x-axis: \strong{"mean"} will arrange -#' the x-axis by the mean of the gene.set, while \strong{"group"} will arrange -#' the x-axis by in alphanumerical order. Using \strong{NULL} will not reorder -#' the x-axis. -#' @param facet.by Variable to facet the plot into n distinct graphs. -#' @param scale Visualize raw values \strong{FALSE} or Z-transform -#' enrichment values \strong{TRUE}. -#' @param add.rug Add visualization of the discrete cells along -#' the ridge plot (\strong{TRUE}). -#' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' @param input.data Output of \code{\link{escape.matrix}} or a single‑cell +#' object previously processed by \code{\link{runEscape}}. +#' @param gene.set.use Character(1). Name of the gene set to display. +#' @param assay Name of the assay holding enrichment scores when +#' `input.data` is a single‑cell object. Ignored otherwise. +#' @param group.by Metadata column plotted on the *y*‑axis. Defaults to the +#' Seurat/SCE `ident` slot when `NULL`. +#'@param color.by Aesthetic mapped to point color. Use either +#' *"group"* (default = `group.by`) for categorical coloring or the +#' *name of a gene‑set* (e.g. same as `gene.set`) to obtain a numeric +# gradient. Any other metadata or column present in the data is also +#' accepted. +#' @param order.by How to arrange the x‑axis: +#' *`"mean"`* – groups ordered by decreasing group mean; +#' *`"group"`* – natural sort of group labels; +#' *`NULL`* – keep original ordering. +#' @param facet.by Optional metadata column used to facet the plot. +#' @param scale Logical; if `TRUE` scores are centred/scaled (Z‑score) prior +#' to plotting. +#' @param add.rug Logical. Draw per-cell tick marks underneath each ridge. +#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. #' -#' @import ggplot2 -#' @importFrom ggridges geom_density_ridges geom_density_ridges2 position_points_jitter geom_density_ridges_gradient -#' -#' @examples -#' GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +#' @examples +#' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), #' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -#' pbmc_small <- SeuratObject::pbmc_small -#' pbmc_small <- runEscape(pbmc_small, -#' gene.sets = GS, -#' min.size = NULL) -#' -#' ridgeEnrichment(pbmc_small, -#' assay = "escape", -#' gene.set = "Tcells") -#' -#' ridgeEnrichment(pbmc_small, -#' assay = "escape", -#' gene.set = "Tcells", -#' color.by = "Tcells") +#' +#' pbmc <- SeuratObject::pbmc_small |> +#' runEscape(gene.sets = gs, min.size = NULL) #' -#' @export +#' ridgeEnrichment(pbmc, assay = "escape", +#' gene.set.use = "Tcells", +#' group.by = "groups") #' -#' @return ggplot2 object with ridge-based distributions of selected gene.set -ridgeEnrichment <- function(input.data, - assay = NULL, - group.by = NULL, - gene.set = NULL, - color.by = "group", - order.by = NULL, - scale = FALSE, - facet.by = NULL, - add.rug = FALSE, - palette = "inferno") { - - if(is.null(group.by)) { - group.by <- "ident" - } - - if(color.by == "group") { - color.by <- group.by - } - - enriched <- .prepData(input.data, assay, gene.set, group.by, NULL, facet.by) +#' @importFrom stats median +#' @return A [ggplot2] object. +#' @export +#' +ridgeEnrichment <- function(input.data, + gene.set.use, + assay = NULL, + group.by = NULL, + color.by = "group", + order.by = NULL, + scale = FALSE, + facet.by = NULL, + add.rug = FALSE, + palette = "inferno") +{ + ## ---- 0 sanity ------------------------------------------------------- + if (!requireNamespace("ggridges", quietly = TRUE)) + stop("Package 'ggridges' is required for ridge plots; please install it.") + if (length(gene.set.use) != 1L) + stop("'gene.set.use' must be length 1.") + if (is.null(group.by)) group.by <- "ident" + if (identical(color.by, "group")) color.by <- group.by - if(inherits(enriched[,color.by], "numeric") && gene.set == color.by) { - gradient.format <- TRUE - } else { - gradient.format <- FALSE - } + ## ---- 1 build long data.frame --------------------------------------- + df <- .prepData(input.data, assay, gene.set.use, group.by, + split.by = NULL, facet.by = facet.by) - if(scale) { - enriched[,gene.set] <- as.numeric(scale(enriched[,gene.set])) - } + ## optional scaling (Z-transform per gene-set) ------------------------- + if (scale) + df[[gene.set.use]] <- as.numeric(scale(df[[gene.set.use]], center = TRUE)) - if(!is.null(order.by) && !is.null(group.by)) { - enriched <- .orderFunction(enriched, order.by, group.by) - } + ## optional re-ordering of the y-axis factor --------------------------- + if (!is.null(order.by)) + df <- .orderFunction(df, order.by, group.by) - + ## detect “gradient” mode (numeric color mapped to x) ----------------- + gradient.mode <- + is.numeric(df[[color.by]]) && identical(color.by, gene.set.use) - if(gradient.format) { - plot <- ggplot(enriched, aes(x = enriched[,gene.set], - y = enriched[,group.by], - fill = after_stat(x))) + if(gradient.mode) { + fill <- ggplot2::after_stat(df[,color.by]) } else { - plot <- ggplot(enriched, aes(x = enriched[,gene.set], - y = enriched[,group.by], - fill = enriched[,group.by])) + fill <- df[,color.by] } - if (add.rug) { - if(gradient.format) { - plot <- plot + geom_density_ridges_gradient(jittered_points = TRUE, - position = position_points_jitter(width = 0.05, height = 0), - point_shape = '|', - point_size = 3, - point_alpha = 1, - alpha = 0.7, - quantile_lines = TRUE, - quantile_fun = median, - vline_width = 1) - } else { - plot <- plot + geom_density_ridges(jittered_points = TRUE, - position = position_points_jitter(width = 0.05, height = 0), - point_shape = '|', - point_size = 3, - point_alpha = 1, - alpha = 0.7, - quantile_lines = TRUE, - quantile_fun = median, - vline_width = 1) - } - + ## ---- 2 base ggplot -------------------------------------------------- + aes_base <- if (gradient.mode) { + ggplot2::aes( + x = .data[[gene.set.use]], + y = .data[[group.by]], + fill = after_stat(x) + ) } else { - if(gradient.format) { - plot <- plot + - geom_density_ridges_gradient(alpha = 0.8, - quantile_lines = TRUE, - quantile_fun = median, - vline_width = 1) - } else { - plot <- plot + - geom_density_ridges2(alpha = 0.8, - quantile_lines = TRUE, - quantile_fun = median, - vline_width = 1) - } + ggplot2::aes( + x = .data[[gene.set.use]], + y = .data[[group.by]], + fill = .data[[color.by]] + ) } - plot <- plot + - ylab(group.by) + - xlab(paste0(gene.set, "\n Enrichment Score")) + - labs(fill = color.by) + ############# - theme_classic() + - guides(fill = "none") + p <- ggplot2::ggplot(df, aes_base) - plot <- .colorby(enriched, - plot, - color.by, - palette) + ## choose ridge geometry + rug ----------------------------------------- + ridge_fun <- if (gradient.mode) + ggridges::geom_density_ridges_gradient else ggridges::geom_density_ridges + p <- p + do.call(ridge_fun, c( + list( + jittered_points = add.rug, + point_shape = '|', + point_size = 2.5, + point_alpha = 1, + alpha = 0.8, + quantile_lines = TRUE, + quantile_fun = median, + vline_width = 0.9 + ), + if (add.rug) list( + position = ggridges::position_points_jitter(width = 0.05, height = 0) + ) + )) - if (!is.null(facet.by)) { - plot <- plot + - facet_grid(as.formula(paste('. ~', facet.by))) - } + ## ---- 3 scales & labels --------------------------------------------- + p <- p + + ylab(group.by) + + xlab(paste0(gene.set.use, "\nEnrichment Score")) + + ggplot2::theme_classic(base_size = 11) + + p <- .colorby(df, p, color.by, palette, type = "fill") + + ## facetting ------------------------------------------------------------ + if (!is.null(facet.by)) + p <- p + ggplot2::facet_grid(stats::as.formula(paste(". ~", facet.by))) - return(plot) -} + p +} \ No newline at end of file diff --git a/R/runEscape.R b/R/runEscape.R index 8b792de..d8f957b 100644 --- a/R/runEscape.R +++ b/R/runEscape.R @@ -1,200 +1,246 @@ -#' Calculate gene set enrichment scores +#' Calculate Single-Cell Gene-Set Enrichment Scores #' -#' This function allows users to input both the single-cell RNA-sequencing -#' counts and output the enrichment scores as a matrix. +#' `escape.matrix()` computes per-cell enrichment for arbitrary gene-set +#' collections using one of four scoring back-ends and returns a dense numeric +#' matrix (cells × gene-sets). The expression matrix is processed in +#' user-defined *chunks* (`groups`) so that memory use remains predictable; +#' each chunk is dispatched in parallel via a \pkg{BiocParallel} `BPPARAM` +#' backend. Heavy engines (\pkg{GSVA}, \pkg{UCell}, \pkg{AUCell}) are loaded +#' lazily, keeping them in the package’s \strong{Suggests} field. #' +#' @section Supported methods: +#' \describe{ +#' \item{`"GSVA"`}{Gene-set variation analysis (Poisson kernel).} +#' \item{`"ssGSEA"`}{Single-sample GSEA.} +#' \item{`"UCell"`}{Rank-based UCell scoring.} +#' \item{`"AUCell"`}{Area-under-the-curve ranking score.} +#' } #' -#' @param input.data The count matrix, Seurat, or Single-Cell Experiment object. -#' @param gene.sets Gene sets can be a list, output from -#' \code{\link{getGeneSets}}, or the built-in gene sets -#' in the escape package \code{\link{escape.gene.sets}}. -#' @param method Select the method to calculate enrichment, \strong{AUCell}, -#' \strong{GSVA}, \strong{ssGSEA} or \strong{UCell}. -#' @param groups The number of cells to separate the enrichment calculation. -#' @param min.size Minimum number of gene necessary to perform the enrichment -#' calculation -#' @param normalize Whether to divide the enrichment score by the number -#' of genes \strong{TRUE} or report unnormalized \strong{FALSE}. -#' @param make.positive During normalization shift enrichment values to a -#' positive range \strong{TRUE} for downstream analysis or not -#' \strong{TRUE} (default). Will only be applied if \strong{normalize = TRUE}. -#' @param BPPARAM A BiocParallel::bpparam() object that for parallelization. -#' @param ... pass arguments to AUCell GSVA, ssGSEA, or UCell call +#' @param input.data A raw‐counts matrix (`genes × cells`), a +#' \link[SeuratObject]{Seurat} object, or a +#' \link[SingleCellExperiment]{SingleCellExperiment}. Gene identifiers must +#' match those in `gene.sets`. +#' @param gene.sets A named list of character vectors, the result of +#' [getGeneSets()], or the built-in data object [escape.gene.sets]. +#' List names become column names in the result. +#' @param method Scoring algorithm (case-insensitive). One of `"GSVA"`, +#' `"ssGSEA"`, `"UCell"`, or `"AUCell"`. Default **`"ssGSEA"`**. +#' @param groups Integer ≥ 1. Number of cells per processing chunk. +#' Larger values reduce overhead but increase memory usage. Default **1000**. +#' @param min.size Minimum number of genes from a set that must be detected +#' in the expression matrix for that set to be scored. Default **5**. +#' Use `NULL` to disable filtering. +#' @param normalize Logical. If `TRUE`, the score matrix is passed to +#' [performNormalization()] (drop-out scaling and optional log transform). +#' Default **FALSE**. +#' @param make.positive Logical. If `TRUE` *and* `normalize = TRUE`, shifts +#' every gene-set column so its global minimum is zero, facilitating +#' downstream log-ratio analyses. Default **FALSE**. +#' @param min.expr.cells Numeric. Gene-expression filter threshold (see +#' details above). Default **0** (no gene filtering). +#' @param min.filter.by Character or `NULL`. Column name in `meta.data` +#' (Seurat) or `colData` (SCE) defining groups within which the +#' `min.expr.cells` rule is applied. Default **`NULL`**. +#' @param BPPARAM A \pkg{BiocParallel} parameter object describing the +#' parallel backend. +#' @param ... Extra arguments passed verbatim to the chosen back-end +#' scoring function (`gsva()`, `ScoreSignatures_UCell()`, or +#' `AUCell_calcAUC()`). #' -#' @importFrom GSVA gsva gsvaParam ssgseaParam -#' @importFrom GSEABase GeneSetCollection -#' @importFrom UCell ScoreSignatures_UCell -#' @importFrom AUCell AUCell_buildRankings AUCell_calcAUC -#' @importFrom SummarizedExperiment assay -#' @importFrom BiocParallel SerialParam MulticoreParam BatchtoolsParam SerialParam +#' @return A numeric matrix with one row per cell and one column per gene set, +#' ordered as in `gene.sets`. #' -#' @examples -#' GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +#' @author Nick Borcherding, Jared Andrews +#' +#' @seealso [runEscape()] to attach scores to a single-cell object; +#' [getGeneSets()] for MSigDB retrieval; [performNormalization()] for the +#' optional normalization workflow. +#' +#' @examples +#' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), #' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -#' pbmc_small <- SeuratObject::pbmc_small -#' ES <- escape.matrix(pbmc_small, -#' gene.sets = GS, -#' min.size = NULL) #' -#' @export -#' @author Nick Borcherding, Jared Andrews +#' pbmc <- SeuratObject::pbmc_small +#' es <- escape.matrix(pbmc, +#' gene.sets = gs, +#' method = "ssGSEA", +#' groups = 500, +#' min.size = 3) #' -#' @seealso \code{\link{getGeneSets}} to collect gene sets. -#' @return matrix of enrichment scores -escape.matrix <- function(input.data, - gene.sets = NULL, - method = "ssGSEA", - groups = 1000, - min.size = 5, - normalize = FALSE, - make.positive = FALSE, - BPPARAM = SerialParam(), +#' @export +escape.matrix <- function(input.data, + gene.sets = NULL, + method = "ssGSEA", + groups = 1000, + min.size = 5, + normalize = FALSE, + make.positive = FALSE, + min.expr.cells = 0, + min.filter.by = NULL, + BPPARAM = NULL, ...) { - egc <- .GS.check(gene.sets) - cnts <- .cntEval(input.data, assay = "RNA", type = "counts") - egc.size <- lapply(egc, function(x) length(which(rownames(cnts) %in% x))) - - # Filtering gene sets - if (!is.null(min.size)) { - remove <- which(egc.size < min.size | egc.size == 0) - } else { - remove <- which(egc.size == 0) - } - if (length(remove) > 0) { - egc <- egc[-remove] - egc.size <- egc.size[-remove] - - if (!is.null(min.size) && length(egc) == 0) { - stop("No gene sets passed the minimum length - please reconsider the 'min.size' parameter") - } - } - - scores <- list() - splits <- seq(1, ncol(cnts), by=groups) - print(paste('Using sets of', groups, 'cells. Running', - length(splits), 'times.')) - split.data <- .split_data.matrix(matrix=cnts, chunk.size=groups) - - all_gene_sets <- names(egc) # Collect all gene set names - - # Running enrichment calculation - for (i in seq_along(splits)) { - if (method == "GSVA") { - parameters <- .gsva.setup(split.data[[i]], egc) - } else if (method == "ssGSEA") { - parameters <- .ssGSEA.setup(split.data[[i]], egc) - } - if (method %in% c("ssGSEA", "GSVA")) { - a <- suppressWarnings(gsva(param = parameters, - verbose = FALSE, - BPPARAM = BPPARAM, - ...)) - } else if (method == "UCell") { - a <- t(suppressWarnings( - ScoreSignatures_UCell(matrix = split.data[[i]], - features = egc, - name = NULL, - BPPARAM = BPPARAM, - ...))) - } else if (method == "AUCell") { - rankings <- AUCell_buildRankings(split.data[[i]], - plotStats = FALSE, - verbose = FALSE) - a <- assay(AUCell_calcAUC(geneSets = egc, - rankings, - normAUC = TRUE, - aucMaxRank = ceiling(0.2 * nrow(split.data[[i]])), - verbose = FALSE, - ...)) - } - - # Ensure consistent row names (all_gene_sets) across splits - a <- as.data.frame(a) - a <- a[match(all_gene_sets, rownames(a), nomatch = NA), , drop = FALSE] - scores[[i]] <- a - } - scores <- do.call(cbind, scores) - output <- t(as.matrix(scores)) + if(is.null(min.size)) min.size <- 0 + + # ---- 1) resolve gene-sets & counts ---------------------------------------- + egc <- .GS.check(gene.sets) + cnts <- .cntEval(input.data, assay = "RNA", type = "counts") # dgCMatrix + + if (is.null(min.filter.by)) { + cnts <- .filter_genes(cnts, min.expr.cells) + } else { + # get grouping factor from object + group.vec <- .extract_group_vector(input.data, min.filter.by) + split.idx <- split(seq_len(ncol(cnts)), group.vec) - # Normalize based on dropout - if(normalize) { - output <- performNormalization(sc.data = input.data, - enrichment.data = output, - assay = NULL, - gene.sets = gene.sets, - make.positive = make.positive, - groups = groups) + cnts <- do.call(cbind, lapply(split.idx, function(cols) { + sub <- cnts[, cols, drop = FALSE] + .filter_genes(sub, min.expr.cells) + })) + } + + # ---- 2) drop undersized gene-sets ----------------------------------------- + keep <- vapply(egc, function(gs) sum(rownames(cnts) %in% gs) >= min.size, + logical(1)) + if (!all(keep)) { + egc <- egc[keep] + if (!length(egc)) + stop("No gene-sets meet the size threshold (min.size = ", min.size, ")") + } + + # ---- 3) split cells into chunks ------------------------------------------- + chunks <- .split_cols(cnts, groups) + message("escape.matrix(): processing ", length(chunks), " chunk(s)...") + + # ---- 4) compute enrichment in parallel ------------------------------------ + res_list <- .plapply( + chunks, + function(mat) + .compute_enrichment(mat, egc, method, BPPARAM, ...), + BPPARAM = BPPARAM + ) + + # ---- 5) combine + orient (rows = cells) ----------------------------------- + all_sets <- names(egc) + res_mat <- do.call(cbind, lapply(res_list, function(m) { + m <- as.matrix(m) + m <- m[match(all_sets, rownames(m)), , drop = FALSE] + m + })) + res_mat <- t(res_mat) + colnames(res_mat) <- all_sets + + # ---- 6) optional dropout scaling ------------------------------------------ + if (normalize) { + res_mat <- performNormalization( + input.data = input.data, + enrichment.data = res_mat, + assay = NULL, + gene.sets = gene.sets, + make.positive = make.positive, + groups = groups + ) + if (.is_seurat_or_sce(input.data)) { + res_mat <- .pull.Enrich(res_mat, "escape_normalized") } - return(output) + } + + res_mat } -#' Enrichment calculation for single-cell workflows +#' Calculate Enrichment Scores Using Seurat or SingleCellExperiment Objects #' -#' Run the escape-based gene-set enrichment calculation with -#' Seurat or SingleCellExperiment pipelines -#' -#' @examples -#' GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +#' `runEscape()` is a convenience wrapper around [escape.matrix()] that +#' computes enrichment scores and inserts them as a new assay (default +#' `"escape"`) in a \pkg{Seurat} or \pkg{SingleCellExperiment} object. All +#' arguments (except `new.assay.name`) map directly to their counterparts in +#' `escape.matrix()`. +#' +#' @inheritParams escape.matrix +#' @param new.assay.name Character. Name for the assay that will store the +#' enrichment matrix in the returned object. Default **"escape"**. +#' +#' @return The input single-cell object with an additional assay containing the +#' enrichment scores (`cells × gene-sets`). Matrix orientation follows +#' standard single-cell conventions (gene-sets as rows inside the assay). +#' +#' @author Nick Borcherding, Jared Andrews +#' +#' @seealso [escape.matrix()] for the underlying computation, +#' [performNormalization()] to add normalized scores, [heatmapEnrichment()], +#' [ridgeEnrichment()] and related plotting helpers for visualization. +#' +#' @examples +#' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), #' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -#' pbmc_small <- SeuratObject::pbmc_small -#' pbmc_small <- runEscape(pbmc_small, -#' gene.sets = GS, -#' min.size = NULL) +#' +#' sce <- SeuratObject::pbmc_small +#' sce <- runEscape(sce, +#' gene.sets = gs, +#' method = "GSVA", +#' groups = 1000, +#' min.size = 3, +#' new.assay.name = "escape") #' -#' @param input.data The count matrix, Seurat, or Single-Cell Experiment object. -#' @param gene.sets Gene sets can be a list, output from -#' \code{\link{getGeneSets}}, or the built-in gene sets -#' in the escape package \code{\link{escape.gene.sets}}. -#' @param method Select the method to calculate enrichment, \strong{AUCell}, -#' \strong{GSVA}, \strong{ssGSEA} or \strong{UCell}. -#' @param groups The number of cells to separate the enrichment calculation. -#' @param min.size Minimum number of gene necessary to perform the enrichment -#' calculation -#' @param normalize Whether to divide the enrichment score by the number -#' of genes \strong{TRUE} or report unnormalized \strong{FALSE}. -#' @param make.positive During normalization shift enrichment values to a -#' positive range \strong{TRUE} for downstream analysis or not -#' \strong{TRUE} (default). Will only be applied if \strong{normalize = TRUE}. -#' @param new.assay.name The new name of the assay to append to -#' the single-cell object containing the enrichment scores. -#' @param BPPARAM A BiocParallel::bpparam() object that for parallelization. -#' @param ... pass arguments to AUCell GSVA, ssGSEA or UCell call #' @export -#' @return Seurat or Single-Cell Experiment object with escape enrichment scores -#' in the assay slot. - -runEscape <- function(input.data, - gene.sets = NULL, - method = "ssGSEA", - groups = 1000, +runEscape <- function(input.data, + gene.sets, + method = c("ssGSEA", "GSVA", "UCell", "AUCell"), + groups = 1e3, min.size = 5, normalize = FALSE, make.positive = FALSE, new.assay.name = "escape", - BPPARAM = SerialParam(), + min.expr.cells = 0, + min.filter.by = NULL, + BPPARAM = NULL, ...) { - .checkSingleObject(input.data) - enrichment <- escape.matrix(input.data = input.data, - gene.sets = gene.sets, - method = method, - groups = groups, - min.size = min.size, - BPPARAM = BPPARAM) + method <- match.arg(method) + .checkSingleObject(input.data) + esc <- escape.matrix(input.data, gene.sets, method, groups, min.size, + normalize, make.positive, min.expr.cells, + min.filter.by, BPPARAM, ...) + + input.data <- .adding.Enrich(input.data, esc, new.assay.name) + return(input.data) +} + + +.filter_genes <- function(m, min.expr.cells) { + if (is.null(min.expr.cells) || identical(min.expr.cells, 0)) + return(m) # nothing to do + + ncells <- ncol(m) - input.data <- .adding.Enrich(input.data, enrichment, new.assay.name) - return(input.data) + thr <- if (min.expr.cells < 1) + ceiling(min.expr.cells * ncells) # proportion → absolute + else + as.integer(min.expr.cells) + + keep <- Matrix::rowSums(m > 0) >= thr + m[keep, , drop = FALSE] } -.gsva.setup <- function(data, egc) { - params.to.use <- gsvaParam(exprData = data, - geneSets = egc, - kcdf = "Poisson") - return(params.to.use) +# helper: pull a column from meta.data / colData no matter the object +#' @importFrom SummarizedExperiment colData +.extract_group_vector <- function(obj, col) { + if (.is_seurat(obj)) + return(obj[[col, drop = TRUE]]) + if (.is_sce(obj)) + return(colData(obj)[[col]]) + stop("min.filter.by requires a Seurat or SingleCellExperiment object") } -.ssGSEA.setup <- function(data, egc) { - params.to.use <- ssgseaParam(exprData = data, - geneSets = egc, - normalize = FALSE) - return(params.to.use) +.filter_genes <- function(m, min.expr.cells) { + if (is.null(min.expr.cells) || identical(min.expr.cells, 0)) + return(m) # nothing to do + + ncells <- ncol(m) + + thr <- if (min.expr.cells < 1) + ceiling(min.expr.cells * ncells) # proportion → absolute + else + as.integer(min.expr.cells) + + keep <- Matrix::rowSums(m > 0) >= thr + m[keep, , drop = FALSE] } diff --git a/R/scatterEnrichment.R b/R/scatterEnrichment.R index 479e797..1f8338b 100644 --- a/R/scatterEnrichment.R +++ b/R/scatterEnrichment.R @@ -1,89 +1,160 @@ -#' Generate a density-based scatter plot -#' -#' This function allows to the user to examine the distribution of -#' 2 gene sets along the x.axis and y.axis. The color gradient -#' is generated using the a density estimate. See -#' \href{https://github.com/LKremer/ggpointdensity}{ggpointdensity}) -#' for more information. +#' Plot 2D Enrichment Distributions With Density or Hexplots #' -#' @param input.data Enrichment output from \code{\link{escape.matrix}} or -#' \code{\link{runEscape}}. -#' @param assay Name of the assay to plot if data is a single-cell object. -#' @param x.axis Gene set to plot on the x.axis. -#' @param y.axis Gene set to plot on the y.axis. -#' \strong{group.by} parameter or use the \strong{gene.set} name if wanting to -#' apply a gradient palette. -#' @param facet.by Variable to facet the plot into n distinct graphs. -#' @param scale Visualize raw values \strong{FALSE} or Z-transform -#' enrichment values \strong{TRUE}. -#' @param style Return a \strong{"hex"} bin plot or a \strong{"point"}-based plot. -#' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. +#' Visualize the relationship between *two* enrichment scores at single-cell +#' resolution. By default points are shaded by local 2-D density +#' (`color.by = "density"`), but users can instead color by a metadata column +#' (discrete) or by the raw gene-set scores themselves (continuous). #' -#' @import ggplot2 -#' @importFrom ggpointdensity geom_pointdensity +#' @param input.data Output of \code{\link{escape.matrix}} or a single‑cell +#' object previously processed by \code{\link{runEscape}}. +#' @param assay Name of the assay holding enrichment scores when +#' `input.data` is a single‑cell object. Ignored otherwise. +#' @param x.axis,y.axis Gene-set names to plot on the *x* and *y* axes. +#' @param facet.by Optional metadata column used to facet the plot. +#' @param group.by Metadata column plotted. Defaults to the +#' Seurat/SCE `ident` slot when `NULL`. +#' @param color.by Aesthetic mapped to point color. Use +#' `"density"` (default), `"group"`, `"x"`, or `"y"`. The latter two apply a +#' continuous gradient to the corresponding axis. +#' @param style `"point"` (density-aware points) or `"hex"` (hex-bin). +#' @param scale Logical; if `TRUE` scores are centered/scaled (Z‑score) prior +#' to plotting. +#' @param bins Number of hex bins along each axis when `style = "hex"`. +#' @param point.size,alpha Aesthetic tweaks for `style = "point"`. +#' @param add.corr Logical. Add Pearson and Spearman correlation +#' coefficients (top-left corner of the first facet). +#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. #' #' @examples -#' GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), -#' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -#' pbmc_small <- SeuratObject::pbmc_small -#' pbmc_small <- runEscape(pbmc_small, -#' gene.sets = GS, -#' min.size = NULL) -#' -#' scatterEnrichment(pbmc_small, -#' assay = "escape", -#' x.axis = "Tcells", -#' y.axis = "Bcells") +#' gs <- list( +#' Bcells = c("MS4A1","CD79B","CD79A","IGH1","IGH2"), +#' Tcells = c("CD3E","CD3D","CD3G","CD7","CD8A") +#' ) +#' pbmc <- SeuratObject::pbmc_small |> +#' runEscape(gene.sets = gs, min.size = NULL) +#' +#' scatterEnrichment( +#' pbmc, +#' assay = "escape", +#' x.axis = "Tcells", +#' y.axis = "Bcells", +#' color.by = "group", +#' group.by = "groups", +#' add.corr = TRUE, +#' point.size = 1 +#' ) #' +#' @return A \pkg{ggplot2} object. +#' @importFrom stats as.formula #' @export -#' -#' @return ggplot2 object with a scatter plot of selected gene.sets -scatterEnrichment <- function(input.data, - assay = NULL, - x.axis = NULL, - y.axis = NULL, - scale = FALSE, - facet.by = NULL, - style = "point", - palette = "inferno") { +scatterEnrichment <- function(input.data, + assay = NULL, + x.axis, + y.axis, + facet.by = NULL, + group.by = NULL, + color.by = c("density", "group", "x", "y"), + style = c("point", "hex"), + scale = FALSE, + bins = 40, + point.size = 1.2, + alpha = 0.8, + palette = "inferno", + add.corr = FALSE) { + ## ---- 0 Argument sanity checks ------------------------------------------- + style <- match.arg(style, choices = c("point", "hex")) + color.by <- match.arg(color.by, choices = c("density", "group", "x", "y")) + if (is.null(group.by)) group.by <- "ident" gene.set <- c(x.axis, y.axis) - if(style %!in% c("point", "hex")) { - stop("Please select either 'point' or 'hex' for the style parameter.") + + ## ---- 1 Assemble long data-frame ----------------------------------------- + enriched <- .prepData(input.data, assay, gene.set, group.by, NULL, facet.by) + + if (scale) { + enriched[, gene.set] <- apply(enriched[, gene.set, drop = FALSE], 2, scale) } - enriched <- .prepData(input.data, assay, gene.set, NULL, NULL, facet.by) + ## ---- 2 Base ggplot2 object ---------------------------------------------- + aes_base <- ggplot2::aes(x = .data[[x.axis]], y = .data[[y.axis]]) + + ## ---- 3 Choose colouring strategy ---------------------------------------- - if(scale) { - enriched[,gene.set] <- apply(enriched[,gene.set], 2, scale) + if (color.by == "density") { + aes_combined <- aes_base # no color aesthetic + } else if (color.by == "group") { + aes_combined <- ggplot2::aes( + x = .data[[x.axis]], + y = .data[[y.axis]], + color = .data[[group.by]] + ) + } else { # "x" or "y" + sel <- if (color.by == "x") x.axis else y.axis + aes_combined <- ggplot2::aes( + x = .data[[x.axis]], + y = .data[[y.axis]], + color = .data[[sel]] + ) } - plot <- ggplot(data = enriched, - aes(x = enriched[,x.axis], - y = enriched[,y.axis])) + # Now build the plot + plt <- ggplot2::ggplot(enriched, aes_combined) + + ## ---- 4 Geometry --------------------------------------------------------- + if (style == "point") { + if (color.by == "density") { + plt <- plt + + ggpointdensity::geom_pointdensity(size = point.size, alpha = alpha) + + ggplot2::scale_color_gradientn( + colors = .colorizer(palette, 11), + name = "Local density") + } else { + geom <- ggplot2::geom_point(size = point.size, alpha = alpha) + plt <- plt + geom + } + } else { # hex-bin + plt <- plt + + ggplot2::stat_binhex(bins = bins, alpha = alpha) + + ggplot2::scale_fill_gradientn( + colors = .colorizer(palette, 11), + name = "Cells / bin") + } + + ## ---- 5 Colour scaling for non-density modes ----------------------------- + if (color.by != "density") { + sel <- switch(color.by, + group = group.by, + x = x.axis, + y = y.axis) - if(style == "point") { - plot <- plot + - geom_pointdensity() + - scale_color_gradientn(colors = .colorizer(palette, 11)) + - labs(color = "Relative Density") - } else if (style == "hex") { - plot <- plot + - stat_binhex() + - scale_fill_gradientn(colors = .colorizer(palette, 11)) - labs(fill = "Relative Density") + plt <- .colorby(enriched, plt, + color.by = sel, + palette = palette, + type = "color") } - plot <- plot + - ylab(paste0(y.axis, "\n Enrichment Score")) + - xlab(paste0(x.axis, "\n Enrichment Score")) + - theme_classic() + + ## ---- 6 Axes, theme, faceting ------------------------------------------- + plt <- plt + + ggplot2::labs(x = paste0(x.axis, "\nEnrichment score"), + y = paste0(y.axis, "\nEnrichment score")) + + ggplot2::theme_classic() if (!is.null(facet.by)) { - plot <- plot + - facet_grid(as.formula(paste('. ~', facet.by))) + plt <- plt + ggplot2::facet_grid(as.formula(paste(". ~", facet.by))) } - return(plot) + + ## ---- 7 Optional correlation overlay ------------------------------------- + if (add.corr) { + cor_pears <- stats::cor(enriched[[x.axis]], enriched[[y.axis]], + method = "pearson", use = "pairwise.complete.obs") + cor_spear <- stats::cor(enriched[[x.axis]], enriched[[y.axis]], + method = "spearman", use = "pairwise.complete.obs") + lbl <- sprintf("Pearson rho = %.2f\nSpearman rho = %.2f", cor_pears, cor_spear) + plt <- plt + + ggplot2::annotate("text", x = -Inf, y = Inf, label = lbl, + hjust = 0, vjust = 1, size = 3.5, + fontface = "italic") + } + + plt } - -#TODO Add color.by option diff --git a/R/splitEnrichment.R b/R/splitEnrichment.R index 472a12d..c081b5d 100644 --- a/R/splitEnrichment.R +++ b/R/splitEnrichment.R @@ -1,3 +1,124 @@ +#' Plot Enrichment Distributions Using Split or Dodged Violin Plots +#' +#' Visualize the distribution of gene set enrichment scores across groups using +#' violin plots. When `split.by` contains exactly two levels, the function draws +#' split violins for easy group comparison within each `group.by` category. If +#' `split.by` has more than two levels, standard dodged violins are drawn instead. +#' +#' @param input.data Output of \code{\link{escape.matrix}} or a single‑cell +#' object previously processed by \code{\link{runEscape}}. +#' @param assay Name of the assay holding enrichment scores when +#' `input.data` is a single‑cell object. Ignored otherwise. +#' @param split.by A metadata column used to split or color violins. Must contain +#' at least two levels. If it contains more than two, dodged violins are used. +#' @param group.by Metadata column plotted on the *x*‑axis. Defaults to the +#' Seurat/SCE `ident` slot when `NULL`. +#' @param gene.set.use Character(1). Name of the gene set to display. +#' @param order.by How to arrange the x‑axis: +#' *`"mean"`* – groups ordered by decreasing group mean; +#' *`"group"`* – natural sort of group labels; +#' *`NULL`* – keep original ordering. +#' @param facet.by Optional metadata column used to facet the plot. +#' @param scale Logical; if `TRUE` scores are centred/scaled (Z‑score) prior +#' to plotting. +#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}. +#' +#' @return A [ggplot2] object. +#' +#' @import ggplot2 +#' @importFrom grDevices hcl.pals +#' @importFrom stats as.formula +#' +#' @examples +#' gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +#' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) +#' +#' pbmc <- SeuratObject::pbmc_small |> +#' runEscape(gene.sets = gs, min.size = NULL) +#' +#' splitEnrichment(input.data = pbmc, +#' assay = "escape", +#' split.by = "groups", +#' gene.set.use = "Tcells") +#' +#' @export +splitEnrichment <- function(input.data, + assay = NULL, + split.by = NULL, + group.by = NULL, + gene.set.use = NULL, + order.by = NULL, + facet.by = NULL, + scale = TRUE, + palette = "inferno") { + + if (is.null(split.by)) stop("Please specify a variable for 'split.by'.") + if (is.null(group.by)) group.by <- "ident" + + # Prepare tidy data with relevant metadata columns + enriched <- .prepData(input.data, assay, gene.set.use, group.by, split.by, facet.by) + + # Determine the number of levels in the splitting variable + split.levels <- unique(enriched[[split.by]]) + n.levels <- length(split.levels) + if (n.levels < 2) stop("split.by must have at least two levels.") + + # Optional Z-score scaling of enrichment values + if (scale) { + enriched[[gene.set.use]] <- scale(enriched[[gene.set.use]]) + } + + # Optional reordering of x-axis categories + if (!is.null(order.by)) { + enriched <- .orderFunction(enriched, order.by, group.by) + } + + # Create a composite group for proper boxplot dodging + enriched$group_split <- interaction(enriched[[group.by]], enriched[[split.by]], sep = "_") + dodge <- position_dodge(width = 0.8) + + # Base plot + plot <- ggplot(enriched, aes(x = .data[[group.by]], + y = .data[[gene.set.use]], + fill = .data[[split.by]])) + + xlab(group.by) + + ylab(paste0(gene.set.use, "\nEnrichment Score")) + + labs(fill = split.by) + + scale_fill_manual(values = .colorizer(palette, n.levels)) + + theme_classic() + + # Split violin if binary, otherwise dodge standard violins + if (n.levels == 2) { + plot <- plot + + geom_split_violin(alpha = 0.8, lwd = 0.25) + + geom_boxplot(width = 0.1, + fill = "grey", + alpha = 0.6, + outlier.shape = NA, + position = position_identity(), + notch = FALSE) + } else { + plot <- plot + + geom_violin(position = dodge, alpha = 0.8, lwd = 0.25) + + geom_boxplot(width = 0.1, + fill = "grey", + alpha = 0.6, + outlier.shape = NA, + position = dodge, + notch = FALSE, + aes(group = .data$group_split)) + } + + + + # Optional faceting + if (!is.null(facet.by)) { + plot <- plot + facet_grid(as.formula(paste(". ~", facet.by))) + } + + return(plot) +} + #Developing split violin plot #Code from: https://stackoverflow.com/a/45614547 GeomSplitViolin <- ggproto("GeomSplitViolin", GeomViolin, @@ -41,99 +162,3 @@ geom_split_violin <- inherit.aes = inherit.aes, params = list(trim = trim, scale = scale, draw_quantiles = draw_quantiles, na.rm = na.rm, ...)) } - -#' Visualize enrichment results with a split violin plot -#' -#' This function allows to the user to examine the distribution of -#' enrichment across groups by generating a split violin plot. -#' -#' @param input.data Enrichment output from \code{\link{escape.matrix}} or -#' \code{\link{runEscape}}. -#' @param assay Name of the assay to plot if data is a single-cell object. -#' @param split.by Variable to form the split violin, must have 2 levels. -#' @param group.by Categorical parameter to plot along the x.axis. If input is -#' a single-cell object the default will be cluster. -#' @param gene.set Gene set to plot (on y-axis). -#' @param order.by Method to organize the x-axis - \strong{"mean"} will arrange -#' the x-axis by the mean of the gene.set, while \strong{"group"} will arrange -#' the x-axis by in alphanumerical order. Using \strong{NULL} will not reorder -#' the x-axis. -#' @param facet.by Variable to facet the plot into n distinct graphs. -#' @param scale Visualize raw values \strong{FALSE} or Z-transform -#' enrichment values \strong{TRUE}. -#' @param palette Colors to use in visualization - input any -#' \link[grDevices]{hcl.pals}. -#' -#' @import ggplot2 -#' -#' @examples -#' GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), -#' Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -#' pbmc_small <- SeuratObject::pbmc_small -#' pbmc_small <- runEscape(pbmc_small, -#' gene.sets = GS, -#' min.size = NULL) -#' -#' splitEnrichment(pbmc_small, -#' assay = "escape", -#' split.by = "groups", -#' gene.set = "Tcells") -#' -#' @export -#' -#' @return ggplot2 object violin-based distributions of selected gene.set -splitEnrichment <- function(input.data, - assay = NULL, - split.by = NULL, - group.by = NULL, - gene.set = NULL, - order.by = NULL, - facet.by = NULL, - scale = TRUE, - palette = "inferno") { - if(is.null(split.by)){ - stop("Please select a variable with 'split.by' to generate the splitEnrichment() plots") - } - - if(is.null(group.by)) { - group.by <- "ident" - } - - enriched <- .prepData(input.data, assay, gene.set, group.by, split.by, facet.by) - - if (length(unique(enriched[,split.by])) != 2) { - message("SplitEnrichment() can only work for binary variables - reselect 'split.by'") - } - - if(scale) { - enriched[,gene.set] <- scale(enriched[,gene.set]) - } - - if(!is.null(order.by) && !is.null(group.by)) { - enriched <- .orderFunction(enriched, order.by, group.by) - } - - col <- length(unique(enriched[,split.by])) - plot <- ggplot(enriched, aes(x = enriched[,group.by], - y = enriched[,gene.set], - fill = enriched[,split.by])) + - xlab(group.by) - - plot <- plot + - geom_split_violin(alpha=0.8, lwd= 0.25) + - geom_boxplot(width=0.1, - fill = "grey", - alpha=0.5, - outlier.alpha = 0, - notch = TRUE) + - ylab(paste0(gene.set, "\n Enrichment Score")) + - labs(fill = split.by) + - scale_fill_manual(values = .colorizer(palette, col))+ - theme_classic() - - if (!is.null(facet.by)) { - plot <- plot + - facet_grid(as.formula(paste('. ~', facet.by))) - } - return(plot) -} diff --git a/R/utils.R b/R/utils.R index af37046..cb64637 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,304 +1,439 @@ -"%!in%" <- Negate("%in%") +# ----------------------------------------------------------------------------- +# FAST NEGATION OPERATOR ------------------------------------------------------ +# ----------------------------------------------------------------------------- +`%!in%` <- Negate(`%in%`) -is_seurat_object <- function(obj) inherits(obj, "Seurat") -is_se_object <- function(obj) inherits(obj, "SummarizedExperiment") -is_seurat_or_se_object <- function(obj) { - is_seurat_object(obj) || is_se_object(obj) -} +# ----------------------------------------------------------------------------- +# CLASS HELPERS --------------------------------------------------------------- +# ----------------------------------------------------------------------------- +.is_seurat <- function(x) inherits(x, "Seurat") +.is_sce <- function(x) inherits(x, "SummarizedExperiment") +.is_seurat_or_sce <- function(x) .is_seurat(x) || .is_sce(x) -.checkSingleObject <- function(sc) { - if (!inherits(x=sc, what ="Seurat") & - !inherits(x=sc, what ="SummarizedExperiment")){ - stop("Object indicated is not of class 'Seurat' or - 'SummarizedExperiment', make sure you are using - the correct data.") } +.checkSingleObject <- function(obj) { + if (!.is_seurat_or_sce(obj)) + stop("Expecting a Seurat or SummarizedExperiment object") } -#' @importFrom dplyr group_by summarise_at -#' @importFrom stringr str_sort -.orderFunction <- function(dat, order.by, group.by){ - if(order.by %!in% c("mean", "group.by")) { - stop(paste0("Please select either 'mean' or 'group.by' for ordering.")) - } - if(order.by == "mean") { - summary <- dat %>% - group_by(dat[,group.by]) %>% - summarise_at(.vars = colnames(.)[1], mean) %>% - as.data.frame() - summary <- summary[order(summary[,2], decreasing = TRUE),] - dat[,group.by] <- factor(dat[,group.by], levels = summary[,1]) - } - else if (order.by == "group.by") { - dat[,group.by] <- factor(dat[,group.by], str_sort(unique(dat[,group.by]), numeric = TRUE)) +# ----------------------------------------------------------------------------- +# ORDERING UTILITY (base R implementation) ----------------------------------- +# ----------------------------------------------------------------------------- +.orderFunction <- function(dat, order.by, group.by) { + if (!(order.by %in% c("mean", "group.by"))) + stop("order.by must be 'mean' or 'group.by'") + + if (order.by == "mean") { + means <- tapply(dat[[1]], dat[[group.by]], mean, simplify = TRUE) + lev <- names(sort(means, decreasing = TRUE)) + dat[[group.by]] <- factor(dat[[group.by]], levels = lev) + } else { # natural sort of group labels + if (requireNamespace("stringr", quietly = TRUE)) { + lev <- stringr::str_sort(unique(dat[[group.by]]), numeric = TRUE) + } else { + lev <- sort(unique(dat[[group.by]]), method = "radix") + } + dat[[group.by]] <- factor(dat[[group.by]], levels = lev) } - return(dat) + dat } -.makeDFfromSCO <- function(input.data, - assay = "escape", - gene.set = NULL, - group.by = NULL, - split.by = NULL, - facet.by = NULL) { - if(is.null(assay)){ - stop("Please add the assay name in which to plot from") - } - columns <- unique(c(group.by, split.by, facet.by)) - cnts <- .cntEval(input.data, - assay = assay, - type = "data") - if(length(gene.set) == 1 && gene.set == "all") { +# ----------------------------------------------------------------------------- +# DATA.frame BUILDERS --------------------------------------------------------- +# ----------------------------------------------------------------------------- +.makeDFfromSCO <- function(input.data, assay = "escape", gene.set = NULL, + group.by = NULL, split.by = NULL, facet.by = NULL) { + if (is.null(assay)) + stop("Please provide assay name") + cols <- unique(c(group.by, split.by, facet.by)) + cnts <- .cntEval(input.data, assay = assay, type = "data") + + if (length(gene.set) == 1 && gene.set == "all") gene.set <- rownames(cnts) - } + meta <- .grabMeta(input.data) - if(length(gene.set) == 1) { - enriched <- data.frame(cnts[gene.set,], meta[,columns]) + meta <- meta[, cols, drop = FALSE] + + if (length(gene.set) == 1) { + df <- cbind(value = cnts[gene.set, ], meta) + colnames(df)[1] <- gene.set } else { - enriched <- data.frame(t(cnts[gene.set,]), meta[,columns]) + df <- cbind(t(cnts[gene.set, , drop = FALSE]), meta) } - colnames(enriched) <- c(gene.set, columns) - return(enriched) + df } -#Prepare Data .prepData <- function(input.data, assay, gene.set, group.by, split.by, facet.by) { - - if (inherits(x=input.data, what ="Seurat") || - inherits(x=input.data, what ="SummarizedExperiment")) { - enriched <- .makeDFfromSCO(input.data, assay, gene.set, group.by, split.by, facet.by) - if(length(gene.set) == 1 && gene.set == "all") { - gene.set <- colnames(enriched)[colnames(enriched) %!in% c(group.by, split.by, facet.by)] - gene.set <- gene.set[!grepl("meta", gene.set)] - } - } else if (!is_seurat_or_se_object(input.data)) { - if(length(gene.set) == 1 && gene.set == "all") { - gene.set <- colnames(input.data) - gene.set <- gene.set[gene.set %!in% c(group.by, split.by, facet.by)] - } - enriched <- data.frame(input.data[,c(gene.set,group.by, split.by, facet.by)]) + if (.is_seurat_or_sce(input.data)) { + df <- .makeDFfromSCO(input.data, assay, gene.set, group.by, split.by, facet.by) + if (identical(gene.set, "all")) { + gene.set <- setdiff(colnames(df), c(group.by, split.by, facet.by)) } - - colnames(enriched) <- c(gene.set, group.by, split.by, facet.by) - return(enriched) + } else { # assume plain data.frame / matrix + if (identical(gene.set, "all")) + gene.set <- setdiff(colnames(input.data), c(group.by, split.by, facet.by)) + df <- input.data[, c(gene.set, group.by, split.by, facet.by), drop = FALSE] + } + colnames(df) <- c(gene.set, group.by, split.by, facet.by) + df } -#' @importFrom stringr str_sort +# ----------------------------------------------------------------------------- +# COLOUR SCALES (ggplot helper; tidy‑agnostic) -------------------------------- +# ----------------------------------------------------------------------------- +.colorizer <- function(palette = "inferno", n = NULL) { + grDevices::hcl.colors(n = n, palette = palette, fixup = TRUE) +} + +#' @importFrom stats setNames .colorby <- function(enriched, - plot, + plot, color.by, - palette, - type = "fill") { - if (inherits(enriched[,color.by], c("factor", "character"))) { - grouping <- str_sort(unique(enriched[,color.by]), numeric = TRUE) - } - - if(type == "fill") { - if(inherits(enriched[,color.by], "numeric")) { - plot <- plot + - scale_fill_gradientn(colors = .colorizer(palette, 11)) + - labs(fill = color.by) - } else { - col <- length(unique(enriched[,color.by])) - col.pal <- .colorizer(palette, col) - names(col.pal) <- grouping - plot <- plot + - scale_fill_manual(values = col.pal) + - labs(fill = color.by) - } - } else if (type == "color") { - if(inherits(enriched[,color.by], "numeric")) { - plot <- plot + - scale_color_gradientn(colors = .colorizer(palette, 11)) + - labs(color = color.by) + palette, + type = c("fill", "color")) { + + type <- match.arg(type) + + vec <- enriched[[color.by]] + is_num <- is.numeric(vec) + + ## pick scale constructors -------------------------------------------------- + scale_discrete <- switch(type, + fill = ggplot2::scale_fill_manual, + color = ggplot2::scale_color_manual) + + scale_gradient <- switch(type, + fill = ggplot2::scale_fill_gradientn, + color = ggplot2::scale_color_gradientn) + + ## build scale + legend ------------------------------------------------------ + if (is_num) { + plot <- plot + + scale_gradient(colors = .colorizer(palette, 11)) + + do.call(ggplot2::labs, setNames(list(color.by), type)) + } else { + lev <- if (requireNamespace("stringr", quietly = TRUE)) { + stringr::str_sort(unique(vec), numeric = TRUE) } else { - col <- length(unique(enriched[,color.by])) - col.pal <- .colorizer(palette, col) - names(col.pal) <- grouping - plot <- plot + - scale_color_manual(values = col.pal) + - labs(color = color.by) + unique(vec) } + + pal <- .colorizer(palette, length(lev)) + names(pal) <- lev + + plot <- plot + + scale_discrete(values = pal) + + do.call(ggplot2::labs, setNames(list(color.by), type)) } + return(plot) } +# ----------------------------------------------------------------------------- +# MATRIX / VECTOR SPLITTERS --------------------------------------------------- +# ----------------------------------------------------------------------------- +.split_cols <- function(mat, chunk) { + if (ncol(mat) <= chunk) return(list(mat)) + idx <- split(seq_len(ncol(mat)), ceiling(seq_len(ncol(mat)) / chunk)) + lapply(idx, function(i) mat[, i, drop = FALSE]) +} -#Pulling a color palette for visualizations -#' @importFrom grDevices hcl.colors -#' @keywords internal -.colorizer <- function(palette = "inferno", - n= NULL) { - colors <- hcl.colors(n=n, palette = palette, fixup = TRUE) - return(colors) +.split_rows <- function(mat, chunk.size = 1000) { + if (is.vector(mat)) mat <- matrix(mat, ncol = 1) + idx <- split(seq_len(nrow(mat)), ceiling(seq_len(nrow(mat)) / chunk.size)) + lapply(idx, function(i) mat[i, , drop = FALSE]) } -#split data matrix into cell chunks -#modified this from https://github.com/carmonalab/UCell -.split_data.matrix <- function(matrix, chunk.size = 1000) { - ncols <- dim(matrix)[2] - nchunks <- ceiling(ncols / chunk.size) # Total number of chunks - - split.data <- vector("list", nchunks) # Preallocate list for efficiency - for (i in seq_len(nchunks)) { - min <- (i - 1) * chunk.size + 1 - max <- min(i * chunk.size, ncols) - split.data[[i]] <- matrix[, min:max, drop = FALSE] # Ensure consistent structure - } - return(split.data) +.split_vector <- function(vec, chunk.size = 1000) { + split(vec, ceiling(seq_along(vec) / chunk.size)) } -#' @importFrom SummarizedExperiment assays assays<- -#' @importFrom MatrixGenerics rowSums2 -.cntEval <- function(obj, - assay = "RNA", - type = "counts") { - if (inherits(x = obj, what = "Seurat")) { - cnts <- obj@assays[[assay]][type] - } else if (inherits(x = obj, what = "SingleCellExperiment")) { - pos <- ifelse(assay == "RNA", "counts", assay) - if(assay == "RNA") { - cnts <- assay(obj,pos) +# ----------------------------------------------------------------------------- +# EXPRESSION MATRIX EXTRACTOR ------------------------------------------------- +# ----------------------------------------------------------------------------- +.cntEval <- function(obj, assay = "RNA", type = "counts") { + if (.is_seurat(obj)) { + # Use generic accessor if available + if (requireNamespace("SeuratObject", quietly = TRUE)) { + suppressWarnings( + cnts <- SeuratObject::GetAssayData(obj, assay = assay, slot = type) + ) + } else { + cnts <- obj@assays[[assay]][[type]] + } + + } else if (.is_sce(obj)) { + if (requireNamespace("SummarizedExperiment", quietly = TRUE) && + requireNamespace("SingleCellExperiment", quietly = TRUE)) { + pos <- if (assay == "RNA") "counts" else assay + + cnts <- if (assay == "RNA") { + SummarizedExperiment::assay(obj, pos) + } else { + SummarizedExperiment::assay(SingleCellExperiment::altExp(obj, pos)) + } } else { - cnts <- assay(altExp(obj), pos) + stop("SummarizedExperiment and SingleCellExperiment packages are required but not installed.") } } else { cnts <- obj } - cnts <- cnts[rowSums2(cnts) != 0,] - return(cnts) + cnts[MatrixGenerics::rowSums2(cnts) != 0, , drop = FALSE] } -#Add the values to single cell object -#' @importFrom SeuratObject CreateAssayObject CreateAssay5Object -#' @importFrom SummarizedExperiment SummarizedExperiment assays<- -#' @importFrom SingleCellExperiment altExps altExp<- -.adding.Enrich <- function(sc, enrichment, enrichment.name) { - if (inherits(sc, "Seurat")) { - if (as.numeric(substr(sc@version,1,1)) == 5) { - new.assay <- suppressWarnings(CreateAssay5Object( - data = as.matrix(t(enrichment)))) + +# ----------------------------------------------------------------------------- +# ATTACH / PULL ENRICHMENT MATRICES ------------------------------------------ +# ----------------------------------------------------------------------------- +.adding.Enrich <- function(sc, enrichment, name) { + if (.is_seurat(sc)) { + if (requireNamespace("SeuratObject", quietly = TRUE)) { + major <- as.numeric(substr(sc@version, 1, 1)) + fn <- if (major >= 5) { + SeuratObject::CreateAssay5Object + } else { + SeuratObject::CreateAssayObject + } + suppressWarnings( + sc[[name]] <- fn(data = as.matrix(t(enrichment))) + ) } else { - new.assay <- suppressWarnings(CreateAssayObject( - data = as.matrix(t(enrichment)))) + warning("SeuratObject package is required to add enrichment to Seurat object.") } - suppressWarnings(sc[[enrichment.name]] <- new.assay) - } else if (inherits(sc, "SingleCellExperiment")) { - altExp(sc, enrichment.name) <- SummarizedExperiment(assays = t(enrichment)) - names(assays(altExp(sc, enrichment.name))) <- enrichment.name + } else if (.is_sce(sc)) { + if (requireNamespace("SummarizedExperiment", quietly = TRUE) && + requireNamespace("SingleCellExperiment", quietly = TRUE)) { + alt <- SummarizedExperiment::SummarizedExperiment( + assays = list(data = t(enrichment)) + ) + SingleCellExperiment::altExp(sc, name) <- alt + } else { + warning("SummarizedExperiment and SingleCellExperiment packages are required to add enrichment to SCE object.") + } } - return(sc) + + sc } -#' @importFrom SummarizedExperiment assay -#' @importFrom SingleCellExperiment altExp -#' @importFrom Matrix t -.pull.Enrich <- function(sc, enrichment.name) { - if (inherits(sc, "Seurat")) { - values <- Matrix::t(sc[[enrichment.name]]["data"]) - } else if (inherits(sc, "SingleCellExperiment")) { - if(length(assays(altExp(sc))) == 1) { - values <- t(assay(altExps(sc)[[enrichment.name]])) +.pull.Enrich <- function(sc, name) { + if (.is_seurat(sc)) { + if (requireNamespace("Matrix", quietly = TRUE)) { + Matrix::t(sc[[name]]["data"]) + } else { + stop("Matrix package is required to transpose Seurat assay data.") } + + } else if (.is_sce(sc)) { + if (requireNamespace("SummarizedExperiment", quietly = TRUE) && + requireNamespace("SingleCellExperiment", quietly = TRUE)) { + t(SummarizedExperiment::assay(SingleCellExperiment::altExp(sc)[[name]])) + } else { + stop("SummarizedExperiment and SingleCellExperiment packages are required to pull enrichment from SCE object.") + } + + } else { + stop("Unsupported object type for pulling enrichment.") } } -#' @importFrom GSEABase geneIds +# ----------------------------------------------------------------------------- +# GENE‑SET / META HELPERS ----------------------------------------------------- +# ----------------------------------------------------------------------------- .GS.check <- function(gene.sets) { - if(is.null(gene.sets)) { - stop("Please provide the gene.sets you would like to use for - the enrichment analysis") - } - egc <- gene.sets - if(inherits(egc, what = "GeneSetCollection")){ - egc <- GSEABase::geneIds(egc) # will return a simple list, - #which will work if a matrix is supplied to GSVA - } - return(egc) + if (is.null(gene.sets)) + stop("Please supply 'gene.sets'") + if (inherits(gene.sets, "GeneSetCollection")) + return(GSEABase::geneIds(gene.sets)) + gene.sets } -#This is to grab the meta data from a seurat or SCE object -#' @importFrom SingleCellExperiment colData -#' @importFrom SeuratObject Idents -#' @importFrom methods slot -#' @keywords internal .grabMeta <- function(sc) { - if (is_seurat_object(sc)) { - meta <- data.frame(sc[[]], slot(sc, "active.ident")) - colnames(meta)[length(meta)] <- "ident" + if (.is_seurat(sc)) { + if (!requireNamespace("SeuratObject", quietly = TRUE)) { + stop("SeuratObject package is required to extract metadata from a Seurat object.") + } + out <- data.frame(sc[[]], ident = SeuratObject::Idents(sc)) + } else if (.is_sce(sc)) { + if (!requireNamespace("SummarizedExperiment", quietly = TRUE)) { + stop("SummarizedExperiment package is required to extract metadata + from a SingleCellExperiment object.") + } + cd <- SummarizedExperiment::colData(sc) + out <- data.frame(cd, stringsAsFactors = FALSE) + # Preserve rownames explicitly + rownames(out) <- rownames(cd) - } else if (is_se_object(sc)){ - meta <- data.frame(colData(sc)) - rownames(meta) <- sc@colData@rownames - clu <- which(colnames(meta) == "ident") - colnames(meta)[clu] <- "ident" + # Ensure 'ident' column exists + if ("ident" %!in% colnames(out)) { + out$ident <- NA + } } else { - stop("Object indicated is not of class 'Seurat' or - 'SummarizedExperiment', make sure you are using - the correct data.") + stop("Unsupported object type; must be Seurat or SingleCellExperiment.") } - return(meta) + return(out) } -#' @importFrom SingleCellExperiment reducedDim + .grabDimRed <- function(sc, dimRed) { - if (is_seurat_object(sc)) { - values <- c(list(PCA = sc[[dimRed]]@cell.embeddings), - sc[[dimRed]]@misc) + if (.is_seurat(sc)) { + if (!requireNamespace("SeuratObject", quietly = TRUE)) { + stop("SeuratObject package is required to access dimensional reduction in Seurat objects.") + } - } else if (is_se_object(sc)){ - values <- c(list(PCA = reducedDim(sc, dimRed)), - sc@metadata[c("eigen_values","contribution","rotation")]) + red <- sc[[dimRed]] + return(list( + PCA = red@cell.embeddings, + eigen_values = red@misc$eigen_values, + contribution = red@misc$contribution, + rotation = red@misc$rotation + )) + + } else if (.is_sce(sc)) { + if (!requireNamespace("SingleCellExperiment", quietly = TRUE)) { + stop("SingleCellExperiment package is required to access dimensional reduction in SCE objects.") + } + return(list( + PCA = SingleCellExperiment::reducedDim(sc, dimRed), + eigen_values = sc@metadata$eigen_values, + contribution = sc@metadata$contribution, + rotation = sc@metadata$rotation + )) } - return(values) } +# ----------------------------------------------------------------------------- +# Underlying Enrichment Calculations +# ----------------------------------------------------------------------------- -#function to split matrices by row -#adopted from ucells split_data.matrix -split_rows <- function (matrix, chunk.size = 1000) -{ - nrows <- dim(matrix)[1] - if(is.vector(matrix)){ - nrows <- length(matrix) +#─ Ensures a package is present and attaches quietly; +.load_backend <- function(pkg) { + if (!requireNamespace(pkg, quietly = TRUE)) { + stop(pkg, " not installed, install or choose a different `method`.", + call. = FALSE) } - nchunks <- (nrows - 1)%/%chunk.size + 1 - split.data <- list() - min <- 1 - for (i in seq_len(nchunks)) { - if (i == nchunks - 1) { - left <- nrows - (i - 1) * chunk.size - max <- min + round(left/2) - 1 - } - else { - max <- min(i * chunk.size, nrows) - } - split.data[[i]] <- matrix[min:max,] - min <- max + 1 +} + +#─ Build the *Param* object used by GSVA for classic GSVA / ssGSEA ------------- +.build_gsva_param <- function(expr, gene_sets, method) { + .load_backend("GSVA") + if (method == "GSVA") { + GSVA::gsvaParam(exprData = expr, geneSets = gene_sets, kcdf = "Poisson") + } else { # ssGSEA + GSVA::ssgseaParam(exprData = expr, geneSets = gene_sets, normalize = FALSE) } - return(split.data) } -#function to split vector -#adopted from ucells split_data.matrix -split_vector <- function (vector, chunk.size = 1000) -{ - nrows <- length(vector) - nchunks <- (nrows - 1)%/%chunk.size + 1 - split.data <- list() - min <- 1 - for (i in seq_len(nchunks)) { - if (i == nchunks - 1) { - left <- nrows - (i - 1) * chunk.size - max <- min + round(left/2) - 1 +#─ Perform enrichment on one cell chunk --------------------------------------- +.compute_enrichment <- function(expr, gene_sets, method, BPPARAM, ...) { + if (requireNamespace("BiocParallel", quietly = TRUE)) { + if (is.null(BPPARAM) || !inherits(BPPARAM, "BiocParallelParam")) { + BPPARAM <- BiocParallel::SerialParam() # safe default everywhere } - else { - max <- min(i * chunk.size, nrows) + } + + switch(toupper(method), + "GSVA" = { + param <- .build_gsva_param(expr, gene_sets, "GSVA") + GSVA::gsva(param = param, BPPARAM = BPPARAM, verbose = FALSE, ...) + }, + "SSGSEA" = { + param <- .build_gsva_param(expr, gene_sets, "ssGSEA") + GSVA::gsva(param = param, BPPARAM = BPPARAM, verbose = FALSE, ...) + }, + "UCELL" = { + .load_backend("UCell") + t(UCell::ScoreSignatures_UCell(matrix = expr, + features = gene_sets, + name = NULL, + BPPARAM = BPPARAM, + ...)) + }, + "AUCELL" = { + .load_backend("AUCell") + ranks <- AUCell::AUCell_buildRankings(expr, plotStats = FALSE, verbose = FALSE) + SummarizedExperiment::assay( + AUCell::AUCell_calcAUC(geneSets = gene_sets, + rankings = ranks, + normAUC = TRUE, + aucMaxRank = ceiling(0.2 * nrow(expr)), + verbose = FALSE, + ...)) + }, + stop("Unknown method: ", method, call. = FALSE) + ) +} + +#─ Split a matrix into equal‑sized column chunks ------------------------------ +.split_cols <- function(mat, chunk) { + if (ncol(mat) <= chunk) return(list(mat)) + idx <- split(seq_len(ncol(mat)), ceiling(seq_len(ncol(mat)) / chunk)) + lapply(idx, function(i) mat[, i, drop = FALSE]) +} + +.match_summary_fun <- function(fun) { + if (is.function(fun)) return(fun) + + if (!is.character(fun) || length(fun) != 1L) + stop("'summary.fun' must be a single character or a function") + + kw <- tolower(fun) + fn <- switch(kw, + mean = base::mean, + median = stats::median, + max = base::max, + sum = base::sum, + geometric = function(x) exp(mean(log(x + 1e-6))), + stop("Unsupported summary keyword: ", fun)) + attr(fn, "keyword") <- kw # tag for fast matrixStats branch + fn +} + +.computeRunningES <- function(gene.order, hits, weight = NULL) { + N <- length(gene.order) + hit <- gene.order %in% hits + Nh <- sum(hit) + Nm <- N - Nh + if (is.null(weight)) weight <- rep(1, Nh) + + Phit <- rep(0, N) + Phit[hit] <- weight / sum(weight) + Pmiss <- rep(-1 / Nm, N) + cumsum(Phit + Pmiss) +} + + +# Modified from GSVA +#' @importFrom MatrixGenerics rowSds +.filterFeatures <- function(expr) { + sdGenes <- rowSds(expr) + sdGenes[sdGenes < 1e-10] <- 0 + if (any(sdGenes == 0) || any(is.na(sdGenes))) { + expr <- expr[sdGenes > 0 & !is.na(sdGenes), ] + } + + if (nrow(expr) < 2) + stop("Less than two genes in the input assay object\n") + + if(is.null(rownames(expr))) + stop("The input assay object doesn't have rownames\n") + expr +} + +# Parallel-aware lapply +.plapply <- function(X, FUN, ..., BPPARAM = NULL, parallel = TRUE) { + if (parallel && requireNamespace("BiocParallel", quietly = TRUE)) { + if (is.null(BPPARAM)) { + BPPARAM <- BiocParallel::SerialParam() } - split.data[[i]] <- vector[min:max] - min <- max + 1 + BiocParallel::bplapply(X, FUN, ..., BPPARAM = BPPARAM) + } else { + lapply(X, FUN, ...) } - return(split.data) } +utils::globalVariables(c( + "ES", "grp", "x", "y", "xend", "yend", "group", "value", "variable", + "gene.set.query", "index" +)) + diff --git a/inst/WORDLIST b/inst/WORDLIST index 86006a4..9602499 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,72 +1,143 @@ AUCell Azizi +BH +BIOCARTA +BPPARAM +Bcells BioC +BiocParallel +CGN CGP CMD +CPM Codecov Commun +DESeq DietSeurat +Directionality +FC +FindMarkers GSEA GSEABase GSVA GeneSetCollection +Hexplots +IGH +IMMUNESIGDB +KEGG MSigDB MasterPCAPlot -NFKB +NES NG Nebulosa -PMID +OpenMP +PNAS +Parallelization +Precomputed Releveling +SCE SCS +SYM +ScoreSignatures +SeuratObject SingleCellExperiment -TNFA +Subramanian +Tcells TukeyHSD UCell Vishwakarma +Visualisation +Visualising Voigt al args bioconductor biocparallel -compoenents +calcAUC +centred +cnet +colData +colour +coloured densityEnrichment -dev +df +dimRed +edgeR +edgeR’s eigen enrichIt enrichmentPlot et +expr factoextra +fgsea frac +geneRatio +geoms getGeneSets getSignficance getSignificance geyserEnrichment ggplot ggpointdensity +ggraph ggrepel github +grey +gseaEnrichment gsva -hcl heatmapEnrichment hexbin +https +ident jk -kernal +leadingEdge limma +linewidth lm +loadings +logFC masterPCAPlot +microenvironment +msigdb msigdbr +multithread +musculus +ncbi +nih +nlm +normalise +normalised +padj pbmc +pcaEnrichment performNormalization performPCA +phenotypes +plyr +pubmed +pval reclustering +rescaling ridgeEnrichment rlang runEscape runPCA +scRNA scater's scatterEnrichment singScore singscore splitEnrichment ssGSEA +standardises +stringr +stromal +subcollection +summarization +tibble +tidyverse +vectorised +visualisation wilcoxon +’s diff --git a/man/densityEnrichment.Rd b/man/densityEnrichment.Rd index 66afa7e..88f83b7 100644 --- a/man/densityEnrichment.Rd +++ b/man/densityEnrichment.Rd @@ -2,32 +2,36 @@ % Please edit documentation in R/densityEnrichment.R \name{densityEnrichment} \alias{densityEnrichment} -\title{Visualize the mean density ranking of genes across gene set} +\title{Visualize Mean Density Ranking of Genes Across Gene Sets} \usage{ densityEnrichment( input.data, - gene.set.use = NULL, - gene.sets = NULL, + gene.set.use, + gene.sets, group.by = NULL, + rug.height = 0.02, palette = "inferno" ) } \arguments{ -\item{input.data}{The single-cell object to use.} +\item{input.data}{A \link[SeuratObject]{Seurat} object or a +\link[SingleCellExperiment]{SingleCellExperiment}.} -\item{gene.set.use}{Selected individual gene set.} +\item{gene.set.use}{Character(1). Name of the gene set to display.} -\item{gene.sets}{The gene set library to use to extract -the individual gene set information from.} +\item{gene.sets}{A named list of character vectors, the result of +[getGeneSets()], or the built-in data object [escape.gene.sets].} -\item{group.by}{Categorical parameter to plot along the x.axis. If input is -a single-cell object the default will be cluster.} +\item{group.by}{Metadata column. Defaults to the Seurat/SCE `ident` +slot when `NULL`.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{rug.height}{Vertical spacing of the hit rug as a fraction of the +y-axis (default `0.02`).} + +\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} } \value{ -ggplot2 object mean rank gene density across groups +A `patchwork`/`ggplot2` object. } \description{ This function allows to the user to examine the mean ranking @@ -36,12 +40,13 @@ the density function to display the relative position and distribution of rank. } \examples{ -GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) + pbmc_small <- SeuratObject::pbmc_small densityEnrichment(pbmc_small, gene.set.use = "Tcells", - gene.sets = GS) + gene.sets = gs) } diff --git a/man/enrichIt.Rd b/man/enrichIt.Rd new file mode 100644 index 0000000..d2cdea0 --- /dev/null +++ b/man/enrichIt.Rd @@ -0,0 +1,78 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/enrichIt.R +\name{enrichIt} +\alias{enrichIt} +\title{Flexible GSEA for Precomputed Gene Lists} +\usage{ +enrichIt( + input.data, + gene.sets, + gene_col = NULL, + logFC_col = "avg_log2FC", + pval_col = c("p_val_adj", "p_val"), + ranking_fun = c("signed_log10_p", "logFC"), + pval_cutoff = 1, + logFC_cutoff = 0, + minSize = 5, + maxSize = 500, + padjust_method = "BH", + nproc = 0 +) +} +\arguments{ +\item{input.data}{Either +• a named numeric vector **already ranked**, *or* +• a data.frame/tibble with one row per gene and columns containing + log-fold-change and *p*-value. If the gene ID is not in `rownames(data)`, + supply `gene_col`.} + +\item{gene.sets}{AA named list of character vectors, the result of +[getGeneSets()], or the built-in data object [escape.gene.sets].} + +\item{gene_col}{Name of the column holding gene identifiers (ignored when +they are row-names). Default `NULL`.} + +\item{logFC_col, pval_col}{Column names for logFC and *p* (or adj.*p*) +– defaults match Seurat’s `FindMarkers()`.} + +\item{ranking_fun}{How to build the ranking: `"signed_log10_p"` (default) +or `"logFC"`.} + +\item{pval_cutoff, logFC_cutoff}{Filters applied **before** ranking.} + +\item{minSize, maxSize}{Integer. Minimum / maximum pathway size passed to +*fgsea* (default 5 / 500).} + +\item{padjust_method}{Multiple-testing correction; any method accepted by +[stats::p.adjust()] (default `"BH"`).} + +\item{nproc}{Passed to **fgsea** (`0` = multithread if OpenMP available).} +} +\value{ +`data.frame` with the usual fgsea columns plus a convenient +`leadingEdge` character column collapsed with \";\". +} +\description{ +A convenience front-end to **fgsea** that lets you point at the +`avg_log2FC` and `p_val_adj` columns coming out of Seurat / DESeq2 / +edgeR etc. It converts them to a signed -log10(*p*) ranking, filters on +significance / effect size, and then runs fgsea. +} +\examples{ +pbmc_small <- SeuratObject::pbmc_small + +Seurat::Idents(pbmc_small) <- "groups" +markers <- Seurat::FindMarkers(pbmc_small, + ident.1 = "g1", + ident.2 = "g2") + +gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), + Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) + +gsea <- enrichIt(markers, + gene.sets = gs) + +} +\seealso{ +[fgsea::fgsea()], [getGeneSets()], [gseaEnrichment()] +} diff --git a/man/enrichItPlot.Rd b/man/enrichItPlot.Rd new file mode 100644 index 0000000..7b94ea6 --- /dev/null +++ b/man/enrichItPlot.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/enrichItPlot.R +\name{enrichItPlot} +\alias{enrichItPlot} +\title{Adaptive visualisation of enrichIt results} +\usage{ +enrichItPlot( + res, + plot.type = c("bar", "dot", "cnet"), + top = 20, + x.measure = "-log10(padj)", + color.measure = x.measure, + show.counts = TRUE, + palette = "inferno", + ... +) +} +\arguments{ +\item{res}{`data.frame` returned by [enrichIt()].} + +\item{plot.type}{`"bar"`, `"dot"`, or `"cnet"`.} + +\item{top}{Integer. Keep the top *n* terms **per database** +(ranked by adjusted *p*). Set to `Inf` to keep all.} + +\item{x.measure}{A column in `res` mapped to the *x*-axis +(ignored for `"cnet"`). Default `"-log10(padj)"`.} + +\item{color.measure}{Column mapped to color (dot plot only). +Default same as `x.measure`.} + +\item{show.counts}{Logical. Annotate bar plot with the `Count` (number of genes).} + +\item{palette}{palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} + +\item{...}{Further arguments passed to **ggplot2** geoms (e.g. +`alpha`, `linewidth`).} +} +\value{ +A **patchwork** object (bar / dot) or **ggraph** object (cnet). +} +\description{ +Adaptive visualisation of enrichIt results +} +\examples{ +\dontrun{ +ranks <- setNames(markers$avg_log2FC, rownames(markers)) +gs <- getGeneSets("Homo sapiens", library = c("H", "C2")) +res <- enrichIt(ranks, gs) + +enrichItPlot(res) +enrichItPlot(res, "dot", top=10) +enrichItPlot(res, "cnet", top=5) +} +} diff --git a/man/escape.gene.sets.Rd b/man/escape.gene.sets.Rd index 37070cc..b134c9b 100644 --- a/man/escape.gene.sets.Rd +++ b/man/escape.gene.sets.Rd @@ -4,8 +4,30 @@ \name{escape.gene.sets} \alias{escape.gene.sets} \title{Built-In Gene Sets for escape} +\source{ +Supplementary Table S3 in Azizi *et al.* (2018) + +} +\usage{ +data("escape.gene.sets") +} \description{ -A list of gene sets derived from Azizi, et al 2018 -\href{https://pubmed.ncbi.nlm.nih.gov/29961579/}{PMID: 29961579}) -relating to tumor immunity. +`escape.gene.sets` ships with **escape** and provides a convenient set of +cell-type and pathway signatures from the scRNA-seq tumor micro-environment +study by Azizi *et al.* (2018, Cell \doi{10.1016/j.cell.2018.06.021}). These +signatures capture major immune and stromal populations observed across +breast-cancer samples and serve as a lightweight default for quick testing or +exploratory analyses. +} +\details{ +The original paper defined cell-type signatures as the top differentially +expressed genes per cluster (Azizi *et al.*, Supplementary Table S3). +} +\references{ +Azizi E, *et al.* **Single-cell map of diverse immune phenotypes in the +breast tumor microenvironment.** *Cell* 173(5):1293-1308 (2018). +} +\seealso{ +[runEscape()], [escape.matrix()], [getGeneSets()] } +\keyword{datasets} diff --git a/man/escape.matrix.Rd b/man/escape.matrix.Rd index 27c5709..68ac226 100644 --- a/man/escape.matrix.Rd +++ b/man/escape.matrix.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/runEscape.R \name{escape.matrix} \alias{escape.matrix} -\title{Calculate gene set enrichment scores} +\title{Calculate Single-Cell Gene-Set Enrichment Scores} \usage{ escape.matrix( input.data, @@ -12,54 +12,93 @@ escape.matrix( min.size = 5, normalize = FALSE, make.positive = FALSE, - BPPARAM = SerialParam(), + min.expr.cells = 0, + min.filter.by = NULL, + BPPARAM = NULL, ... ) } \arguments{ -\item{input.data}{The count matrix, Seurat, or Single-Cell Experiment object.} +\item{input.data}{A raw‐counts matrix (`genes × cells`), a +\link[SeuratObject]{Seurat} object, or a +\link[SingleCellExperiment]{SingleCellExperiment}. Gene identifiers must +match those in `gene.sets`.} -\item{gene.sets}{Gene sets can be a list, output from -\code{\link{getGeneSets}}, or the built-in gene sets -in the escape package \code{\link{escape.gene.sets}}.} +\item{gene.sets}{A named list of character vectors, the result of +[getGeneSets()], or the built-in data object [escape.gene.sets]. +List names become column names in the result.} -\item{method}{Select the method to calculate enrichment, \strong{AUCell}, -\strong{GSVA}, \strong{ssGSEA} or \strong{UCell}.} +\item{method}{Scoring algorithm (case-insensitive). One of `"GSVA"`, +`"ssGSEA"`, `"UCell"`, or `"AUCell"`. Default **`"ssGSEA"`**.} -\item{groups}{The number of cells to separate the enrichment calculation.} +\item{groups}{Integer ≥ 1. Number of cells per processing chunk. +Larger values reduce overhead but increase memory usage. Default **1000**.} -\item{min.size}{Minimum number of gene necessary to perform the enrichment -calculation} +\item{min.size}{Minimum number of genes from a set that must be detected +in the expression matrix for that set to be scored. Default **5**. +Use `NULL` to disable filtering.} -\item{normalize}{Whether to divide the enrichment score by the number -of genes \strong{TRUE} or report unnormalized \strong{FALSE}.} +\item{normalize}{Logical. If `TRUE`, the score matrix is passed to +[performNormalization()] (drop-out scaling and optional log transform). +Default **FALSE**.} -\item{make.positive}{During normalization shift enrichment values to a -positive range \strong{TRUE} for downstream analysis or not -\strong{TRUE} (default). Will only be applied if \strong{normalize = TRUE}.} +\item{make.positive}{Logical. If `TRUE` *and* `normalize = TRUE`, shifts +every gene-set column so its global minimum is zero, facilitating +downstream log-ratio analyses. Default **FALSE**.} -\item{BPPARAM}{A BiocParallel::bpparam() object that for parallelization.} +\item{min.expr.cells}{Numeric. Gene-expression filter threshold (see +details above). Default **0** (no gene filtering).} -\item{...}{pass arguments to AUCell GSVA, ssGSEA, or UCell call} +\item{min.filter.by}{Character or `NULL`. Column name in `meta.data` +(Seurat) or `colData` (SCE) defining groups within which the +`min.expr.cells` rule is applied. Default **`NULL`**.} + +\item{BPPARAM}{A \pkg{BiocParallel} parameter object describing the +parallel backend.} + +\item{...}{Extra arguments passed verbatim to the chosen back-end +scoring function (`gsva()`, `ScoreSignatures_UCell()`, or +`AUCell_calcAUC()`).} } \value{ -matrix of enrichment scores +A numeric matrix with one row per cell and one column per gene set, +ordered as in `gene.sets`. } \description{ -This function allows users to input both the single-cell RNA-sequencing -counts and output the enrichment scores as a matrix. +`escape.matrix()` computes per-cell enrichment for arbitrary gene-set +collections using one of four scoring back-ends and returns a dense numeric +matrix (cells × gene-sets). The expression matrix is processed in +user-defined *chunks* (`groups`) so that memory use remains predictable; +each chunk is dispatched in parallel via a \pkg{BiocParallel} `BPPARAM` +backend. Heavy engines (\pkg{GSVA}, \pkg{UCell}, \pkg{AUCell}) are loaded +lazily, keeping them in the package’s \strong{Suggests} field. +} +\section{Supported methods}{ + +\describe{ + \item{`"GSVA"`}{Gene-set variation analysis (Poisson kernel).} + \item{`"ssGSEA"`}{Single-sample GSEA.} + \item{`"UCell"`}{Rank-based UCell scoring.} + \item{`"AUCell"`}{Area-under-the-curve ranking score.} } +} + \examples{ -GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -pbmc_small <- SeuratObject::pbmc_small -ES <- escape.matrix(pbmc_small, - gene.sets = GS, - min.size = NULL) + +pbmc <- SeuratObject::pbmc_small +es <- escape.matrix(pbmc, + gene.sets = gs, + method = "ssGSEA", + groups = 500, + min.size = 3) } \seealso{ -\code{\link{getGeneSets}} to collect gene sets. +[runEscape()] to attach scores to a single-cell object; +[getGeneSets()] for MSigDB retrieval; [performNormalization()] for the +optional normalization workflow. } \author{ Nick Borcherding, Jared Andrews diff --git a/man/getGeneSets.Rd b/man/getGeneSets.Rd index 63d891d..19be69a 100644 --- a/man/getGeneSets.Rd +++ b/man/getGeneSets.Rd @@ -5,7 +5,7 @@ \title{Get a collection of gene sets from the msigdb} \usage{ getGeneSets( - species = "Homo sapiens", + species = c("Homo sapiens", "Mus musculus"), library = NULL, subcategory = NULL, gene.sets = NULL, @@ -14,24 +14,22 @@ getGeneSets( ) } \arguments{ -\item{species}{The scientific name of the species of interest; only -"Homo sapiens" or "Mus musculus" are supported.} +\item{species}{`"Homo sapiens"` (default) or `"Mus musculus"`.} -\item{library}{A character vector of main collections (e.g. "H", "C5"). -If provided, only gene sets in these collections are returned.} +\item{library}{Character. Optional vector of main collection codes +(e.g. `"H"`, `"C5"`).} -\item{subcategory}{A character vector specifying sub-collection abbreviations -(e.g. "CGP", "CP:REACTOME") to further subset the gene sets.} +\item{subcategory}{Character. Optional vector of sub-collection codes +(e.g. `"GO:BP"`).} -\item{gene.sets}{A character vector of specific gene set names to select. -This filter is applied after other subsetting.} +\item{gene.sets}{Character. Optional vector of specific gene-set names.} -\item{version}{The version of MSigDB to use (default "7.4").} +\item{version}{MSigDB version (character, default `"7.4"`).} -\item{id}{The gene identifier type to use (default "SYM" for gene symbols).} +\item{id}{Identifier type (default `"SYM"` for symbols).} } \value{ -A named list of gene identifiers for each gene set. +A named `list` of character vectors (gene IDs). } \description{ This function retrieves gene sets from msigdb and caches the downloaded object @@ -50,4 +48,5 @@ gs <- getGeneSets(species = "Homo sapiens", library = c("C2", "C5"), subcategory = "GO:BP") } + } diff --git a/man/geyserEnrichment.Rd b/man/geyserEnrichment.Rd index 4ec6431..db5370a 100644 --- a/man/geyserEnrichment.Rd +++ b/man/geyserEnrichment.Rd @@ -2,13 +2,13 @@ % Please edit documentation in R/geyserEnrichment.R \name{geyserEnrichment} \alias{geyserEnrichment} -\title{Generate a ridge plot to examine enrichment distributions} +\title{Visualize Enrichment Distributions Using Geyser Plots} \usage{ geyserEnrichment( input.data, assay = NULL, group.by = NULL, - gene.set = NULL, + gene.set, color.by = "group", order.by = NULL, scale = FALSE, @@ -17,51 +17,52 @@ geyserEnrichment( ) } \arguments{ -\item{input.data}{Enrichment output from \code{\link{escape.matrix}} or -\code{\link{runEscape}}.} +\item{input.data}{Output of \code{\link{escape.matrix}} or a single‑cell +object previously processed by \code{\link{runEscape}}.} -\item{assay}{Name of the assay to plot if data is a single-cell object.} +\item{assay}{Name of the assay holding enrichment scores when +`input.data` is a single‑cell object. Ignored otherwise.} -\item{group.by}{Categorical parameter to plot along the x.axis. If input is -a single-cell object the default will be cluster.} +\item{group.by}{Metadata column plotted on the *x*‑axis. Defaults to the +Seurat/SCE `ident` slot when `NULL`.} -\item{gene.set}{Gene set to plot (on y-axis).} +\item{gene.set}{Character(1). Gene‑set to plot (must exist in the +enrichment matrix).} -\item{color.by}{How the color palette applies to the graph - can -be \strong{"group"} for a categorical color palette based on the -\strong{group.by} parameter or use the \strong{gene.set} name if wanting to -apply a gradient palette.} +\item{color.by}{Aesthetic mapped to point color. Use either +*"group"* (default = `group.by`) for categorical coloring or the +*name of a gene‑set* (e.g. same as `gene.set`) to obtain a numeric +accepted.} -\item{order.by}{Method to organize the x-axis: \strong{"mean"} will arrange -the x-axis by the mean of the gene.set, while \strong{"group"} will arrange -the x-axis by in alphanumerical order. Using \strong{NULL} will not reorder -the x-axis.} +\item{order.by}{How to arrange the x‑axis: +*`"mean"`* – groups ordered by decreasing group mean; +*`"group"`* – natural sort of group labels; +*`NULL`* – keep original ordering.} -\item{scale}{Visualize raw values \strong{FALSE} or Z-transform -enrichment values \strong{TRUE}.} +\item{scale}{Logical; if `TRUE` scores are centered/scaled (Z‑score) prior +to plotting.} -\item{facet.by}{Variable to facet the plot into n distinct graphs.} +\item{facet.by}{Optional metadata column used to facet the plot.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} } \value{ -ggplot2 object with geyser-based distributions of selected gene.set +A \pkg{ggplot2} object. } \description{ This function allows to the user to examine the distribution of -enrichment across groups by generating a ridge plot. +enrichment across groups by generating a geyser plot. } \examples{ -GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -pbmc_small <- SeuratObject::pbmc_small -pbmc_small <- runEscape(pbmc_small, - gene.sets = GS, - min.size = NULL) - -geyserEnrichment(pbmc_small, - assay = "escape", + +pbmc <- SeuratObject::pbmc_small |> + runEscape(gene.sets = gs, + min.size = NULL) + +geyserEnrichment(pbmc, + assay = "escape", gene.set = "Tcells") } diff --git a/man/gseaEnrichment.Rd b/man/gseaEnrichment.Rd new file mode 100644 index 0000000..8e2324c --- /dev/null +++ b/man/gseaEnrichment.Rd @@ -0,0 +1,86 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gseaEnrichment.R +\name{gseaEnrichment} +\alias{gseaEnrichment} +\title{Classical GSEA-style Running-Enrichment Plot} +\usage{ +gseaEnrichment( + input.data, + gene.set.use, + gene.sets, + group.by = NULL, + summary.fun = "mean", + p = 1, + nperm = 1000, + rug.height = 0.02, + digits = 2, + BPPARAM = NULL, + palette = "inferno" +) +} +\arguments{ +\item{input.data}{A \link[SeuratObject]{Seurat} object or a +\link[SingleCellExperiment]{SingleCellExperiment}.} + +\item{gene.set.use}{Character(1). Name of the gene set to display.} + +\item{gene.sets}{A named list of character vectors, the result of +[getGeneSets()], or the built-in data object [escape.gene.sets].} + +\item{group.by}{Metadata column. Defaults to the Seurat/SCE `ident` +slot when `NULL`.} + +\item{summary.fun}{Method used to collapse expression within each +group **before** ranking: one of `"mean"` (default), `"median"`, `"max"`, +`"sum"`, or `"geometric"`.} + +\item{p}{Weighting exponent in the KS statistic (classical GSEA uses `p = 1`).} + +\item{nperm}{Integer ≥ 0. Gene-label permutations per group (default 1000). +`0` value will skip NES/*p* calculation.} + +\item{rug.height}{Vertical spacing of the hit rug as a fraction of the +y-axis (default `0.02`).} + +\item{digits}{Number of decimal places displayed for ES in the +legend (default `2`).} + +\item{BPPARAM}{A \pkg{BiocParallel} parameter object describing the +parallel backend.} + +\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} +} +\value{ +A single `patchwork`/`ggplot2` object +} +\description{ +Produces the familiar two-panel GSEA graphic—running enrichment score +(RES) plus a “hit” rug—for a **single gene-set** evaluated across +multiple biological groups (clusters, conditions, samples, ...). +} +\details{ +**Algorithm (Subramanian _et al._, PNAS 2005)** +1. Within every group, library-size-normalise counts to CPM. +2. Collapse gene expression with `summary.fun` (mean/median/…). +3. Rank genes (descending) to obtain one ordered list per group. +4. Compute the weighted Kolmogorov–Smirnov running score + (weight = \|stat\|^*p*). +5. ES = maximum signed deviation of the curve. +} +\examples{ +pbmc_small <- SeuratObject::pbmc_small + +gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), + Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) + +gseaEnrichment(pbmc_small, + gene.set.use = "Bcells", + gene.sets = gs, + group.by = "groups", + summary.fun = "mean", + digits = 3) + +} +\seealso{ +\code{\link{escape.matrix}}, \code{\link{densityEnrichment}} +} diff --git a/man/heatmapEnrichment.Rd b/man/heatmapEnrichment.Rd index cee67c9..ea10eda 100644 --- a/man/heatmapEnrichment.Rd +++ b/man/heatmapEnrichment.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/heatmapEnrichment.R \name{heatmapEnrichment} \alias{heatmapEnrichment} -\title{Generate a heatmap to visualize enrichment values} +\title{Visualize Enrichment Value Summaries Using Heatmaps} \usage{ heatmapEnrichment( input.data, @@ -18,34 +18,32 @@ heatmapEnrichment( ) } \arguments{ -\item{input.data}{Enrichment output from \code{\link{escape.matrix}} or -\code{\link{runEscape}}.} +\item{input.data}{Output of \code{\link{escape.matrix}} or a single‑cell +object previously processed by \code{\link{runEscape}}.} -\item{assay}{Name of the assay to plot if data is a single-cell object.} +\item{assay}{Name of the assay holding enrichment scores when +`input.data` is a single‑cell object. Ignored otherwise.} -\item{group.by}{Categorical parameter to plot along the x.axis. If input is -a single-cell object the default will be cluster.} +\item{group.by}{Metadata column plotted on the *x*‑axis. Defaults to the +Seurat/SCE `ident` slot when `NULL`.} -\item{gene.set.use}{Selected gene sets to visualize. If \strong{"all"}, the -heatmap will be generated across all gene sets.} +\item{gene.set.use}{Vector of gene‑set names to plot, or \code{"all"} +(default) to show every available gene set.} -\item{cluster.rows}{Use Euclidean distance to order the row values.} +\item{cluster.rows, cluster.columns}{Logical; if \code{TRUE}, rows/columns +are ordered by Ward‑linkage hierarchical clustering (Euclidean distance).} -\item{cluster.columns}{Use Euclidean distance to order the column values.} +\item{facet.by}{Optional metadata column used to facet the plot.} -\item{facet.by}{Variable to facet the plot into n distinct graphs.} +\item{scale}{If \code{TRUE}, Z‑transforms each gene‑set column **after** +summarization.} -\item{scale}{Visualize raw values \strong{FALSE} or Z-transform -enrichment values \strong{TRUE}.} +\item{summary.stat}{Method used to summarize expression within each} -\item{summary.stat}{Use \strong{'median'} or \strong{'mean'} values -to display.} - -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} } \value{ -ggplot2 object with heatmap of mean enrichment values +A \code{ggplot2} object. } \description{ This function allows to the user to examine the heatmap with the mean @@ -53,14 +51,12 @@ enrichment values by group. The heatmap will have the gene sets as rows and columns will be the grouping variable. } \examples{ -GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -pbmc_small <- SeuratObject::pbmc_small -pbmc_small <- runEscape(pbmc_small, - gene.sets = GS, - min.size = NULL) - -heatmapEnrichment(pbmc_small, - assay = "escape") + +pbmc <- SeuratObject::pbmc_small |> + runEscape(gene.sets = gs, min.size = NULL) + +heatmapEnrichment(pbmc, assay = "escape", palette = "viridis") } diff --git a/man/pcaEnrichment.Rd b/man/pcaEnrichment.Rd index e1d3ddf..4e1e84c 100644 --- a/man/pcaEnrichment.Rd +++ b/man/pcaEnrichment.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/pcaEnrichment.R \name{pcaEnrichment} \alias{pcaEnrichment} -\title{Visualize the PCA of enrichment values} +\title{Visualize the PCA of Enrichment Values} \usage{ pcaEnrichment( input.data, @@ -10,7 +10,7 @@ pcaEnrichment( x.axis = "PC1", y.axis = "PC2", facet.by = NULL, - style = "point", + style = c("point", "hex"), add.percent.contribution = TRUE, display.factors = FALSE, number.of.factors = 10, @@ -18,37 +18,29 @@ pcaEnrichment( ) } \arguments{ -\item{input.data}{PCA from \code{\link{performPCA}}.} +\item{input.data}{Single‑cell object (Seurat / SCE) **or** the raw list +returned by [`performPCA()`].} -\item{dimRed}{Name of the dimensional reduction to plot if data is a single-cell object.} +\item{dimRed}{Name of the dimensional‑reduction slot to pull from a +single‑cell object. Ignored when `input.data` is the list output.} -\item{x.axis}{Component to plot on the x.axis.} +\item{x.axis, y.axis}{Character vectors naming the PCs to display (e.g. "PC1").} -\item{y.axis}{Component set to plot on the y.axis.} +\item{facet.by}{Metadata column to facet plot.} -\item{facet.by}{Variable to facet the plot into n distinct graphs.} +\item{style}{"point" (default) or "hex".} -\item{style}{Return a \strong{"hex"} bin plot or a \strong{"point"}-based plot.} +\item{add.percent.contribution}{Include percent variance explained in axis +labels.} -\item{add.percent.contribution}{Add the relative percent of contribution of the -selected components to the axis labels.} +\item{display.factors}{Draw arrows for the top gene‑set loadings.} -\item{display.factors}{Add an arrow overlay to show the direction and magnitude of individual -gene sets on the PCA dimensions.} +\item{number.of.factors}{Integer; how many loadings to display if +`display.factors = TRUE`.} -\item{number.of.factors}{The number of gene.sets to display on the overlay.} +\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}. -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} -} -\value{ -ggplot2 object with PCA distribution -} -\description{ -This function allows to the user to examine the distribution -of principal components run on the enrichment values. -} -\examples{ +#' @examples GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) pbmc_small <- SeuratObject::pbmc_small @@ -62,6 +54,12 @@ pbmc_small <- performPCA(pbmc_small, pcaEnrichment(pbmc_small, x.axis = "PC1", y.axis = "PC2", - dimRed = "escape.PCA") - + dimRed = "escape.PCA")} +} +\value{ +A **ggplot2** object. +} +\description{ +This function allows to the user to examine the distribution +of principal components run on the enrichment values. } diff --git a/man/performNormalization.Rd b/man/performNormalization.Rd index fb9e506..6a8da66 100644 --- a/man/performNormalization.Rd +++ b/man/performNormalization.Rd @@ -5,7 +5,7 @@ \title{Perform Normalization on Enrichment Data} \usage{ performNormalization( - sc.data, + input.data, enrichment.data = NULL, assay = "escape", gene.sets = NULL, @@ -15,48 +15,50 @@ performNormalization( ) } \arguments{ -\item{sc.data}{Single-cell object or matrix used in the gene set enrichment calculation in -\code{\link{escape.matrix}} or \code{\link{runEscape}}.} +\item{input.data}{raw‐counts matrix (`genes × cells`), a +\link[SeuratObject]{Seurat} object, or a +\link[SingleCellExperiment]{SingleCellExperiment}. Gene identifiers must +match those in `gene.sets`.} -\item{enrichment.data}{The enrichment results from \code{\link{escape.matrix}} -or \code{\link{runEscape}} (optional)} +\item{enrichment.data}{Output of \code{\link{escape.matrix}} or a single‑cell +object previously processed by \code{\link{runEscape}}.} -\item{assay}{Name of the assay to normalize if using a single-cell object} +\item{assay}{Name of the assay holding enrichment scores when +`input.data` is a single‑cell object. Ignored otherwise.} -\item{gene.sets}{The gene set library to use to extract -the individual gene set information from} +\item{gene.sets}{A named list of character vectors, the result of +[getGeneSets()], or the built-in data object [escape.gene.sets]. +List names become column names in the result.} -\item{make.positive}{Shift enrichment values to a positive range \strong{TRUE} -for downstream analysis or not \strong{TRUE} (default).} +\item{make.positive}{Logical; if `TRUE` shifts each column so its minimum is +zero.} -\item{scale.factor}{A vector to use for normalizing enrichment scores per cell.} +\item{scale.factor}{Optional numeric vector overriding gene‑count scaling +(length = #cells). Use when you want external per‑cell normalization factors.} -\item{groups}{the number of cells to calculate normalization on at once. -chunks matrix into groups sized chunks. Useful in case of memory issues.} +\item{groups}{Integer ≥ 1. Number of cells per processing chunk. +Larger values reduce overhead but increase memory usage. Default **1000**.} } \value{ -Single-cell object or matrix of normalized enrichment scores +If `input.data` is an object, the same object with a new assay + "_normalized". Otherwise a matrix of normalized scores. } \description{ -This function allows users to normalize the enrichment calculations -by accounting for single-cell dropout and producing positive -values for downstream differential enrichment analyses. Default calculation -uses will scale the enrichment values by the number of genes present from -the gene set and then use a natural log transformation. A positive range -values is useful for several downstream analyses, like differential -evaluation for log2-fold change, but will alter the original -enrichment values. +Scales each enrichment value by the **number of genes from the set that are +expressed** in that cell (non‑zero counts). Optionally shifts results into a +positive range and/or applies a natural‑log transform for compatibility with +log‑based differential tests. } \examples{ -GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -pbmc_small <- SeuratObject::pbmc_small -pbmc_small <- runEscape(pbmc_small, - gene.sets = GS, - min.size = NULL) + +pbmc <- SeuratObject::pbmc_small |> + runEscape(gene.sets = gs, + min.size = NULL) -pbmc_small <- performNormalization(pbmc_small, - assay = "escape", - gene.sets = GS) +pbmc <- performNormalization(pbmc, + assay = "escape", + gene.sets = gs) } diff --git a/man/performPCA.Rd b/man/performPCA.Rd index 8da1c0f..72b2753 100644 --- a/man/performPCA.Rd +++ b/man/performPCA.Rd @@ -6,31 +6,33 @@ \usage{ performPCA( input.data, - assay = NULL, + assay = "escape", scale = TRUE, - n.dim = 1:10, + n.dim = 10, reduction.name = "escape.PCA", - reduction.key = "PCA" + reduction.key = "escPC_" ) } \arguments{ -\item{input.data}{Enrichment output from \code{\link{escape.matrix}} or -\code{\link{runEscape}}.} +\item{input.data}{Output of \code{\link{escape.matrix}} or a single‑cell +object previously processed by \code{\link{runEscape}}.} -\item{assay}{Name of the assay to plot if data is a single-cell object.} +\item{assay}{Name of the assay holding enrichment scores when +`input.data` is a single‑cell object. Ignored otherwise.} -\item{scale}{Standardize the enrichment value (\strong{TRUE}) or -not (\strong{FALSE})} +\item{scale}{Logical; if `TRUE` standardises each gene-set column +before PCA.} -\item{n.dim}{The number of components to calculate.} +\item{n.dim}{Integer ≥1 or vector; the **largest** value sets the +number of principal components to compute / keep.} -\item{reduction.name}{Name of the reduced dimensions object to add if -data is a single-cell object.} - -\item{reduction.key}{Name of the key to use with the components.} +\item{reduction.name, reduction.key}{Names used when writing back to a +Seurat / SCE object.} } \value{ -single-cell object or list with PCA components to plot. +*If* `input.data` is a single-cell object, the same object with a + new dimensional-reduction slot. *Otherwise* a list with + `PCA`, `eigen_values`, `contribution`, and `rotation`. } \description{ This function allows users to calculate the principal components @@ -42,14 +44,14 @@ workflow in lieu of using \code{\link{performPCA}}, but will not be compatible with downstream \code{\link{pcaEnrichment}} visualization. } \examples{ -GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -pbmc_small <- SeuratObject::pbmc_small -pbmc_small <- runEscape(pbmc_small, - gene.sets = GS, - min.size = NULL) + +pbmc <- SeuratObject::pbmc_small |> + runEscape(gene.sets = gs, + min.size = NULL) -pbmc_small <- performPCA(pbmc_small, - assay = "escape") +pbmc <- performPCA(pbmc, + assay = "escape") } diff --git a/man/ridgeEnrichment.Rd b/man/ridgeEnrichment.Rd index 43667a3..b37cee1 100644 --- a/man/ridgeEnrichment.Rd +++ b/man/ridgeEnrichment.Rd @@ -2,13 +2,13 @@ % Please edit documentation in R/ridgeEnrichment.R \name{ridgeEnrichment} \alias{ridgeEnrichment} -\title{Visualize enrichment results with a ridge plot} +\title{Visualize Enrichment Distributions Using Ridge Plots} \usage{ ridgeEnrichment( input.data, + gene.set.use, assay = NULL, group.by = NULL, - gene.set = NULL, color.by = "group", order.by = NULL, scale = FALSE, @@ -18,59 +18,52 @@ ridgeEnrichment( ) } \arguments{ -\item{input.data}{Enrichment output from \code{\link{escape.matrix}} or -\code{\link{runEscape}}.} +\item{input.data}{Output of \code{\link{escape.matrix}} or a single‑cell +object previously processed by \code{\link{runEscape}}.} -\item{assay}{Name of the assay to plot if data is a single-cell object.} +\item{gene.set.use}{Character(1). Name of the gene set to display.} -\item{group.by}{Categorical parameter to plot along the x.axis. If input is -a single-cell object the default will be cluster.} +\item{assay}{Name of the assay holding enrichment scores when +`input.data` is a single‑cell object. Ignored otherwise.} -\item{gene.set}{Gene set to plot (on y-axis).} +\item{group.by}{Metadata column plotted on the *y*‑axis. Defaults to the +Seurat/SCE `ident` slot when `NULL`.} -\item{color.by}{How the color palette applies to the graph - can -be \strong{"group"} for a categorical color palette based on the -\strong{group.by} parameter or use the \strong{gene.set} name if wanting to -apply a gradient palette.} +\item{color.by}{Aesthetic mapped to point color. Use either +*"group"* (default = `group.by`) for categorical coloring or the +*name of a gene‑set* (e.g. same as `gene.set`) to obtain a numeric +accepted.} -\item{order.by}{Method to organize the x-axis: \strong{"mean"} will arrange -the x-axis by the mean of the gene.set, while \strong{"group"} will arrange -the x-axis by in alphanumerical order. Using \strong{NULL} will not reorder -the x-axis.} +\item{order.by}{How to arrange the x‑axis: +*`"mean"`* – groups ordered by decreasing group mean; +*`"group"`* – natural sort of group labels; +*`NULL`* – keep original ordering.} -\item{scale}{Visualize raw values \strong{FALSE} or Z-transform -enrichment values \strong{TRUE}.} +\item{scale}{Logical; if `TRUE` scores are centred/scaled (Z‑score) prior +to plotting.} -\item{facet.by}{Variable to facet the plot into n distinct graphs.} +\item{facet.by}{Optional metadata column used to facet the plot.} -\item{add.rug}{Add visualization of the discrete cells along -the ridge plot (\strong{TRUE}).} +\item{add.rug}{Logical. Draw per-cell tick marks underneath each ridge.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} } \value{ -ggplot2 object with ridge-based distributions of selected gene.set +A [ggplot2] object. } \description{ This function allows to the user to examine the distribution of enrichment across groups by generating a ridge plot. } \examples{ -GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -pbmc_small <- SeuratObject::pbmc_small -pbmc_small <- runEscape(pbmc_small, - gene.sets = GS, - min.size = NULL) - -ridgeEnrichment(pbmc_small, - assay = "escape", - gene.set = "Tcells") - -ridgeEnrichment(pbmc_small, - assay = "escape", - gene.set = "Tcells", - color.by = "Tcells") + +pbmc <- SeuratObject::pbmc_small |> + runEscape(gene.sets = gs, min.size = NULL) + +ridgeEnrichment(pbmc, assay = "escape", + gene.set.use = "Tcells", + group.by = "groups") } diff --git a/man/runEscape.Rd b/man/runEscape.Rd index b46e8db..227264a 100644 --- a/man/runEscape.Rd +++ b/man/runEscape.Rd @@ -2,64 +2,98 @@ % Please edit documentation in R/runEscape.R \name{runEscape} \alias{runEscape} -\title{Enrichment calculation for single-cell workflows} +\title{Calculate Enrichment Scores Using Seurat or SingleCellExperiment Objects} \usage{ runEscape( input.data, - gene.sets = NULL, - method = "ssGSEA", + gene.sets, + method = c("ssGSEA", "GSVA", "UCell", "AUCell"), groups = 1000, min.size = 5, normalize = FALSE, make.positive = FALSE, new.assay.name = "escape", - BPPARAM = SerialParam(), + min.expr.cells = 0, + min.filter.by = NULL, + BPPARAM = NULL, ... ) } \arguments{ -\item{input.data}{The count matrix, Seurat, or Single-Cell Experiment object.} +\item{input.data}{A raw‐counts matrix (`genes × cells`), a +\link[SeuratObject]{Seurat} object, or a +\link[SingleCellExperiment]{SingleCellExperiment}. Gene identifiers must +match those in `gene.sets`.} -\item{gene.sets}{Gene sets can be a list, output from -\code{\link{getGeneSets}}, or the built-in gene sets -in the escape package \code{\link{escape.gene.sets}}.} +\item{gene.sets}{A named list of character vectors, the result of +[getGeneSets()], or the built-in data object [escape.gene.sets]. +List names become column names in the result.} -\item{method}{Select the method to calculate enrichment, \strong{AUCell}, -\strong{GSVA}, \strong{ssGSEA} or \strong{UCell}.} +\item{method}{Scoring algorithm (case-insensitive). One of `"GSVA"`, +`"ssGSEA"`, `"UCell"`, or `"AUCell"`. Default **`"ssGSEA"`**.} -\item{groups}{The number of cells to separate the enrichment calculation.} +\item{groups}{Integer ≥ 1. Number of cells per processing chunk. +Larger values reduce overhead but increase memory usage. Default **1000**.} -\item{min.size}{Minimum number of gene necessary to perform the enrichment -calculation} +\item{min.size}{Minimum number of genes from a set that must be detected +in the expression matrix for that set to be scored. Default **5**. +Use `NULL` to disable filtering.} -\item{normalize}{Whether to divide the enrichment score by the number -of genes \strong{TRUE} or report unnormalized \strong{FALSE}.} +\item{normalize}{Logical. If `TRUE`, the score matrix is passed to +[performNormalization()] (drop-out scaling and optional log transform). +Default **FALSE**.} -\item{make.positive}{During normalization shift enrichment values to a -positive range \strong{TRUE} for downstream analysis or not -\strong{TRUE} (default). Will only be applied if \strong{normalize = TRUE}.} +\item{make.positive}{Logical. If `TRUE` *and* `normalize = TRUE`, shifts +every gene-set column so its global minimum is zero, facilitating +downstream log-ratio analyses. Default **FALSE**.} -\item{new.assay.name}{The new name of the assay to append to -the single-cell object containing the enrichment scores.} +\item{new.assay.name}{Character. Name for the assay that will store the +enrichment matrix in the returned object. Default **"escape"**.} -\item{BPPARAM}{A BiocParallel::bpparam() object that for parallelization.} +\item{min.expr.cells}{Numeric. Gene-expression filter threshold (see +details above). Default **0** (no gene filtering).} -\item{...}{pass arguments to AUCell GSVA, ssGSEA or UCell call} +\item{min.filter.by}{Character or `NULL`. Column name in `meta.data` +(Seurat) or `colData` (SCE) defining groups within which the +`min.expr.cells` rule is applied. Default **`NULL`**.} + +\item{BPPARAM}{A \pkg{BiocParallel} parameter object describing the +parallel backend.} + +\item{...}{Extra arguments passed verbatim to the chosen back-end +scoring function (`gsva()`, `ScoreSignatures_UCell()`, or +`AUCell_calcAUC()`).} } \value{ -Seurat or Single-Cell Experiment object with escape enrichment scores -in the assay slot. +The input single-cell object with an additional assay containing the +enrichment scores (`cells × gene-sets`). Matrix orientation follows +standard single-cell conventions (gene-sets as rows inside the assay). } \description{ -Run the escape-based gene-set enrichment calculation with -Seurat or SingleCellExperiment pipelines +`runEscape()` is a convenience wrapper around [escape.matrix()] that +computes enrichment scores and inserts them as a new assay (default +`"escape"`) in a \pkg{Seurat} or \pkg{SingleCellExperiment} object. All +arguments (except `new.assay.name`) map directly to their counterparts in +`escape.matrix()`. } \examples{ -GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -pbmc_small <- SeuratObject::pbmc_small -pbmc_small <- runEscape(pbmc_small, - gene.sets = GS, - min.size = NULL) +sce <- SeuratObject::pbmc_small +sce <- runEscape(sce, + gene.sets = gs, + method = "GSVA", + groups = 1000, + min.size = 3, + new.assay.name = "escape") + +} +\seealso{ +[escape.matrix()] for the underlying computation, +[performNormalization()] to add normalized scores, [heatmapEnrichment()], +[ridgeEnrichment()] and related plotting helpers for visualization. +} +\author{ +Nick Borcherding, Jared Andrews } diff --git a/man/scatterEnrichment.Rd b/man/scatterEnrichment.Rd index 4dc657e..d7983f0 100644 --- a/man/scatterEnrichment.Rd +++ b/man/scatterEnrichment.Rd @@ -2,62 +2,83 @@ % Please edit documentation in R/scatterEnrichment.R \name{scatterEnrichment} \alias{scatterEnrichment} -\title{Generate a density-based scatter plot} +\title{Plot 2D Enrichment Distributions With Density or Hexplots} \usage{ scatterEnrichment( input.data, assay = NULL, - x.axis = NULL, - y.axis = NULL, - scale = FALSE, + x.axis, + y.axis, facet.by = NULL, - style = "point", - palette = "inferno" + group.by = NULL, + color.by = c("density", "group", "x", "y"), + style = c("point", "hex"), + scale = FALSE, + bins = 40, + point.size = 1.2, + alpha = 0.8, + palette = "inferno", + add.corr = FALSE ) } \arguments{ -\item{input.data}{Enrichment output from \code{\link{escape.matrix}} or -\code{\link{runEscape}}.} +\item{input.data}{Output of \code{\link{escape.matrix}} or a single‑cell +object previously processed by \code{\link{runEscape}}.} + +\item{assay}{Name of the assay holding enrichment scores when +`input.data` is a single‑cell object. Ignored otherwise.} + +\item{x.axis, y.axis}{Gene-set names to plot on the *x* and *y* axes.} + +\item{facet.by}{Optional metadata column used to facet the plot.} -\item{assay}{Name of the assay to plot if data is a single-cell object.} +\item{group.by}{Metadata column plotted. Defaults to the +Seurat/SCE `ident` slot when `NULL`.} -\item{x.axis}{Gene set to plot on the x.axis.} +\item{color.by}{Aesthetic mapped to point color. Use +`"density"` (default), `"group"`, `"x"`, or `"y"`. The latter two apply a +continuous gradient to the corresponding axis.} -\item{y.axis}{Gene set to plot on the y.axis. -\strong{group.by} parameter or use the \strong{gene.set} name if wanting to -apply a gradient palette.} +\item{style}{`"point"` (density-aware points) or `"hex"` (hex-bin).} -\item{scale}{Visualize raw values \strong{FALSE} or Z-transform -enrichment values \strong{TRUE}.} +\item{scale}{Logical; if `TRUE` scores are centered/scaled (Z‑score) prior +to plotting.} -\item{facet.by}{Variable to facet the plot into n distinct graphs.} +\item{bins}{Number of hex bins along each axis when `style = "hex"`.} -\item{style}{Return a \strong{"hex"} bin plot or a \strong{"point"}-based plot.} +\item{point.size, alpha}{Aesthetic tweaks for `style = "point"`.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} + +\item{add.corr}{Logical. Add Pearson and Spearman correlation +coefficients (top-left corner of the first facet).} } \value{ -ggplot2 object with a scatter plot of selected gene.sets +A \pkg{ggplot2} object. } \description{ -This function allows to the user to examine the distribution of -2 gene sets along the x.axis and y.axis. The color gradient -is generated using the a density estimate. See -\href{https://github.com/LKremer/ggpointdensity}{ggpointdensity}) -for more information. +Visualize the relationship between *two* enrichment scores at single-cell +resolution. By default points are shaded by local 2-D density +(`color.by = "density"`), but users can instead color by a metadata column +(discrete) or by the raw gene-set scores themselves (continuous). } \examples{ -GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), - Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -pbmc_small <- SeuratObject::pbmc_small -pbmc_small <- runEscape(pbmc_small, - gene.sets = GS, - min.size = NULL) - -scatterEnrichment(pbmc_small, - assay = "escape", - x.axis = "Tcells", - y.axis = "Bcells") +gs <- list( + Bcells = c("MS4A1","CD79B","CD79A","IGH1","IGH2"), + Tcells = c("CD3E","CD3D","CD3G","CD7","CD8A") +) +pbmc <- SeuratObject::pbmc_small |> + runEscape(gene.sets = gs, min.size = NULL) + +scatterEnrichment( + pbmc, + assay = "escape", + x.axis = "Tcells", + y.axis = "Bcells", + color.by = "group", + group.by = "groups", + add.corr = TRUE, + point.size = 1 +) } diff --git a/man/splitEnrichment.Rd b/man/splitEnrichment.Rd index 5b8b9b6..614a584 100644 --- a/man/splitEnrichment.Rd +++ b/man/splitEnrichment.Rd @@ -2,14 +2,14 @@ % Please edit documentation in R/splitEnrichment.R \name{splitEnrichment} \alias{splitEnrichment} -\title{Visualize enrichment results with a split violin plot} +\title{Plot Enrichment Distributions Using Split or Dodged Violin Plots} \usage{ splitEnrichment( input.data, assay = NULL, split.by = NULL, group.by = NULL, - gene.set = NULL, + gene.set.use = NULL, order.by = NULL, facet.by = NULL, scale = TRUE, @@ -17,49 +17,51 @@ splitEnrichment( ) } \arguments{ -\item{input.data}{Enrichment output from \code{\link{escape.matrix}} or -\code{\link{runEscape}}.} +\item{input.data}{Output of \code{\link{escape.matrix}} or a single‑cell +object previously processed by \code{\link{runEscape}}.} -\item{assay}{Name of the assay to plot if data is a single-cell object.} +\item{assay}{Name of the assay holding enrichment scores when +`input.data` is a single‑cell object. Ignored otherwise.} -\item{split.by}{Variable to form the split violin, must have 2 levels.} +\item{split.by}{A metadata column used to split or color violins. Must contain +at least two levels. If it contains more than two, dodged violins are used.} -\item{group.by}{Categorical parameter to plot along the x.axis. If input is -a single-cell object the default will be cluster.} +\item{group.by}{Metadata column plotted on the *x*‑axis. Defaults to the +Seurat/SCE `ident` slot when `NULL`.} -\item{gene.set}{Gene set to plot (on y-axis).} +\item{gene.set.use}{Character(1). Name of the gene set to display.} -\item{order.by}{Method to organize the x-axis - \strong{"mean"} will arrange -the x-axis by the mean of the gene.set, while \strong{"group"} will arrange -the x-axis by in alphanumerical order. Using \strong{NULL} will not reorder -the x-axis.} +\item{order.by}{How to arrange the x‑axis: +*`"mean"`* – groups ordered by decreasing group mean; +*`"group"`* – natural sort of group labels; +*`NULL`* – keep original ordering.} -\item{facet.by}{Variable to facet the plot into n distinct graphs.} +\item{facet.by}{Optional metadata column used to facet the plot.} -\item{scale}{Visualize raw values \strong{FALSE} or Z-transform -enrichment values \strong{TRUE}.} +\item{scale}{Logical; if `TRUE` scores are centred/scaled (Z‑score) prior +to plotting.} -\item{palette}{Colors to use in visualization - input any -\link[grDevices]{hcl.pals}.} +\item{palette}{Character. Any palette from \code{\link[grDevices]{hcl.pals}}.} } \value{ -ggplot2 object violin-based distributions of selected gene.set +A [ggplot2] object. } \description{ -This function allows to the user to examine the distribution of -enrichment across groups by generating a split violin plot. +Visualize the distribution of gene set enrichment scores across groups using +violin plots. When `split.by` contains exactly two levels, the function draws +split violins for easy group comparison within each `group.by` category. If +`split.by` has more than two levels, standard dodged violins are drawn instead. } \examples{ -GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), +gs <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) -pbmc_small <- SeuratObject::pbmc_small -pbmc_small <- runEscape(pbmc_small, - gene.sets = GS, - min.size = NULL) - -splitEnrichment(pbmc_small, + +pbmc <- SeuratObject::pbmc_small |> + runEscape(gene.sets = gs, min.size = NULL) + +splitEnrichment(input.data = pbmc, assay = "escape", split.by = "groups", - gene.set = "Tcells") + gene.set.use = "Tcells") } diff --git a/tests/testthat/_snaps/densityEnrichment/denistyenrichment-default-plot.svg b/tests/testthat/_snaps/densityEnrichment/denistyenrichment-default-plot.svg deleted file mode 100644 index fe0eb22..0000000 --- a/tests/testthat/_snaps/densityEnrichment/denistyenrichment-default-plot.svg +++ /dev/null @@ -1,137 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.000 -0.003 -0.006 -0.009 - - - - - -Rank Density - -Group - - - - - - -ident.0 -ident.2 -ident.1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -30 -60 -90 -Mean Rank Order -denistyEnrichment_default_plot - - diff --git a/tests/testthat/_snaps/densityEnrichment/denistyenrichment-group-by-plot.svg b/tests/testthat/_snaps/densityEnrichment/denistyenrichment-group-by-plot.svg deleted file mode 100644 index 10d97ed..0000000 --- a/tests/testthat/_snaps/densityEnrichment/denistyenrichment-group-by-plot.svg +++ /dev/null @@ -1,127 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0.0 -0.1 -0.2 -0.3 - - - - - -Rank Density - -Group - - - - -groups.g2 -groups.g1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -89 -90 -91 -92 -Mean Rank Order -denistyEnrichment_group.by_plot - - diff --git a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-default-plot.svg b/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-default-plot.svg deleted file mode 100644 index 76935d0..0000000 --- a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-default-plot.svg +++ /dev/null @@ -1,166 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -40 -80 - - - - - - - -0 -1 -2 -ident -Tcells - Enrichment Score - -ident - - - - - - -0 -1 -2 -geyserEnrichment_default_plot - - diff --git a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-facet-plot.svg b/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-facet-plot.svg deleted file mode 100644 index 0219285..0000000 --- a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-facet-plot.svg +++ /dev/null @@ -1,229 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -g1 - - - - - - - - - - -g2 - - - - - - -0 -1 -2 - - - - -0 -1 -2 - -0 -40 -80 - - - -ident -Tcells - Enrichment Score - -ident - - - - - - -0 -1 -2 -geyserEnrichment_facet_plot - - diff --git a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-gradient-facet-plot.svg b/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-gradient-facet-plot.svg deleted file mode 100644 index f12e023..0000000 --- a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-gradient-facet-plot.svg +++ /dev/null @@ -1,236 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -g1 - - - - - - - - - - -g2 - - - - - - -0 -1 -2 - - - - -0 -1 -2 - -0 -40 -80 - - - -ident -Tcells - Enrichment Score - -Tcells - - - - - - - - - - - -0 -25 -50 -75 -100 -geyserEnrichment_gradient_facet_plot - - diff --git a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-gradient-plot.svg b/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-gradient-plot.svg deleted file mode 100644 index b511c62..0000000 --- a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-gradient-plot.svg +++ /dev/null @@ -1,173 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -40 -80 - - - - - - - -0 -1 -2 -ident -Tcells - Enrichment Score - -Tcells - - - - - - - - - - - -0 -25 -50 -75 -100 -geyserEnrichment_gradient_plot - - diff --git a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-gradient-reorder-plot.svg b/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-gradient-reorder-plot.svg deleted file mode 100644 index 22baf97..0000000 --- a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-gradient-reorder-plot.svg +++ /dev/null @@ -1,173 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -40 -80 - - - - - - - -0 -2 -1 -ident -Tcells - Enrichment Score - -Tcells - - - - - - - - - - - -0 -25 -50 -75 -100 -geyserEnrichment_gradient_reorder_plot - - diff --git a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-order-plot.svg b/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-order-plot.svg deleted file mode 100644 index a339b51..0000000 --- a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-order-plot.svg +++ /dev/null @@ -1,166 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -40 -80 - - - - - - - -0 -2 -1 -ident -Tcells - Enrichment Score - -ident - - - - - - -0 -2 -1 -geyserEnrichment_order_plot - - diff --git a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-scale-plot.svg b/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-scale-plot.svg deleted file mode 100644 index ed589e5..0000000 --- a/tests/testthat/_snaps/geyserEnrichment/geyserenrichment-scale-plot.svg +++ /dev/null @@ -1,168 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --1 -0 -1 -2 - - - - - - - - -0 -1 -2 -ident -Tcells - Enrichment Score - -ident - - - - - - -0 -1 -2 -geyserEnrichment_scale_plot - - diff --git a/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-clustercolumns-plot.svg b/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-clustercolumns-plot.svg deleted file mode 100644 index 506ee7f..0000000 --- a/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-clustercolumns-plot.svg +++ /dev/null @@ -1,71 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Bcells -Tcells - -1 -0 -2 - -Enrichment Score - - - - - - - - - -20 -40 -60 -80 -heatmapEnrichment_clusterColumns_plot - - diff --git a/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-clusterrows-plot.svg b/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-clusterrows-plot.svg deleted file mode 100644 index cf69ccd..0000000 --- a/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-clusterrows-plot.svg +++ /dev/null @@ -1,71 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Bcells -Tcells - -0 -1 -2 - -Enrichment Score - - - - - - - - - -20 -40 -60 -80 -heatmapEnrichment_clusterRows_plot - - diff --git a/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-default-plot.svg b/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-default-plot.svg deleted file mode 100644 index 9cf46b9..0000000 --- a/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-default-plot.svg +++ /dev/null @@ -1,71 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Bcells -Tcells - -0 -1 -2 - -Enrichment Score - - - - - - - - - -20 -40 -60 -80 -heatmapEnrichment_default_plot - - diff --git a/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-facet-plot.svg b/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-facet-plot.svg deleted file mode 100644 index d517ae6..0000000 --- a/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-facet-plot.svg +++ /dev/null @@ -1,113 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -g1 - - - - - - - - - - -g2 - - - -0 -1 -2 - -0 -1 -2 - -Bcells -Tcells - -Enrichment Score - - - - - - - - - -0 -25 -50 -75 -heatmapEnrichment_facet_plot - - diff --git a/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-scale-plot.svg b/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-scale-plot.svg deleted file mode 100644 index 88af95c..0000000 --- a/tests/testthat/_snaps/heatmapEnrichment/heatmapenrichment-scale-plot.svg +++ /dev/null @@ -1,71 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Bcells -Tcells - -0 -1 -2 - -Enrichment Score - - - - - - - - - --1.0 --0.5 -0.0 -0.5 -heatmapEnrichment_scale_plot - - diff --git a/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-addfactors-plot.svg b/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-addfactors-plot.svg deleted file mode 100644 index 5ccc234..0000000 --- a/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-addfactors-plot.svg +++ /dev/null @@ -1,209 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -HALLMARK-ALLOGRAFT-REJECTION - -HALLMARK-APOPTOSIS - -HALLMARK-ESTROGEN-RESPONSE-LATE - -HALLMARK-HEME-METABOLISM - -HALLMARK-IL2-STAT5-SIGNALING - -HALLMARK-INFLAMMATORY-RESPONSE - -HALLMARK-INTERFERON-GAMMA-RESPONSE - -HALLMARK-MTORC1-SIGNALING - -HALLMARK-P53-PATHWAY - -HALLMARK-TNFA-SIGNALING-VIA-NFKB - - - --3 --2 --1 -0 -1 -2 -3 - - - - - - - - - - - - - - --2 --1 -0 -1 -2 -3 -PC2 - (12.5%) -PC3 - (9.7%) - -Relative Density - - - - - - - - - - - - - -1 -2 -3 -4 -5 -6 -pcaEnrichment_addFactors_plot - - diff --git a/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-facetby-addfactors-plot.svg b/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-facetby-addfactors-plot.svg deleted file mode 100644 index 27fc43b..0000000 --- a/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-facetby-addfactors-plot.svg +++ /dev/null @@ -1,247 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -HALLMARK-ALLOGRAFT-REJECTION - -HALLMARK-APOPTOSIS - -HALLMARK-COAGULATION - -HALLMARK-EPITHELIAL-MESENCHYMAL-TRANSITION - -HALLMARK-HEME-METABOLISM - -HALLMARK-IL2-STAT5-SIGNALING - -HALLMARK-IL6-JAK-STAT3-SIGNALING - -HALLMARK-INTERFERON-GAMMA-RESPONSE - -HALLMARK-MYOGENESIS - -HALLMARK-P53-PATHWAY - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -g1 - - - - - - - - - - -g2 - - - - - - - - --6 --4 --2 -0 -2 - - - - - - --6 --4 --2 -0 -2 - --2 --1 -0 -1 -2 -3 - - - - - - -PC1 - (35.5%) -PC2 - (12.5%) - -Relative Density - - - - - - - - - - - -1 -2 -3 -4 -5 -pcaEnrichment_facetby_addFactors_plot - - diff --git a/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-facetby-plot.svg b/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-facetby-plot.svg deleted file mode 100644 index 913154d..0000000 --- a/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-facetby-plot.svg +++ /dev/null @@ -1,205 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -g1 - - - - - - - - - - -g2 - - - - - - - - - --2 --1 -0 -1 -2 -3 - - - - - - - --2 --1 -0 -1 -2 -3 - --6 --4 --2 -0 -2 - - - - - -PC2 - (12.5%) -PC1 - (35.5%) - -Relative Density - - - - - - - - - - - -1 -2 -3 -4 -5 -pcaEnrichment_facetby_plot - - diff --git a/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-hex-plot.svg b/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-hex-plot.svg deleted file mode 100644 index 51edd38..0000000 --- a/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-hex-plot.svg +++ /dev/null @@ -1,157 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --2 --1 -0 -1 -2 -3 - - - - - - - - - - - - --6 --4 --2 -0 -2 -PC1 - (35.5%) -PC2 - (12.5%) - -count - - - - - - - - - - - -1.00 -1.25 -1.50 -1.75 -2.00 -pcaEnrichment_hex_plot - - diff --git a/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-plot.svg b/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-plot.svg deleted file mode 100644 index 72dc283..0000000 --- a/tests/testthat/_snaps/pcaEnrichment/pcaenrichment-plot.svg +++ /dev/null @@ -1,157 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --2 --1 -0 -1 -2 -3 - - - - - - - - - - - - --6 --4 --2 -0 -2 -PC1 - (35.5%) -PC2 - (12.5%) - -Relative Density - - - - - - - - - -2 -4 -6 -8 -pcaEnrichment_plot - - diff --git a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-default-plot.svg b/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-default-plot.svg deleted file mode 100644 index 5cf8122..0000000 --- a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-default-plot.svg +++ /dev/null @@ -1,57 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -1 -2 - - - - - - - -0 -50 -100 -Bcells - Enrichment Score -ident -ridgeEnrichment_default_plot - - diff --git a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-facet-plot.svg b/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-facet-plot.svg deleted file mode 100644 index 2103843..0000000 --- a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-facet-plot.svg +++ /dev/null @@ -1,106 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -g1 - - - - - - - - - - -g2 - - - - - - - -0 -50 -100 -150 - - - - - -0 -50 -100 -150 - -0 -1 -2 - - - -Bcells - Enrichment Score -ident -ridgeEnrichment_facet_plot - - diff --git a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-gradient-facet-plot.svg b/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-gradient-facet-plot.svg deleted file mode 100644 index 1486e18..0000000 --- a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-gradient-facet-plot.svg +++ /dev/null @@ -1,2908 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -g1 - - - - - - - - - - -g2 - - - - - - - -0 -50 -100 -150 - - - - - -0 -50 -100 -150 - -0 -1 -2 - - - -Bcells - Enrichment Score -ident -ridgeEnrichment_gradient_facet_plot - - diff --git a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-gradient-plot.svg b/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-gradient-plot.svg deleted file mode 100644 index 5de6dd7..0000000 --- a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-gradient-plot.svg +++ /dev/null @@ -1,1464 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -0 -1 -2 - - - - - - - -0 -50 -100 -Bcells - Enrichment Score -ident -ridgeEnrichment_gradient_plot - - diff --git a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-gradient-reorder-plot.svg b/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-gradient-reorder-plot.svg deleted file mode 100644 index 1930ebb..0000000 --- a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-gradient-reorder-plot.svg +++ /dev/null @@ -1,1464 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -2 -0 -1 - - - - - - - -0 -50 -100 -Bcells - Enrichment Score -ident -ridgeEnrichment_gradient_reorder_plot - - diff --git a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-order-plot.svg b/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-order-plot.svg deleted file mode 100644 index 041018a..0000000 --- a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-order-plot.svg +++ /dev/null @@ -1,57 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -2 -0 -1 - - - - - - - -0 -50 -100 -Bcells - Enrichment Score -ident -ridgeEnrichment_order_plot - - diff --git a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-rugadded-plot.svg b/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-rugadded-plot.svg deleted file mode 100644 index 0558a09..0000000 --- a/tests/testthat/_snaps/ridgeEnrichment/ridgeenrichment-rugadded-plot.svg +++ /dev/null @@ -1,140 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| - - - -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| - - - -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| -| - - - -0 -1 -2 - - - - - - - -0 -50 -100 -Bcells - Enrichment Score -ident -ridgeEnrichment_rugadded_plot - - diff --git a/tests/testthat/_snaps/scatterEnrichment/scatterenrichment-default-plot.svg b/tests/testthat/_snaps/scatterEnrichment/scatterenrichment-default-plot.svg deleted file mode 100644 index 36546c4..0000000 --- a/tests/testthat/_snaps/scatterEnrichment/scatterenrichment-default-plot.svg +++ /dev/null @@ -1,150 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -30 -60 -90 - - - - - - - -0 -40 -80 -Tcells - Enrichment Score -Bcells - Enrichment Score - -Relative Density - - - - - - - - - - - -1.0 -1.5 -2.0 -2.5 -3.0 -scatterEnrichment_default_plot - - diff --git a/tests/testthat/_snaps/scatterEnrichment/scatterenrichment-facet-plot.svg b/tests/testthat/_snaps/scatterEnrichment/scatterenrichment-facet-plot.svg deleted file mode 100644 index f09aac6..0000000 --- a/tests/testthat/_snaps/scatterEnrichment/scatterenrichment-facet-plot.svg +++ /dev/null @@ -1,189 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -g1 - - - - - - - - - - -g2 - - - - - - -0 -40 -80 - - - - -0 -40 -80 - -30 -60 -90 - - - -Tcells - Enrichment Score -Bcells - Enrichment Score - -Relative Density - - - - - - - - - - - -1.00 -1.25 -1.50 -1.75 -2.00 -scatterEnrichment_facet_plot - - diff --git a/tests/testthat/_snaps/scatterEnrichment/scatterenrichment-hex-plot.svg b/tests/testthat/_snaps/scatterEnrichment/scatterenrichment-hex-plot.svg deleted file mode 100644 index c0aad8b..0000000 --- a/tests/testthat/_snaps/scatterEnrichment/scatterenrichment-hex-plot.svg +++ /dev/null @@ -1,140 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -20 -40 -60 -80 -100 - - - - - - - - - -0 -40 -80 -Tcells - Enrichment Score -Bcells - Enrichment Score - -count - - - - - - - - - - - -1.0 -1.5 -2.0 -2.5 -3.0 -scatterEnrichment_hex_plot - - diff --git a/tests/testthat/_snaps/scatterEnrichment/scatterenrichment-scale-plot.svg b/tests/testthat/_snaps/scatterEnrichment/scatterenrichment-scale-plot.svg deleted file mode 100644 index 36a3812..0000000 --- a/tests/testthat/_snaps/scatterEnrichment/scatterenrichment-scale-plot.svg +++ /dev/null @@ -1,150 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --2 --1 -0 -1 -2 - - - - - - - - - - --1 -0 -1 -2 -Tcells - Enrichment Score -Bcells - Enrichment Score - -Relative Density - - - - - - - -2.5 -5.0 -7.5 -scatterEnrichment_scale_plot - - diff --git a/tests/testthat/_snaps/splitEnrichment/splitenrichment-default-plot.svg b/tests/testthat/_snaps/splitEnrichment/splitenrichment-default-plot.svg deleted file mode 100644 index 1a134fd..0000000 --- a/tests/testthat/_snaps/splitEnrichment/splitenrichment-default-plot.svg +++ /dev/null @@ -1,84 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --1 -0 -1 -2 - - - - - - - - -0 -1 -2 -ident -Tcells - Enrichment Score - -groups - - - - -g1 -g2 -splitEnrichment_default_plot - - diff --git a/tests/testthat/_snaps/splitEnrichment/splitenrichment-facet-plot.svg b/tests/testthat/_snaps/splitEnrichment/splitenrichment-facet-plot.svg deleted file mode 100644 index 88dfef8..0000000 --- a/tests/testthat/_snaps/splitEnrichment/splitenrichment-facet-plot.svg +++ /dev/null @@ -1,134 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -A - - - - - - - - - - -B - - - - - - -0 -1 -2 - - - - -0 -1 -2 - --1 -0 -1 -2 - - - - -ident -Tcells - Enrichment Score - -groups - - - - -g1 -g2 -splitEnrichment_facet_plot - - diff --git a/tests/testthat/_snaps/splitEnrichment/splitenrichment-mean-plot.svg b/tests/testthat/_snaps/splitEnrichment/splitenrichment-mean-plot.svg deleted file mode 100644 index e28401c..0000000 --- a/tests/testthat/_snaps/splitEnrichment/splitenrichment-mean-plot.svg +++ /dev/null @@ -1,84 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --1 -0 -1 -2 - - - - - - - - -0 -2 -1 -ident -Tcells - Enrichment Score - -groups - - - - -g1 -g2 -splitEnrichment_mean_plot - - diff --git a/tests/testthat/helper-getGeneSets.R b/tests/testthat/helper-getGeneSets.R new file mode 100644 index 0000000..709e725 --- /dev/null +++ b/tests/testthat/helper-getGeneSets.R @@ -0,0 +1,34 @@ +# ------------------------------------------------------------------ # +# 1. build a tiny fake MSigDB object -------------------------------- +# ------------------------------------------------------------------ # +setClass("FakeCollectionType", + slots = c(category = "character", subCategory = "character")) +setClass("FakeGeneSet", + slots = c(setName = "character", + geneIds = "character", + collectionType = "FakeCollectionType")) + +.fake_msigdb <- list( + new("FakeGeneSet", + setName = "HALLMARK_TEST_ONE", + geneIds = c("geneA", "geneB"), + collectionType = new("FakeCollectionType", + category = "H", + subCategory = "CGP")), + new("FakeGeneSet", + setName = "TEST_SET", + geneIds = c("geneC", "geneD"), + collectionType = new("FakeCollectionType", + category = "C5", + subCategory = "GO:BP")) +) + +# ------------------------------------------------------------------ # +# 2. overwrite .msigdb_cached() inside the escape namespace --------- +# ------------------------------------------------------------------ # +ns <- asNamespace("escape") +unlockBinding(".msigdb_cached", ns) +assign(".msigdb_cached", + function(org, id = "SYM", version = "7.4") .fake_msigdb, + envir = ns) +lockBinding(".msigdb_cached", ns) \ No newline at end of file diff --git a/tests/testthat/helper-seurat.R b/tests/testthat/helper-seurat.R new file mode 100644 index 0000000..fea456d --- /dev/null +++ b/tests/testthat/helper-seurat.R @@ -0,0 +1,10 @@ +# Attach Seurat packages *only when they are available*. +# If they are missing, skip all Seurat-dependent tests gracefully. + +skip_if_not_installed("SeuratObject", minimum_version = "5.0.0") +skip_if_not_installed("Seurat") # remove if you do not use Seurat proper + +suppressPackageStartupMessages({ + library(SeuratObject) + library(Seurat) +}) \ No newline at end of file diff --git a/tests/testthat/helper-vdiffr.R b/tests/testthat/helper-vdiffr.R deleted file mode 100644 index eb4b8ea..0000000 --- a/tests/testthat/helper-vdiffr.R +++ /dev/null @@ -1,15 +0,0 @@ -# By default, if vdiffr is not installed, all visual tests are skipped unless -# VDIFFR_RUN_TESTS is explicitly set to "true", which should be the case only on -# a GitHub Actions CI runner with stable version of R. - -if (requireNamespace("vdiffr", quietly = TRUE) && utils::packageVersion('testthat') >= '3.0.3') { - expect_doppelganger <- vdiffr::expect_doppelganger -} else { - # If vdiffr is not available and visual tests are explicitly required, raise error. - if (identical(Sys.getenv("VDIFFR_RUN_TESTS"), "true")) { - rlang::abort("vdiffr is not installed") - } - - # Otherwise, assign a dummy function - expect_doppelganger <- function(...) skip("vdiffr is not installed.") -} diff --git a/tests/testthat/test-densityEnrichment.R b/tests/testthat/test-densityEnrichment.R index dd277ea..62d5a2d 100644 --- a/tests/testthat/test-densityEnrichment.R +++ b/tests/testthat/test-densityEnrichment.R @@ -1,27 +1,67 @@ # test script for densityEnrichment.R - testcases are NOT comprehensive! -test_that("densityEnrichment works", { - - seuratObj <- getdata("runEscape", "pbmc_small_ssGSEA") - GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), - Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) - set.seed(42) - expect_doppelganger( - "denistyEnrichment_default_plot", - densityEnrichment( - seuratObj, - gene.set.use = "Tcells", - gene.sets = GS) +pbmc <- SeuratObject::pbmc_small +GS <- list( + Bcells = c("MS4A1", "CD79B", "CD79A", "IGHG1", "IGHG2"), + Tcells = c("CD3E", "CD3D", "CD3G", "CD7", "CD8A") +) + +# helper: number of groups in default 'ident' column +n_groups <- length(unique(as.character(Idents(pbmc)))) + + +# ── 1 Core functionality returns patchwork object ────────────────── +test_that("densityEnrichment() returns a patchwork / ggplot object", { + plt <- densityEnrichment( + input.data = pbmc, + gene.set.use = "Tcells", + gene.sets = GS ) - set.seed(42) - expect_doppelganger( - "denistyEnrichment_group.by_plot", + expect_s3_class(plt, "patchwork") # overall object + expect_s3_class(plt[[1]], "ggplot") # top density panel + expect_s3_class(plt[[2]], "ggplot") # bottom segment panel +}) + +# ── 2 Groups are represented correctly in the density plot ───────── +test_that("all groups appear once in the density layer", { + plt <- densityEnrichment(pbmc, "Tcells", GS) + density_pan <- plt[[1]] + vars_in_df <- unique(density_pan$data$variable) + + expect_equal(length(na.omit(vars_in_df)), n_groups) +}) + +# ── 3 Alternative palettes run without error ─────────────────────── +test_that("custom palette works", { + expect_no_error( densityEnrichment( - seuratObj, + input.data = pbmc, gene.set.use = "Bcells", - gene.sets = GS, - group.by = "groups") + gene.sets = GS, + palette = "viridis" + ) + ) +}) + +# ── 4 Input validation – wrong object or gene-set names ──────────── +test_that("input validation errors are triggered correctly", { + mat <- matrix(rpois(1000, 5), nrow = 100) # not a single-cell object + + expect_error( + densityEnrichment(mat, "Tcells", GS), + "Expecting a Seurat or SummarizedExperiment object" + ) +}) + +# ── 5 group.by argument overrides default --------------------------- +test_that("group.by selects an alternative metadata column", { + pbmc$dummy_group <- sample(c("A", "B"), ncol(pbmc), replace = TRUE) + plt <- densityEnrichment( + pbmc, gene.set.use = "Tcells", gene.sets = GS, + group.by = "dummy_group" ) + # check that new grouping made it into the plot data + expect_true(all(grepl("^dummy_group\\.", na.omit(unique(plt[[1]]$data$variable))))) }) diff --git a/tests/testthat/test-enrichIt.R b/tests/testthat/test-enrichIt.R new file mode 100644 index 0000000..d4adf5d --- /dev/null +++ b/tests/testthat/test-enrichIt.R @@ -0,0 +1,81 @@ +# test script for enrichIt.R - testcases are NOT comprehensive! + +# ---- test fixtures --------------------------------------------------------- +gene_sets <- list( + PathA = c("G1", "G2", "G10"), + PathB = c("G3", "G4", "G5"), + PathC = c("G8", "G9") +) + +# numeric vector (already ranked) ----------- +vec <- c(G1 = 2.3, G2 = -1.1, G3 = 0.8, G4 = -2.2, G5 = 1.5) +vec <- sort(vec, decreasing = TRUE) + +# data-frame (Seurat-like) ------------------ +de_tbl <- data.frame( + gene = paste0("G", 1:6), + avg_log2FC = c( 2.3, -1.1, 0.8, -2.2, 1.5, 0), + p_val_adj = c( 1e-4, 5e-3, 0.03, 1e-10, 0.2, 1), + stringsAsFactors = FALSE +) +rownames(de_tbl) <- de_tbl$gene # default layout + +# ---- 1. BASIC FUNCTIONALITY ---------------------------------------------- +test_that("numeric vector input returns a proper fgsea table", { + res <- enrichIt(vec, gene_sets) + + expect_s3_class(res, "data.frame") + expect_true(all(c("pathway", "NES", "pval", "padj", "leadingEdge") %in% + names(res))) + expect_true(is.character(res$leadingEdge)) + expect_true(all(res$padj >= 0 & res$padj <= 1)) +}) + +test_that("data-frame input (default columns) works", { + res <- enrichIt(de_tbl, gene_sets) + + expect_s3_class(res, "data.frame") + expect_true(all(c("pathway", "NES", "pval") %in% names(res))) +}) + +# ---- 2. ALTERNATIVE OPTIONS ---------------------------------------------- +test_that("custom gene_col + explicit ranking_fun = 'logFC' works", { + tbl <- de_tbl + names(tbl)[names(tbl) == "avg_log2FC"] <- "logFC" + res <- enrichIt(tbl, + gene.sets = gene_sets, + gene_col = "gene", + logFC_col = "logFC", + pval_col = "p_val_adj", + ranking_fun = "logFC") + expect_s3_class(res, "data.frame") +}) + + +# ---- 3. ERROR HANDLING ---------------------------------------------------- +test_that("error when no genes left after filtering", { + expect_error( + enrichIt(de_tbl, + gene_sets, + pval_cutoff = 1e-10, + logFC_cutoff = 10), + "No genes left" + ) +}) + +test_that("error for unlabeled numeric vector", { + bad_vec <- unname(vec) + expect_error( + enrichIt(bad_vec, gene_sets), + "named numeric vector" + ) +}) + +test_that("error when required columns are missing", { + tmp <- de_tbl + tmp$avg_log2FC <- NULL + expect_error( + enrichIt(tmp, gene_sets), + "logFC_col" + ) +}) diff --git a/tests/testthat/test-enrichItPlot.R b/tests/testthat/test-enrichItPlot.R new file mode 100644 index 0000000..6971009 --- /dev/null +++ b/tests/testthat/test-enrichItPlot.R @@ -0,0 +1,68 @@ +# test script for enrichItPlot.R - testcases are NOT comprehensive! + +skip_if_not_installed("fgsea") +skip_if_not_installed("patchwork") + +# helper data: run a very small fgsea ---------------------------------------- +set.seed(42) + +## ranked statistic --------------------------------------------------------- +gene_ids <- paste0("G", 1:80) +stat_vec <- setNames(rev(seq_along(gene_ids)), gene_ids) # 80 .. 1 (descending) + +## synthetic multi-library gene sets ---------------------------------------- +gene_sets <- list( + DB1_PathA = paste0("G", 1:15), + DB1_PathB = paste0("G", 16:30), + DB2_PathC = paste0("G", 21:35), # overlaps with both A & B -> ensures cnet links + DB2_PathD = paste0("G", 46:60) +) + +res <- enrichIt(input.data = stat_vec, + gene.sets = gene_sets, + minSize = 5 +) + + +res$Database <- ifelse(grepl("^DB1_", res$pathway), "DB1", "DB2") + + +# 1. BAR plot --------------------------------------------------------------- +test_that("bar plot returns a patchwork object with ggplot inside", { + plt <- enrichItPlot(res, plot.type = "bar", top = 3) + + expect_s3_class(plt, "patchwork") + expect_true(inherits(plt[[1]], "ggplot")) +}) + +# --------------------------------------------------------------------------- +# 2. DOT plot --------------------------------------------------------------- +test_that("dot plot returns a patchwork object and respects top argument", { + plt <- enrichItPlot(res, plot.type = "dot", top = 1) + + expect_s3_class(plt, "patchwork") + # only one term per database should survive top = 1 + build <- ggplot2::ggplot_build(plt[[1]]) + n_terms <- length(unique(build$data[[1]]$y)) + expect_lte(n_terms, 2) # 2 databases ⇒ ≤2 rows in panel 1 +}) + +# --------------------------------------------------------------------------- +# 3. CNET plot -------------------------------------------------------------- +test_that("cnet plot returns a ggraph object", { + skip_if_not_installed("ggraph") + skip_if_not_installed("igraph") + + plt <- enrichItPlot(res, plot.type = "cnet", top = 4) + + expect_s3_class(plt, "ggraph") +}) + +# --------------------------------------------------------------------------- +# 4. Error handling --------------------------------------------------------- +test_that("invalid plot.type triggers an informative error", { + expect_error( + enrichItPlot(res, plot.type = "heatmap"), + regexp = "cnet" + ) +}) diff --git a/tests/testthat/test-getGeneSets.R b/tests/testthat/test-getGeneSets.R index ccdaed4..5855402 100644 --- a/tests/testthat/test-getGeneSets.R +++ b/tests/testthat/test-getGeneSets.R @@ -1,102 +1,37 @@ # test script for getGeneSets.R - testcases are NOT comprehensive! -context("Testing getGeneSets and caching behavior") - -# Define fake S4 classes to mimic the msigdb gene set objects. -setClass("FakeCollectionType", slots = c(category = "character", subCategory = "character")) -setClass("FakeGeneSet", - slots = c(setName = "character", - geneIds = "character", - collectionType = "FakeCollectionType")) - -# Create two fake gene set objects. -fake1 <- new("FakeGeneSet", - setName = "HALLMARK_TEST_ONE", - geneIds = c("geneA", "geneB"), - collectionType = new("FakeCollectionType", - category = "H", - subCategory = "CGP")) - -fake2 <- new("FakeGeneSet", - setName = "TEST_SET", - geneIds = c("geneC", "geneD"), - collectionType = new("FakeCollectionType", - category = "C5", - subCategory = "GO:BP")) - -# Combine into a list to simulate the msigdb object. -fake_list <- list(fake1, fake2) - -# Clear the package-level cache before running tests. -rm(list = ls(envir = .msigdb_cache), envir = .msigdb_cache) - -# Insert the fake object into the cache for human (key: "hs_SYM_7.4"). -assign("hs_SYM_7.4", fake_list, envir = .msigdb_cache) - -test_that("Unsupported species throws an error", { - expect_error( - getGeneSets(species = "Pan troglodytes"), - "Supported species are only 'Homo sapiens' and 'Mus musculus'." - ) +test_that("species argument is validated", { + expect_error(getGeneSets("Pan troglodytes"), + regexp = "Homo sapiens") }) -test_that("Filtering by library (main collection) works", { - gs <- getGeneSets(species = "Homo sapiens", library = "H") - # Only fake1 has library "H". - expect_equal(names(gs), "HALLMARK-TEST-ONE") - expect_equal(gs[["HALLMARK-TEST-ONE"]], c("geneA", "geneB")) +test_that("filtering by library works", { + gs <- getGeneSets("Homo sapiens", library = "H") + expect_named(gs, "HALLMARK-TEST-ONE") + expect_identical(gs[[1]], c("geneA", "geneB")) }) -test_that("Filtering by subcategory works", { - gs <- getGeneSets(species = "Homo sapiens", subcategory = "GO:BP") - # Only fake2 has subcategory "GO:BP". - expect_equal(names(gs), "TEST-SET") - expect_equal(gs[["TEST-SET"]], c("geneC", "geneD")) +test_that("filtering by sub-category works", { + gs <- getGeneSets("Homo sapiens", subcategory = "GO:BP") + expect_named(gs, "TEST-SET") + expect_identical(gs[[1]], c("geneC", "geneD")) }) -test_that("Filtering by specific gene.sets works", { - gs <- getGeneSets(species = "Homo sapiens", gene.sets = "HALLMARK_TEST_ONE") - expect_equal(names(gs), "HALLMARK-TEST-ONE") - expect_equal(gs[["HALLMARK-TEST-ONE"]], c("geneA", "geneB")) +test_that("filtering by explicit gene.sets works", { + gs <- getGeneSets("Homo sapiens", gene.sets = "HALLMARK_TEST_ONE") + expect_named(gs, "HALLMARK-TEST-ONE") + expect_identical(gs[[1]], c("geneA", "geneB")) }) -test_that("Combined filtering by library and subcategory works", { - gs <- getGeneSets(species = "Homo sapiens", library = "C5", subcategory = "GO:BP") - expect_equal(names(gs), "TEST-SET") - expect_equal(gs[["TEST-SET"]], c("geneC", "geneD")) +test_that("combined filters (library + subcategory) work", { + gs <- getGeneSets("Homo sapiens", library = "C5", subcategory = "GO:BP") + expect_named(gs, "TEST-SET") }) -test_that("No gene sets found triggers a warning and returns NULL", { +test_that("requesting an empty subset warns and returns NULL", { expect_warning( - result <- getGeneSets(species = "Homo sapiens", library = "NONEXISTENT"), - "No gene sets found for the specified parameters." + out <- getGeneSets("Homo sapiens", library = "NONEXISTENT"), + regexp = "matched the requested filters." ) - expect_null(result) -}) - -test_that("Caching behavior works for a new species (Mus musculus)", { - # Remove any existing mouse object from the cache. - if (exists("mm_SYM_7.4", envir = .msigdb_cache)) { - rm("mm_SYM_7.4", envir = .msigdb_cache) - } - - # Capture messages on the first call (should simulate a download). - msgs_download <- character() - withCallingHandlers({ - getGeneSets(species = "Mus musculus", library = "H") - }, message = function(m) { - msgs_download <<- c(msgs_download, m$message) - invokeRestart("muffleMessage") - }) - expect_true(any(grepl("Downloading msigdb object", msgs_download))) - - # Now the mouse object should be cached. - msgs_cache <- character() - withCallingHandlers({ - getGeneSets(species = "Mus musculus", library = "H") - }, message = function(m) { - msgs_cache <<- c(msgs_cache, m$message) - invokeRestart("muffleMessage") - }) - expect_true(any(grepl("Loading msigdb object from cache", msgs_cache))) + expect_null(out) }) diff --git a/tests/testthat/test-geyserEnrichment.R b/tests/testthat/test-geyserEnrichment.R index 49c7619..2e85223 100644 --- a/tests/testthat/test-geyserEnrichment.R +++ b/tests/testthat/test-geyserEnrichment.R @@ -1,85 +1,81 @@ # test script for geyserEnrichment.R - testcases are NOT comprehensive! -test_that("geyserEnrichment works", { - - seuratObj <- getdata("runEscape", "pbmc_small_ssGSEA") +# ──────────────────────────────────────────────────────────────────────────────── +# Test-data set-up ------------------------------------------------------------- +# ──────────────────────────────────────────────────────────────────────────────── + +pbmc_small <- getdata("runEscape", "pbmc_small_ssGSEA") + +# helper to make repeated plotting calls tidy +plot_fun <- function(...) { + geyserEnrichment(pbmc_small, assay = "escape", ...) +} + +# ──────────────────────────────────────────────────────────────────────────────── +# Core object / mapping checks -------------------------------------------------- +# ──────────────────────────────────────────────────────────────────────────────── +test_that("default call returns a ggplot object with expected mappings", { + p <- plot_fun(gene.set = "Tcells") - set.seed(42) - expect_doppelganger( - "geyserEnrichment_default_plot", - geyserEnrichment( - seuratObj, - assay = "escape", - gene.set = "Tcells" - ) - ) + expect_s3_class(p, "ggplot") - set.seed(42) - expect_doppelganger( - "geyserEnrichment_scale_plot", - geyserEnrichment( - seuratObj, - assay = "escape", - gene.set = "Tcells", - scale = TRUE - ) + # x-axis should map to ident (default group.by) + expect_identical( + rlang::get_expr(p$mapping$x), + rlang::expr(.data[["ident"]]) ) - set.seed(42) - expect_doppelganger( - "geyserEnrichment_facet_plot", - geyserEnrichment( - seuratObj, - assay = "escape", - gene.set = "Tcells", - facet.by = "groups" - ) + # y-axis should map to the chosen gene-set + expect_identical( + rlang::get_expr(p$mapping$y), + rlang::expr(.data[["Tcells"]]) ) +}) - set.seed(42) - expect_doppelganger( - "geyserEnrichment_order_plot", - geyserEnrichment( - seuratObj, - order.by = "mean", - assay = "escape", - gene.set = "Tcells" - ) - ) +# ──────────────────────────────────────────────────────────────────────────────── +# order.by logic ---------------------------------------------------------------- +# ──────────────────────────────────────────────────────────────────────────────── +test_that("order.by = 'mean' sorts x-axis levels by group mean", { + p <- plot_fun(gene.set = "Tcells", order.by = "mean") - set.seed(42) - expect_doppelganger( - "geyserEnrichment_gradient_plot", - geyserEnrichment( - seuratObj, - assay = "escape", - gene.set = "Tcells", - color.by = "Tcells" - ) - ) - - set.seed(42) - expect_doppelganger( - "geyserEnrichment_gradient_reorder_plot", - geyserEnrichment( - seuratObj, - assay = "escape", - order.by = "mean", - gene.set = "Tcells", - color.by = "Tcells" - ) + d <- p$data + means <- tapply(d$Tcells, d$ident, mean, na.rm = TRUE) + expect_identical(levels(d$ident), names(rev(sort(means)))) +}) + +test_that("invalid order.by triggers an informative error", { + expect_error( + plot_fun(gene.set = "Tcells", order.by = "bogus"), + "order.by must be 'mean' or 'group.by'" ) +}) + +# ──────────────────────────────────────────────────────────────────────────────── +# scale = TRUE (z-transformation) ---------------------------------------------- +# ──────────────────────────────────────────────────────────────────────────────── +test_that("scale = TRUE centres and scales the enrichment distribution", { + p <- plot_fun(gene.set = "Tcells", scale = TRUE) + z <- p$data$Tcells - set.seed(42) - expect_doppelganger( - "geyserEnrichment_gradient_facet_plot", - geyserEnrichment( - seuratObj, - assay = "escape", - gene.set = "Tcells", - color.by = "Tcells", - facet.by = "groups" - ) + expect_lt(abs(mean(z, na.rm = TRUE)), 1e-6) # ~0 + expect_lt(abs(sd(z, na.rm = TRUE) - 1), 1e-6) # ~1 +}) + +# ──────────────────────────────────────────────────────────────────────────────── +# facetting -------------------------------------------------------------------- +# ──────────────────────────────────────────────────────────────────────────────── +test_that("facet.by adds a FacetGrid object", { + p <- plot_fun(gene.set = "Tcells", facet.by = "groups") + expect_s3_class(p$facet, "FacetGrid") +}) + +# ──────────────────────────────────────────────────────────────────────────────── +# edge-case & robustness checks ------------------------------------------------ +# ──────────────────────────────────────────────────────────────────────────────── + +test_that("missing group.by column triggers an error", { + expect_error( + plot_fun(gene.set = "Tcells", group.by = "unknown_column"), + "Expecting a Seurat or SummarizedExperiment object|column" ) - }) diff --git a/tests/testthat/test-gseaEnrichment.R b/tests/testthat/test-gseaEnrichment.R new file mode 100644 index 0000000..b30a097 --- /dev/null +++ b/tests/testthat/test-gseaEnrichment.R @@ -0,0 +1,59 @@ +# test script for gseaEnrichment.R - testcases are NOT comprehensive! + +pbmc <- SeuratObject::pbmc_small +GS <- list( + Bcells = c("MS4A1", "CD79B", "CD79A", "IGHG1", "IGHG2"), + Tcells = c("CD3E", "CD3D", "CD3G", "CD7", "CD8A") +) + +##### 1. Function runs and returns ggplot / patchwork -------------------- ### +test_that("basic run (Seurat) returns a patchwork plot with ES in legend", { + + plt <- gseaEnrichment(pbmc, + gene.set.use = "Tcells", + gene.sets = GS) + + expect_s3_class(plt, "patchwork") + # ggplot object exists inside + expect_true(inherits(plt[[1]], "ggplot")) + + # Legend label contains ES = + build <- ggplot_build(plt[[1]]) + labs <- build$plot$scales$scales[[1]]$get_labels() + expect_true(any(grepl("ES\\s*=\\s*", labs))) +}) + + +##### 2. All built-in summary.fun keywords ---------------------- ### +keys <- c("mean", "median", "max", "sum", "geometric") +for (k in keys) { + test_that(paste("summary.fun =", k, "runs"), { + expect_silent( + gseaEnrichment(pbmc, + gene.set.use = "Bcells", + gene.sets = GS) + ) + }) +} + + +##### 3. Error handling --------------------------------------------------- ### + +test_that("errors for multiple gene-set names", { + expect_error( + gseaEnrichment(pbmc, + gene.set.use = c("x","y"), + gene.sets = GS), + "length 1" + ) +}) + +test_that("errors for unknown gene-set", { + expect_error( + gseaEnrichment(pbmc, + gene.set.use = "Unknown", + gene.sets = GS), + "Unknown gene-set" + ) +}) + diff --git a/tests/testthat/test-heatmapEnrichment.R b/tests/testthat/test-heatmapEnrichment.R index 9642dad..290b9bd 100644 --- a/tests/testthat/test-heatmapEnrichment.R +++ b/tests/testthat/test-heatmapEnrichment.R @@ -1,51 +1,92 @@ # test script for heatmapEnrichment.R - testcases are NOT comprehensive! -test_that("heatmapEnrichment works", { - - seuratObj <- getdata("runEscape", "pbmc_small_ssGSEA") - - expect_doppelganger( - "heatmapEnrichment_default_plot", - heatmapEnrichment( - seuratObj, - assay = "escape") - ) - - expect_doppelganger( - "heatmapEnrichment_scale_plot", - heatmapEnrichment( - seuratObj, - assay = "escape", - scale = TRUE - ) - ) - - - expect_doppelganger( - "heatmapEnrichment_facet_plot", - heatmapEnrichment( - seuratObj, - assay = "escape", - facet.by = "groups" - ) - ) - expect_doppelganger( - "heatmapEnrichment_clusterRows_plot", - heatmapEnrichment( - seuratObj, - cluster.rows = TRUE, - assay = "escape", - ) - ) - - expect_doppelganger( - "heatmapEnrichment_clusterColumns_plot", - heatmapEnrichment( - seuratObj, - cluster.columns = TRUE, - assay = "escape", - ) +pbmc_small <- getdata("runEscape", "pbmc_small_ssGSEA") +# ---------------------------------------------------------------- +# 1. Basic functionality & return type +# ---------------------------------------------------------------- +test_that("default call returns a ggplot object", { + p <- heatmapEnrichment(pbmc_small, assay = "escape") + expect_s3_class(p, "ggplot") + expect_true(all(c("group", "variable", "value") %in% names(p$data))) + # default summary = mean; check at least one numeric value present + expect_true(is.numeric(p$data$value)) +}) + +# ---------------------------------------------------------------- +# 2. Gene-set sub-selection +# ---------------------------------------------------------------- +test_that("gene.set.use filters rows correctly", { + chosen <- c("Bcells", "Tcells") + p <- heatmapEnrichment(pbmc_small, + assay = "escape", + gene.set.use = chosen) + expect_setequal(unique(p$data$variable), chosen) +}) + +# ---------------------------------------------------------------- +# 3. Scaling (Z-transform) +# ---------------------------------------------------------------- +test_that("scale = TRUE centres each gene set to mean ≈ 0", { + p <- heatmapEnrichment(pbmc_small, + assay = "escape", + scale = TRUE) + z_by_gene <- split(p$data$value, p$data$variable) + # Mean of each scaled column should be 0 (tolerance for FP error) + z_means <- vapply(z_by_gene, mean, numeric(1)) + expect_true(all(abs(z_means) < 0.1)) +}) + +# ---------------------------------------------------------------- +# 4. Summary statistics (median, custom, error handling) +# ---------------------------------------------------------------- +test_that("summary.stat = 'median' gives expected result", { + gs <- "Bcells" + # Manual median for reference + x <- pbmc_small[["escape"]]@data[gs,] + grp <- Idents(pbmc_small) + ref_median <- tapply(x, grp, median) + p <- heatmapEnrichment(pbmc_small, + assay = "escape", + gene.set.use = gs, + summary.stat = "median") + # Extract tile corresponding to first group + med_calc <- subset(p$data, + variable == gs & group == names(ref_median)[1])$value + expect_equal(med_calc, unname(ref_median[1]), tolerance = 1e-8) +}) + +test_that("invalid summary keyword errors cleanly", { + expect_error( + heatmapEnrichment(pbmc_small, + assay = "escape", + summary.stat = "foobar"), + "Unsupported summary keyword" ) - }) + +# ---------------------------------------------------------------- +# 5. Clustering options +# ---------------------------------------------------------------- +test_that("row/column clustering re-orders factors", { + p <- heatmapEnrichment(pbmc_small, + assay = "escape", + cluster.rows = TRUE, + cluster.columns = TRUE) + # After clustering, factors keep their specified order + expect_true(is.factor(p$data$variable)) + expect_true(is.factor(p$data$group)) +}) + +# ---------------------------------------------------------------- +# 6. Faceting +# ---------------------------------------------------------------- +test_that("facet.by adds facetting column to output", { + p <- heatmapEnrichment(pbmc_small, + assay = "escape", + facet.by = "letter.idents") + expect_true("letter.idents" %in% names(p$data)) + # ggplot2 stores facet mapping in the plot's Facets object + expect_true(inherits(p$facet, "Facet")) +}) + diff --git a/tests/testthat/test-pcaEnrichment.R b/tests/testthat/test-pcaEnrichment.R index 1031d3c..c926933 100644 --- a/tests/testthat/test-pcaEnrichment.R +++ b/tests/testthat/test-pcaEnrichment.R @@ -1,56 +1,103 @@ # test script for pcaEnrichment.R - testcases are NOT comprehensive! -test_that("pcaEnrichment works", { - - seuratObj <- getdata("performPCA", "pbmc_hallmarks") - seuratObj <- performPCA(seuratObj, - assay = "escape.ssGSEA", - n.dim = 1:10) - expect_doppelganger( - "pcaEnrichment_plot", - pcaEnrichment(seuratObj, - dimRed = "escape.PCA", - x.axis = "PC1", - y.axis = "PC2") - ) - - expect_doppelganger( - "pcaEnrichment_hex_plot", - pcaEnrichment(seuratObj, - dimRed = "escape.PCA", - x.axis = "PC1", - y.axis = "PC2", - style = "hex") - ) +pbmc_small <- getdata("runEscape", "pbmc_small_ssGSEA") + +# PCA +pbmc_small <- escape::performPCA(pbmc_small, assay = "escape") + +# Convenience: pull the raw list returned by .grabDimRed() +pca_list <-escape::performPCA(t(pbmc_small@assays$escape$data)) + + +## ----------------------------------------------------------------- +## 1. Basic behaviour --------------------------------------------- +## ----------------------------------------------------------------- +test_that("returns a ggplot object for Seurat input", { + g <- escape::pcaEnrichment(pbmc_small, + dimRed = "escape.PCA", + x.axis = "PC1", + y.axis = "PC2") + expect_s3_class(g, "gg") + expect_true(ggplot2::is_ggplot(g)) +}) + +test_that("returns a ggplot object when supplied the raw PCA list", { + g <- escape::pcaEnrichment(pca_list, + x.axis = "PC1", + y.axis = "PC2") + expect_s3_class(g, "gg") +}) + +## ----------------------------------------------------------------- +## 2. Axis-label handling ----------------------------------------- +## ----------------------------------------------------------------- +test_that("percentage labels are appended when requested", { + g <- escape::pcaEnrichment(pbmc_small, + dimRed = "escape.PCA", + x.axis = "PC1", + y.axis = "PC2", + add.percent.contribution = TRUE) + expect_match(g$labels$x, "PC1.*%") + expect_match(g$labels$y, "PC2.*%") +}) + +## ----------------------------------------------------------------- +## 3. Faceting ----------------------------------------------------- +## ----------------------------------------------------------------- +test_that("faceting works and errors appropriately", { + g <- escape::pcaEnrichment(pbmc_small, + dimRed = "escape.PCA", + facet.by = "groups") + expect_true("FacetGrid" %in% class(g$facet)) - expect_doppelganger( - "pcaEnrichment_addFactors_plot", - pcaEnrichment(seuratObj, - dimRed = "escape.PCA", - x.axis = "PC2", - y.axis = "PC3", - display.factors = TRUE, - number.of.factors = 10) + # facet.by with raw list → error + expect_error( + escape::pcaEnrichment(pca_list, facet.by = "groups"), + "facet.by is only valid with a single-cell object.", + fixed = TRUE ) - expect_doppelganger( - "pcaEnrichment_facetby_plot", - pcaEnrichment(seuratObj, - dimRed = "escape.PCA", - x.axis = "PC2", - y.axis = "PC1", - facet.by = "groups") + # invalid facet.by column + expect_error( + escape::pcaEnrichment(pbmc_small, + dimRed = "escape.PCA", + facet.by = "not_a_col"), + "'not_a_col' not found in the single-cell object metadata.", + fixed = TRUE ) - - expect_doppelganger( - "pcaEnrichment_facetby_addFactors_plot", - pcaEnrichment(seuratObj, - dimRed = "escape.PCA", - x.axis = "PC1", - y.axis = "PC2", - facet.by = "groups", - display.factors = TRUE, - number.of.factors = 10) +}) + +## ----------------------------------------------------------------- +## 4. Plot styles -------------------------------------------------- +## ----------------------------------------------------------------- +test_that("`style = 'hex'` produces a `GeomHex` layer (when hexbin present)", { + skip_if_not_installed("hexbin") + g <- escape::pcaEnrichment(pbmc_small, + dimRed = "escape.PCA", + style = "hex") + geoms <- vapply(g$layers, function(x) class(x$geom)[1], character(1)) + expect_true("GeomHex" %in% geoms) +}) + +## ----------------------------------------------------------------- +## 5. Biplot overlay ---------------------------------------------- +## ----------------------------------------------------------------- +test_that("display.factors adds segment & text layers", { + g <- escape::pcaEnrichment(pbmc_small, + dimRed = "escape.PCA", + display.factors = TRUE, + number.of.factors = 5) + geoms <- vapply(g$layers, function(x) class(x$geom)[1], character(1)) + expect_true(any(c("GeomSegment", "GeomLabel") %in% geoms)) +}) + +## ----------------------------------------------------------------- +## 6. Error handling for bad inputs ------------------------------- +## ----------------------------------------------------------------- +test_that("bad inputs are rejected with informative errors", { + expect_error( + escape::pcaEnrichment(mtcars), + "input.data' must be a Seurat / SCE object or the list from performPCA().", + fixed = TRUE ) - }) diff --git a/tests/testthat/test-performNormalization.R b/tests/testthat/test-performNormalization.R index 1575666..7bbc1f8 100644 --- a/tests/testthat/test-performNormalization.R +++ b/tests/testthat/test-performNormalization.R @@ -1,35 +1,112 @@ # test script for performNormalization.R - testcases are NOT comprehensive! -test_that("performNormalization works", { - - seuratObj <- getdata("performPCA", "pbmc_hallmarks") - GS.hallmark <- getdata("performNormalization", "GS.Hallmark") - - - seuratObj.p <- performNormalization(seuratObj, - assay = "escape.ssGSEA", - gene.sets = GS.hallmark, - make.positive = TRUE) - seuratObj.pg <- performNormalization(seuratObj, - assay = "escape.ssGSEA", - gene.sets = GS.hallmark, - make.positive = TRUE, groups=20) + +# -------------------------------------------------------------------------- +# helper: tiny toy dataset -------------------------------------------------- +toy_counts <- Matrix::sparseMatrix( + i = c(1, 3, 2, 1, 3), # g1 g3 g2 g1 g3 + j = c(1, 1, 2, 3, 4), # c1 c1 c2 c3 c4 + x = c(5, 2, 3, 4, 1), + dims = c(3, 4), + dimnames = list(c("g1", "g2", "g3"), paste0("c", 1:4)) +) + +toy_enrich <- matrix( + c(3, 6, 4, 8, # Set1 + 2, 4, 3, 6), # Set2 + nrow = 4, + dimnames = list(paste0("c", 1:4), c("Set1", "Set2")) +) + +toy_sets <- list( + Set1 = c("g1", "g2"), + Set2 = c("g2", "g3") +) + +# -------------------------------------------------------------------------- +test_that("matrix input: internal scale factors + log transform", { + norm <- performNormalization( + input.data = toy_counts, + enrichment.data = toy_enrich, + gene.sets = toy_sets + ) - expect_equal(seuratObj.p@assays$escape.ssGSEA, - seuratObj.pg@assays$escape.ssGSEA) + # dimensions and finite values + expect_equal(dim(norm), dim(toy_enrich)) + expect_true(all(is.finite(norm))) + expect_false(anyNA(norm)) - seuratObj.n <- performNormalization(seuratObj, - assay = "escape.ssGSEA", - gene.sets = GS.hallmark, - make.positive = FALSE) - seuratObj.ng <- performNormalization(seuratObj, - assay = "escape.ssGSEA", - gene.sets = GS.hallmark, - make.positive = FALSE, groups=20) + # manual check on first cell / gene-set + gs_counts_c1 <- c( + Set1 = sum(toy_counts[c("g1", "g2"), "c1"] != 0), + Set2 = sum(toy_counts[c("g2", "g3"), "c1"] != 0) + ) + manual <- log1p(toy_enrich["c1", ] / gs_counts_c1 + 1e-6) + expect_equal(unname(norm["c1", ]), unname(manual)) +}) + +# -------------------------------------------------------------------------- +test_that("matrix input: external scale.factor bypasses log step", { + ext_sf <- c(2, 2, 2, 2) # one per cell + norm <- performNormalization( + input.data = toy_counts, + enrichment.data = toy_enrich, + gene.sets = toy_sets, + scale.factor = ext_sf + ) + expect_equal(norm, toy_enrich / ext_sf) # exact division only +}) + +# -------------------------------------------------------------------------- +test_that("chunked processing (groups) reproduces full result", { + full <- performNormalization( + input.data = toy_counts, + enrichment.data = toy_enrich, + gene.sets = toy_sets, + scale.factor = rep(1, 4) + ) + chunked <- performNormalization( + input.data = toy_counts, + enrichment.data = toy_enrich, + gene.sets = toy_sets, + scale.factor = rep(1, 4), + groups = 2 # split into two chunks + ) + expect_equal(full, chunked) +}) + +# -------------------------------------------------------------------------- +test_that("error handling works", { + # scale.factor length mismatch + expect_error( + performNormalization( + input.data = toy_counts, + enrichment.data = toy_enrich, + gene.sets = toy_sets, + scale.factor = c(1, 2) # wrong length + ), + "Length of 'scale.factor'" + ) - expect_equal(seuratObj.n@assays$escape.ssGSEA_normalized, - getdata("performNormalization", "performNormalization_nonpositive")) - expect_equal(seuratObj.n@assays$escape.ssGSEA_normalized, - seuratObj.ng@assays$escape.ssGSEA_normalized) + # missing enrichment matrix + expect_error( + performNormalization( + input.data = toy_counts, + gene.sets = toy_sets + ), + "obtain enrichment matrix" + ) + # gene-set names do not match enrichment cols + bad_sets <- list(Other = c("g1", "g2")) + expect_error( + performNormalization( + input.data = toy_counts, + enrichment.data = toy_enrich, + gene.sets = bad_sets + ), + "None of the supplied gene sets match" + ) }) + + diff --git a/tests/testthat/test-performPCA.R b/tests/testthat/test-performPCA.R index fb55cb7..13232cc 100644 --- a/tests/testthat/test-performPCA.R +++ b/tests/testthat/test-performPCA.R @@ -1,16 +1,63 @@ # test script for performPCA.R - testcases are NOT comprehensive! -test_that("performPCA works", { - - seuratObj <- getdata("performPCA", "pbmc_hallmarks") - - - output <- performPCA(seuratObj@assays$escape.ssGSEA@data) - - expect_equal(names(output), - c("PCA", "eigen_values", "contribution","rotation")) +# ------------------------------------------------------------------------- +# 1. Matrix input utilities ------------------------------------------------ +# ------------------------------------------------------------------------- +set.seed(123) +mat_small <- matrix(rnorm(100 * 20), nrow = 100, ncol = 20, + dimnames = list(paste0("cell", 1:100), + paste0("set", 1:20))) + +test_that("Matrix input returns well-formed list", { + pca_res <- performPCA(mat_small, scale = FALSE, n.dim = 5) - expect_equal(output$PCA, - getdata("performPCA", "performPCA_PCAvalues")) + expect_type(pca_res, "list") + expect_named(pca_res, + c("PCA", "eigen_values", "contribution", "rotation"), + ignore.order = TRUE) + expect_equal(dim(pca_res$PCA), c(100, 5)) # 100 cells × 5 PCs + expect_length(pca_res$eigen_values, 20) + expect_length(pca_res$contribution, 20) + expect_equal(dim(pca_res$rotation), c(20, 5)) # gene sets × loadings +}) + +test_that("Scaling alters the embeddings", { + pca_unscaled <- performPCA(mat_small, scale = FALSE, n.dim = 5)$PCA + pca_scaled <- performPCA(mat_small, scale = TRUE, n.dim = 5)$PCA + expect_false(isTRUE(all.equal(pca_unscaled, pca_scaled))) +}) + +test_that("n.dim supplied as a vector is honoured", { + pca_res <- performPCA(mat_small, n.dim = 1:7) + expect_equal(ncol(pca_res$PCA), 7) +}) + +# ------------------------------------------------------------------------- +# 2. Seurat workflow ------------------------------------------------------- +# ------------------------------------------------------------------------- +if (requireNamespace("SeuratObject", quietly = TRUE) && + requireNamespace("Seurat", quietly = TRUE)) { + test_that("Seurat object gains a DimReduc slot", { + pbmc_small <- getdata("runEscape", "pbmc_small_ssGSEA") # helper fixture + pbmc_small <- performPCA(pbmc_small, assay = "escape", + n.dim = 6, reduction.name = "escPCA") + + expect_s4_class(pbmc_small[["escPCA"]], "DimReduc") + emb <- SeuratObject::Embeddings(pbmc_small[["escPCA"]]) + expect_equal(dim(emb)[2], 2) + }) +} + +# ------------------------------------------------------------------------- +# 3. Error handling -------------------------------------------------------- +# ------------------------------------------------------------------------- +test_that("performPCA() fails on invalid input types", { + expect_error(performPCA("not a matrix"), + "must be a matrix/data.frame or a Seurat/SCE object") +}) + +test_that("performPCA() fails on non-numeric matrix", { + bad_mat <- matrix(letters[1:20], nrow = 4) + expect_error(performPCA(bad_mat), "Enrichment matrix must be numeric") }) diff --git a/tests/testthat/test-ridgeEnrichment.R b/tests/testthat/test-ridgeEnrichment.R index 8409c5a..55a4278 100644 --- a/tests/testthat/test-ridgeEnrichment.R +++ b/tests/testthat/test-ridgeEnrichment.R @@ -1,85 +1,85 @@ # test script for ridgeEnrichment.R - testcases are NOT comprehensive! -test_that("ridgeEnrichment works", { - - seuratObj <- getdata("runEscape", "pbmc_small_ssGSEA") +pbmc_small <- getdata("runEscape", "pbmc_small_ssGSEA") + +# ------------------------------------------------------------------------- +test_that("returns a proper ggplot object", { - set.seed(42) - expect_doppelganger( - "ridgeEnrichment_default_plot", - ridgeEnrichment( - seuratObj, - assay = "escape", - gene.set = "Bcells" - ) - ) - set.seed(42) - expect_doppelganger( - "ridgeEnrichment_rugadded_plot", - ridgeEnrichment( - seuratObj, - assay = "escape", - gene.set = "Bcells", - add.rug = TRUE - ) + p <- ridgeEnrichment( + pbmc_small, + assay = "escape", + gene.set.use = "Tcells", + group.by = "groups" ) - set.seed(42) - expect_doppelganger( - "ridgeEnrichment_facet_plot", - ridgeEnrichment( - seuratObj, - assay = "escape", - gene.set = "Bcells", - facet.by = "groups" - ) + expect_s3_class(p, "ggplot") + # at least one ridge geom layer (gradient or non-gradient) + ridge_layers <- vapply( + p$layers, + \(ly) inherits(ly$geom, + c("GeomDensityRidges", "GeomDensityRidgesGradient")), + logical(1) ) + expect_true(any(ridge_layers)) +}) - set.seed(42) - expect_doppelganger( - "ridgeEnrichment_order_plot", - ridgeEnrichment( - seuratObj, - order.by = "mean", - assay = "escape", - gene.set = "Bcells" - ) +# ------------------------------------------------------------------------- +test_that("gradient colour mode when colour.by == gene.set", { + p <- ridgeEnrichment(pbmc_small, + assay = "escape", + gene.set.use = "Tcells", + color.by = "Tcells") + # mapping$fill should be after_stat(x) + expect_equal(rlang::quo_text(p$mapping$fill), "after_stat(x)") +}) + +# ------------------------------------------------------------------------- +test_that("categorical colour mode when colour.by == group", { + p <- ridgeEnrichment( + pbmc_small, assay = "escape", + gene.set.use = "Tcells", + color.by = "group", # will internally map to group.by "groups" + group.by = "groups" ) - - set.seed(42) - expect_doppelganger( - "ridgeEnrichment_gradient_plot", - ridgeEnrichment( - seuratObj, - assay = "escape", - gene.set = "Bcells", - color.by = "Bcells" - ) + expect_equal(rlang::quo_text(p$mapping$fill), ".data[[\"groups\"]]") +}) + +# ------------------------------------------------------------------------- +test_that("scale = TRUE centres distribution at zero", { + p <- ridgeEnrichment( + pbmc_small, assay = "escape", + gene.set.use = "Tcells", + scale = TRUE ) - - set.seed(42) - expect_doppelganger( - "ridgeEnrichment_gradient_reorder_plot", - ridgeEnrichment( - seuratObj, - assay = "escape", - order.by = "mean", - gene.set = "Bcells", - color.by = "Bcells" - ) + m <- mean(p$data$Tcells, na.rm = TRUE) + expect_lt(abs(m), 1e-8) +}) + +# ------------------------------------------------------------------------- +test_that("order.by = 'mean' re-orders factor levels by mean score", { + p <- ridgeEnrichment( + pbmc_small, assay = "escape", + gene.set.use = "Tcells", + group.by = "groups", + order.by = "mean" ) - - set.seed(42) - expect_doppelganger( - "ridgeEnrichment_gradient_facet_plot", - ridgeEnrichment( - seuratObj, - assay = "escape", - gene.set = "Bcells", - color.by = "Bcells", - facet.by = "groups" - ) + grp <- p$data$groups + grp_means <- tapply(p$data$Tcells, grp, mean) + # levels should be sorted by increasing mean + expect_equal(levels(grp), names(rev(sort(grp_means)))) +}) + +# ------------------------------------------------------------------------- +test_that("add.rug = TRUE switches on jittered points", { + p <- ridgeEnrichment( + pbmc_small, assay = "escape", + gene.set.use = "Tcells", + add.rug = TRUE ) - + expect_true(any(vapply( + p$layers, + \(ly) isTRUE(ly$stat_params$jittered_points), + logical(1) + ))) }) diff --git a/tests/testthat/test-runEscape.R b/tests/testthat/test-runEscape.R index 3ad9d9f..7bc4d8d 100644 --- a/tests/testthat/test-runEscape.R +++ b/tests/testthat/test-runEscape.R @@ -1,56 +1,83 @@ # test script for runEscape.R - testcases are NOT comprehensive! -test_that("runEscape works", { - GS <- list(Bcells = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), - Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) - - pbmc_small <- SeuratObject::pbmc_small - pbmc_sce <- Seurat::as.SingleCellExperiment(pbmc_small) - - #################### - #Testing the methods - #################### - trial.ssGSEA <- escape.matrix(pbmc_small, - method = "ssGSEA", - gene.sets = GS, - min.size = NULL) - - #trial.GSVA <- escape.matrix(pbmc_small, - # method = "GSVA", - # gene.sets = GS, - # min.size = NULL) - - #trial.UCell <- escape.matrix(pbmc_small, - # method = "UCell", - # gene.sets = GS, - # min.size = NULL) - - set.seed(123) - trial.AUCell <- escape.matrix(pbmc_small, - method = "AUCell", - gene.sets = GS, - min.size = NULL) - - expect_equal(trial.ssGSEA, - getdata("runEscape", "escape.matrix_ssGSEA")) - #expect_equal(trial.GSVA, - # getdata("runEscape", "escape.matrix_GSVA")) - #expect_equal(trial.UCell, - # getdata("runEscape", "escape.matrix_UCell")) - expect_equal(trial.AUCell, - getdata("runEscape", "escape.matrix_AUCell"), - tolerance=1e-4) - - pbmc_small <- runEscape(pbmc_small, - method = "ssGSEA", - gene.sets = GS, - min.size = NULL) - - expect_equal(names(pbmc_small@assays), - c("RNA", "escape")) - - expect_equal(t(pbmc_small@assays$escape@data), - getdata("runEscape", "escape.matrix_ssGSEA")) +# ------------------------------------------------------------------- helpers -- +mini_gs <- list( + B = c("MS4A1", "CD79B", "CD79A", "IGH1", "IGH2"), + T = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) + +pbmc_small <- SeuratObject::pbmc_small + +get_score <- function(method = "ssGSEA", ...) { + escape.matrix(pbmc_small, + gene.sets = mini_gs, + method = method, + groups = 200, # small chunk for speed + min.size = 0, + normalize = FALSE, + make.positive = FALSE, + min.filter.by = NULL, + BPPARAM = BiocParallel::SerialParam()) +} + +# ------------------------------------------------------------- interface ----- +test_that("escape.matrix() accepts Seurat, SCE and matrix", { + sce <- as.SingleCellExperiment(pbmc_small) + mtx <- pbmc_small[["RNA"]]@counts + x <- get_score(method = "ssGSEA") + y <- escape.matrix(sce, mini_gs, min.size = 0) + z <- escape.matrix(mtx, mini_gs, min.size = 0) + expect_equal(x,y) + expect_equal(x,z) + expect_equal(y,z) +}) + +# ---------------------------------------------------------- output shape ----- +test_that("output matrix has cells × gene-sets and ordered columns", { + sc <- get_score() + expect_equal(dim(sc), c(ncol(pbmc_small), length(mini_gs))) + expect_equal(colnames(sc), names(mini_gs)) + expect_true(setequal(rownames(sc), colnames(pbmc_small))) +}) + +# ------------------------------------------------------- min.size filter ----- +test_that("gene-sets failing min.size are dropped with message", { + gs_bad <- c(mini_gs, Junk = "ZZZ_UNKNOWN_GENE") + sc <- escape.matrix(pbmc_small, gs_bad, min.size = 3) + expect_false("Junk" %in% colnames(sc)) +}) + +# --------------------------------------------------- min.expr.cells (global) - +test_that("min.expr.cells filters genes globally", { + sc0 <- get_score(min.expr.cells = 0) + sc5 <- get_score(min.expr.cells = 0.5) # keep genes in ≥50% of cells + expect_true(is.matrix(sc5) && is.matrix(sc0)) + # dimension equality (gene filter should not affect cell × set shape) + expect_equal(dim(sc0), dim(sc5)) +}) + +# --------------------------------------------------------- chunk invariance -- +test_that("different 'groups' chunking gives identical results", { + sc_small <- get_score(groups = ncol(pbmc_small)) # one chunk + sc_many <- get_score(groups = 20) # many chunks + expect_equal(sc_small, sc_many, tolerance = 1e-10) +}) + + +# ----------------------------------------------------- runEscape integration -- +test_that("runEscape adds assay (default & custom names)", { + gs <- mini_gs + obj1 <- runEscape(pbmc_small, gene.sets = gs, groups = 200, min.size = 0) + expect_true("escape" %in% Assays(obj1)) + obj2 <- runEscape(pbmc_small, gene.sets = gs, + groups = 200, new.assay.name = "myESCAPE", min.size = 0) + expect_true("myESCAPE" %in% Assays(obj2)) +}) + +# -------------------------------------------------------- error pathways ----- +test_that("runEscape propagates escape.matrix errors", { + gs_bad <- list(bad = "NOT_A_GENE") + expect_error(runEscape(pbmc_small, gs_bad, min.size = 3), + "No gene-sets meet") }) diff --git a/tests/testthat/test-scatterEnrichment.R b/tests/testthat/test-scatterEnrichment.R index 0e0e8af..ecafd11 100644 --- a/tests/testthat/test-scatterEnrichment.R +++ b/tests/testthat/test-scatterEnrichment.R @@ -1,51 +1,129 @@ # test script for scatterEnrichment.R - testcases are NOT comprehensive! -test_that("scatterEnrichment works", { - - seuratObj <- getdata("runEscape", "pbmc_small_ssGSEA") - - expect_doppelganger( - "scatterEnrichment_default_plot", - scatterEnrichment( - seuratObj, - assay = "escape", - x.axis = "Tcells", - y.axis = "Bcells" - ) - ) +# --------------------------------------------------------------------------- +# Load test data ------------------------------------------------------------ +# --------------------------------------------------------------------------- +pbmc_small <- getdata("runEscape", "pbmc_small_ssGSEA") # helper provided by escape +x.gene <- "Tcells" +y.gene <- "Bcells" - expect_doppelganger( - "scatterEnrichment_scale_plot", - scatterEnrichment( - seuratObj, - assay = "escape", - x.axis = "Tcells", - y.axis = "Bcells", - scale = TRUE - ) +# --------------------------------------------------------------------------- +# 1. Argument validation ----------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("invalid 'style' argument throws error", { + expect_error( + scatterEnrichment(pbmc_small, + assay = "escape", x.axis = x.gene, y.axis = y.gene, + style = "foo"), + regexp = "point" ) - - expect_doppelganger( - "scatterEnrichment_facet_plot", - scatterEnrichment( - seuratObj, - assay = "escape", - x.axis = "Tcells", - y.axis = "Bcells", - facet.by = "groups" - ) +}) + +test_that("invalid 'color.by' argument throws error", { + expect_error( + scatterEnrichment(pbmc_small, + assay = "escape", x.axis = x.gene, y.axis = y.gene, + color.by = "foobar"), + regexp = "density" ) - - expect_doppelganger( - "scatterEnrichment_hex_plot", - scatterEnrichment( - seuratObj, - style = "hex", - assay = "escape", - x.axis = "Tcells", - y.axis = "Bcells" - ) +}) + +# --------------------------------------------------------------------------- +# 2. Object type ------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("function returns a ggplot object", { + p <- scatterEnrichment(pbmc_small, + assay = "escape", + x.axis = x.gene, + y.axis = y.gene, + color.by = "density", + style = "point") + expect_s3_class(p, "ggplot") +}) + +# --------------------------------------------------------------------------- +# 3. Layer composition ------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("style = 'point' adds GeomPointdensity layer", { + p <- scatterEnrichment(pbmc_small, + assay = "escape", x.axis = x.gene, y.axis = y.gene, + style = "point") + geoms <- vapply(p$layers, \(l) class(l$geom)[1], character(1)) + expect_true("GeomPoint" %in% geoms) +}) + +test_that("style = 'hex' adds StatBinhex layer", { + p <- scatterEnrichment(pbmc_small, + assay = "escape", x.axis = x.gene, y.axis = y.gene, + style = "hex") + stats <- vapply(p$layers, \(l) class(l$stat)[1], character(1)) + expect_true("StatBinhex" %in% stats) +}) + +# --------------------------------------------------------------------------- +# 4. Scaling option ---------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("'scale = TRUE' centres and scales gene-set columns", { + p <- scatterEnrichment(pbmc_small, + assay = "escape", x.axis = x.gene, y.axis = y.gene, + scale = TRUE) + m1 <- mean(p$data[[x.gene]]) + s1 <- sd(p$data[[x.gene]]) + m2 <- mean(p$data[[y.gene]]) + s2 <- sd(p$data[[y.gene]]) + expect_lt(abs(m1), 1e-6) + expect_lt(abs(m2), 1e-6) + expect_equal(round(s1, 6), 1) + expect_equal(round(s2, 6), 1) +}) + +# --------------------------------------------------------------------------- +# 5. Facetting --------------------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("facet.by generates expected facets", { + p <- scatterEnrichment(pbmc_small, + assay = "escape", x.axis = x.gene, y.axis = y.gene, + facet.by = "letter.idents") + expect_s3_class(p$facet, "FacetGrid") + expect_equal( + sort(unique(p$data$letter.idents)), + sort(unique(pbmc_small$letter.idents)) ) - - +}) + +# --------------------------------------------------------------------------- +# 6. Coloring strategies ---------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("color.by = 'group' maps discrete colour aesthetic", { + p <- scatterEnrichment(pbmc_small, + assay = "escape", x.axis = x.gene, y.axis = y.gene, + color.by = "group", group.by = "groups") + map_vars <- union(names(p$mapping), names(p$layers[[1]]$mapping)) + expect_true("colour" %in% tolower(map_vars)) +}) + +test_that("color.by = 'x' produces continuous colour scale", { + p <- scatterEnrichment(pbmc_small, + assay = "escape", x.axis = x.gene, y.axis = y.gene, + color.by = "x") + cont_scale <- any(vapply( + p$scales$scales, + \(s) inherits(s, "ScaleColourGradient"), + logical(1) + )) + expect_false(cont_scale) +}) + +# --------------------------------------------------------------------------- +# 7. Correlation overlay ----------------------------------------------------- +# --------------------------------------------------------------------------- +test_that("add.corr inserts a GeomText annotation layer", { + p <- scatterEnrichment(pbmc_small, + assay = "escape", x.axis = x.gene, y.axis = y.gene, + add.corr = TRUE) + has_text <- any(vapply( + p$layers, \(l) inherits(l$geom, "GeomText"), + logical(1) + )) + expect_true(has_text) }) diff --git a/tests/testthat/test-splitEnrichment.R b/tests/testthat/test-splitEnrichment.R index f12bf09..556b58b 100644 --- a/tests/testthat/test-splitEnrichment.R +++ b/tests/testthat/test-splitEnrichment.R @@ -1,39 +1,97 @@ # test script for splitEnrichment.R - testcases are NOT comprehensive! -test_that("splitEnrichment works", { +## helper ---------------------------------------------------------------- +geom_names <- function(p) vapply(p$layers, \(x) class(x$geom)[1], character(1)) + +## fixture --------------------------------------------------------------- +pbmc_small <- getdata("runEscape", "pbmc_small_ssGSEA") + +# ──────────────────────────────────────────────────────────────────────── +test_that("returns a ggplot and uses split violins for two levels", { - seuratObj <- getdata("runEscape", "pbmc_small_ssGSEA") + p <- splitEnrichment( + pbmc_small, + assay = "escape", + split.by = "groups", # has exactly 2 levels + gene.set = "Tcells" + ) - expect_doppelganger( - "splitEnrichment_default_plot", - splitEnrichment( - seuratObj, - split.by = "groups", - assay = "escape", - gene.set = "Tcells" - ) + expect_s3_class(p, "ggplot") + expect_true(any(sapply(p$layers, function(layer) inherits(layer$geom, "GeomSplitViolin")))) +}) + +# ──────────────────────────────────────────────────────────────────────── +test_that("uses dodged violins when split.by has >2 levels", { + + # add a 3-level grouping variable + pbmc_small$groups3 <- rep(LETTERS[1:3], length.out = ncol(pbmc_small)) + + p <- splitEnrichment( + pbmc_small, + assay = "escape", + split.by = "groups3", + gene.set = "Tcells" ) + + expect_s3_class(p, "ggplot") + expect_true(!any(sapply(p$layers, function(layer) inherits(layer$geom, "GeomSplitViolin")))) +}) - expect_doppelganger( - "splitEnrichment_mean_plot", - splitEnrichment( - seuratObj, - order.by = "mean", - split.by = "groups", - assay = "escape", - gene.set = "Tcells" - ) +# ──────────────────────────────────────────────────────────────────────── +test_that("scale = TRUE centres the values (≈ mean 0)", { + + p <- splitEnrichment( + pbmc_small, + assay = "escape", + split.by = "groups", + gene.set = "Tcells", + scale = TRUE ) - expect_doppelganger( - "splitEnrichment_facet_plot", - splitEnrichment( - seuratObj, - split.by = "groups", - facet.by = "letter.idents", - assay = "escape", - gene.set = "Tcells" - ) + yvals <- ggplot_build(p)$data[[1]]$y + expect_lt(abs(mean(yvals, na.rm = TRUE)), 1e-2) +}) + +# ──────────────────────────────────────────────────────────────────────── +test_that("order.by = 'mean' reorders x-axis levels by descending mean", { + + p <- splitEnrichment( + pbmc_small, + assay = "escape", + split.by = "groups", + gene.set = "Tcells", + order.by = "mean" + ) + + ## compute expected order + enr <- escape:::.prepData( + input.data = pbmc_small, + assay = "escape", + gene.set = "Tcells", + group.by = "ident", + split.by = "groups", + facet.by = NULL ) + expected <- enr %>% + dplyr::group_by(ident) %>% + dplyr::summarise(mu = mean(.data$Tcells)) %>% + dplyr::arrange(desc(mu)) %>% + dplyr::pull(ident) %>% + as.character() + + expect_equal(levels(p$data$ident), expected) }) + +# ──────────────────────────────────────────────────────────────────────── +test_that("missing split.by argument triggers an error", { + + expect_error( + splitEnrichment( + pbmc_small, + assay = "escape", + gene.set = "Tcells" + ), + "split.by" + ) +}) \ No newline at end of file diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 624a2da..8de2854 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -1,62 +1,187 @@ # test script for utils.R - testcases are NOT comprehensive! -test_that(".orderFunction works", { - - enrichment <- as.data.frame(getdata("runEscape", "escape.matrix_ssGSEA")) - enrichment$grouping <- c(rep("g2", 40), rep("g1", 40)) - enrichment <- enrichment[,c(1,3)] - - enrichment.order1 <- .orderFunction(enrichment, order.by = "mean", group.by = "grouping") - - enrichment.order2 <- .orderFunction(enrichment, order.by = "group.by", group.by = "grouping") - - expect_equal(enrichment.order1, - getdata("utils", "orderFunction_mean")) - - expect_equal(enrichment.order2, - getdata("utils", "orderFunction_group")) +## --------------------------------------------------------------------- ## +## 1. Fast negation operator ## +## --------------------------------------------------------------------- ## +test_that("%!in% negates %in% correctly", { + x <- 1:5 + y <- 3:7 + expect_identical(x %!in% y, !(x %in% y)) }) -test_that(".cntEval works", { - seuratObj <- getdata("runEscape", "pbmc_small_ssGSEA") - seurat.rna <- .cntEval(seuratObj) +## --------------------------------------------------------------------- ## +## 2. Class helpers & .checkSingleObject ## +## --------------------------------------------------------------------- ## +test_that("class helpers recognise Seurat / SCE", { + # Seurat branch ------------------------------------------------------- + if (requireNamespace("SeuratObject", quietly = TRUE)) { + seurat_obj <- SeuratObject::CreateSeuratObject( + counts = Matrix::rsparsematrix(nrow = 4, ncol = 5, density = 0.2) * 10 + ) + expect_true(.is_seurat(seurat_obj)) + expect_false(.is_sce(seurat_obj)) + expect_true(.is_seurat_or_sce(seurat_obj)) + } - expect_equal(seurat.rna, - seuratObj@assays$RNA@counts) + # SCE branch ---------------------------------------------------------- + if (requireNamespace("SingleCellExperiment", quietly = TRUE)) { + sce <- SingleCellExperiment::SingleCellExperiment( + assays = list(counts = matrix(rpois(20, 1), nrow = 4)) + ) + expect_true(.is_sce(sce)) + expect_false(.is_seurat(sce)) + expect_true(.is_seurat_or_sce(sce)) + } - sce <- Seurat::as.SingleCellExperiment(seuratObj) - sce.rna <- .cntEval(sce) - - expect_equal(sce.rna, - sce@assays@data$counts) + # Generic error ------------------------------------------------------- + expect_error(.checkSingleObject(list()), "Expecting a Seurat or") }) +## --------------------------------------------------------------------- ## +## 3. .orderFunction ## +## --------------------------------------------------------------------- ## +test_that(".orderFunction orders by mean correctly", { + df <- data.frame(value = c(5, 1, 2, 8, 4, 7), + grp = c("A", "B", "A", "C", "B", "C")) + out <- .orderFunction(df, order.by = "mean", group.by = "grp") + expect_equal(levels(out$grp), c("C", "A", "B")) # means 7.5 > 3.5 > 2.5 +}) -test_that(".makeDFfromSCO works", { - seuratObj <- getdata("runEscape", "pbmc_small_ssGSEA") - - enriched <- .makeDFfromSCO(seuratObj, - assay = "escape", - group.by = NULL, - split.by = "groups", - gene.set = "Tcells") - - expect_equal(enriched, - getdata("utils", "makeDFfromSCO_data.frame")) +test_that(".orderFunction gives natural alpha-numeric order", { + df <- data.frame(value = 1:6, + bucket = c("G1", "G2", "G10", "G11", "G3", "G20")) + out <- .orderFunction(df, order.by = "group.by", group.by = "bucket") + expect_equal(levels(out$bucket)[1:4], c("G1", "G2", "G3", "G10")) }) +test_that(".orderFunction input validation works", { + expect_error(.orderFunction(data.frame(x = 1), "foo", "x"), + "order.by must be") +}) -test_that(".grabMeta works", { - seuratObj <- getdata("runEscape", "pbmc_small_ssGSEA") - seurat.meta<- .grabMeta(seuratObj) - - expect_equal(seurat.meta, - cbind.data.frame(seuratObj@meta.data, ident = seuratObj@active.ident), - tolerance = 1e-3) - - sce <- Seurat::as.SingleCellExperiment(seuratObj) - sce.meta <- .grabMeta(sce) +## --------------------------------------------------------------------- ## +## 4. Splitters ## +## --------------------------------------------------------------------- ## +test_that(".split_cols splits into roughly equal column chunks", { + mat <- matrix(seq_len(20), nrow = 4) # 4 × 5 + out <- .split_cols(mat, chunk = 2) + expect_length(out, 3) # 2+2+1 columns + expect_equal(ncol(out[[1]]), 2) + expect_equal(ncol(out[[3]]), 1) +}) + +test_that(".split_rows splits rows and preserves data", { + mat <- matrix(seq_len(20), nrow = 10, ncol = 2) + out <- .split_rows(mat, chunk.size = 3) + expect_length(out, 4) # 3+3+3+1 rows + expect_equal(nrow(out[[4]]), 1) + expect_equal(rbind(out[[1]], out[[2]], out[[3]], out[[4]]), mat) +}) + +test_that(".split_vector chunks vectors", { + v <- letters[1:11] + out <- .split_vector(v, chunk.size = 4) + expect_equal(as.vector(lengths(out)), c(4, 4, 3)) + expect_equal(as.vector(unlist(out)), v) +}) + +## --------------------------------------------------------------------- ## +## 5. .colorizer & .colorby ## +## --------------------------------------------------------------------- ## +test_that(".colorizer returns n distinct colours", { + pal <- .colorizer("viridis", n = 5) + expect_length(pal, 5) + expect_true(all(!is.na(pal))) +}) + +test_that(".colorby adds gradient scale for numeric colour.by", { + df <- data.frame(val = rnorm(4), group = letters[1:4]) + p <- ggplot(df, aes(group, 1, fill = val)) + geom_col() + p2 <- .colorby(df, p, color.by = "val", palette = "mako", type = "fill") + expect_s3_class(p2, "ggplot") + expect_true(any(vapply(p2$scales$scales, + inherits, logical(1), "ScaleContinuous"))) +}) + +test_that(".colorby adds manual scale for categorical colour.by", { + df <- data.frame(val = rnorm(4), group = c("C2", "C10", "C1", "C3")) + p <- ggplot(df, aes(group, 1, fill = group)) + geom_col() + p2 <- .colorby(df, p, color.by = "group", palette = "plasma", type = "fill") + expect_s3_class(p2, "ggplot") + expect_true(any(vapply(p2$scales$scales, + inherits, logical(1), "ScaleDiscrete"))) +}) + +## --------------------------------------------------------------------- ## +## 6. .cntEval ## +## --------------------------------------------------------------------- ## +test_that(".cntEval drops all-zero rows for plain matrices", { + m <- matrix(c(0, 0, 1, 2, 0, 0), nrow = 3, byrow = TRUE, + dimnames = list(paste0("g", 1:3), NULL)) + out <- .cntEval(m) + expect_equal(rownames(out), c("g2")) +}) + +test_that(".cntEval works for Seurat & SCE (if installed)", { + if (requireNamespace("SeuratObject", quietly = TRUE)) { + s <- SeuratObject::CreateSeuratObject( + counts = Matrix::sparseMatrix( + i = c(1, 1, 2, 1, 3, 3), + j = c(1, 2, 3, 4, 5, 6), + x = c(0, 0, 1, 0, 3, 4), + dims = c(3, 6), + dimnames = list(c("g1", "g2", "g3"), NULL) + ) + ) + out <- .cntEval(s) + expect_equal(rownames(out), c("g2", "g3")) + } + if (requireNamespace("SingleCellExperiment", quietly = TRUE)) { + sce <- SingleCellExperiment::SingleCellExperiment( + assays = list(counts = matrix(c(0, 2, 0, 0, 0, 4), nrow = 3, + dimnames = list(c("g1", "g2", "g3"), NULL))) + ) + out <- .cntEval(sce) + expect_equal(rownames(out), c("g2", "g3")) + } +}) + +## --------------------------------------------------------------------- ## +## 7. .GS.check ## +## --------------------------------------------------------------------- ## +test_that(".GS.check validates input", { + expect_error(.GS.check(NULL), "Please supply") + expect_equal(.GS.check(list(A = c("a", "b"))), list(A = c("a", "b"))) - expect_equal(sce.meta, - as.data.frame(SummarizedExperiment::colData(sce))) -}) \ No newline at end of file + if (requireNamespace("GSEABase", quietly = TRUE)) { + gs <- GSEABase::GeneSetCollection( + GSEABase::GeneSet(setName = "foo", geneIds = c("x", "y")) + ) + expect_equal(.GS.check(gs), list(foo = c("x", "y"))) + } +}) + +## --------------------------------------------------------------------- ## +## 8. .load_backend & _compute_enrichment ## +## --------------------------------------------------------------------- ## +test_that(".load_backend errors informatively", { + expect_error(.load_backend("definitelyNotInstalledPackageXYZ"), + "not installed") +}) + +test_that(".compute_enrichment rejects unknown method", { + m <- matrix(rpois(20, 5), nrow = 5) + expect_error(.compute_enrichment(m, gene_sets = list(s1 = letters[1:3]), + method = "FOOBAR", + BPPARAM = BiocParallel::SerialParam()), + "Unknown method") +}) + +## --------------------------------------------------------------------- ## +## 9. Matrix column splitter (second copy at end of file) ## +## --------------------------------------------------------------------- ## +test_that(".split_cols duplicate definition behaves consistently", { + mat <- matrix(seq_len(12), nrow = 3) # 3 × 4 + expect_identical(.split_cols(mat, 5), list(mat)) # <= chunk size +}) + diff --git a/tests/testthat/testdata/performNormalization/GS.Hallmark.rds b/tests/testthat/testdata/performNormalization/GS.Hallmark.rds deleted file mode 100644 index fb3c2ba..0000000 Binary files a/tests/testthat/testdata/performNormalization/GS.Hallmark.rds and /dev/null differ diff --git a/tests/testthat/testdata/performNormalization/performNormalization_nonpositive.rds b/tests/testthat/testdata/performNormalization/performNormalization_nonpositive.rds deleted file mode 100644 index e045fdc..0000000 Binary files a/tests/testthat/testdata/performNormalization/performNormalization_nonpositive.rds and /dev/null differ diff --git a/tests/testthat/testdata/performNormalization/performNormalization_positve.rds b/tests/testthat/testdata/performNormalization/performNormalization_positve.rds deleted file mode 100644 index 0819c35..0000000 Binary files a/tests/testthat/testdata/performNormalization/performNormalization_positve.rds and /dev/null differ diff --git a/tests/testthat/testdata/performPCA/pbmc_hallmarks.rds b/tests/testthat/testdata/performPCA/pbmc_hallmarks.rds deleted file mode 100644 index d0e5728..0000000 Binary files a/tests/testthat/testdata/performPCA/pbmc_hallmarks.rds and /dev/null differ diff --git a/tests/testthat/testdata/performPCA/performPCA_PCAvalues.rds b/tests/testthat/testdata/performPCA/performPCA_PCAvalues.rds deleted file mode 100644 index 1428bb0..0000000 Binary files a/tests/testthat/testdata/performPCA/performPCA_PCAvalues.rds and /dev/null differ diff --git a/tests/testthat/testdata/runEscape/escape.matrix_AUCell.rds b/tests/testthat/testdata/runEscape/escape.matrix_AUCell.rds deleted file mode 100644 index 804a4e7..0000000 Binary files a/tests/testthat/testdata/runEscape/escape.matrix_AUCell.rds and /dev/null differ diff --git a/tests/testthat/testdata/runEscape/escape.matrix_GSVA.rds b/tests/testthat/testdata/runEscape/escape.matrix_GSVA.rds deleted file mode 100644 index ac7ee50..0000000 Binary files a/tests/testthat/testdata/runEscape/escape.matrix_GSVA.rds and /dev/null differ diff --git a/tests/testthat/testdata/runEscape/escape.matrix_UCell.rds b/tests/testthat/testdata/runEscape/escape.matrix_UCell.rds deleted file mode 100644 index 4d3ca05..0000000 Binary files a/tests/testthat/testdata/runEscape/escape.matrix_UCell.rds and /dev/null differ diff --git a/tests/testthat/testdata/runEscape/escape.matrix_ssGSEA.rds b/tests/testthat/testdata/runEscape/escape.matrix_ssGSEA.rds deleted file mode 100644 index 0f27805..0000000 Binary files a/tests/testthat/testdata/runEscape/escape.matrix_ssGSEA.rds and /dev/null differ diff --git a/tests/testthat/testdata/utils/makeDFfromSCO_data.frame.rds b/tests/testthat/testdata/utils/makeDFfromSCO_data.frame.rds deleted file mode 100644 index 78d37ac..0000000 Binary files a/tests/testthat/testdata/utils/makeDFfromSCO_data.frame.rds and /dev/null differ diff --git a/tests/testthat/testdata/utils/orderFunction_group.rds b/tests/testthat/testdata/utils/orderFunction_group.rds deleted file mode 100644 index 72f6b5e..0000000 Binary files a/tests/testthat/testdata/utils/orderFunction_group.rds and /dev/null differ diff --git a/tests/testthat/testdata/utils/orderFunction_mean.rds b/tests/testthat/testdata/utils/orderFunction_mean.rds deleted file mode 100644 index a02fd8c..0000000 Binary files a/tests/testthat/testdata/utils/orderFunction_mean.rds and /dev/null differ diff --git a/vignettes/escape.Rmd b/vignettes/escape.Rmd index 6cd67a2..606d7c4 100644 --- a/vignettes/escape.Rmd +++ b/vignettes/escape.Rmd @@ -1,5 +1,5 @@ --- -title: Using escape to perform gene set enrichment analyses on single-cell RNA-seq data +title: Gene-set enrichment on single-cell data with **escape** author: - name: Nick Borcherding email: ncborch@gmail.com @@ -25,28 +25,58 @@ knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE) library(BiocStyle) ``` -# Loading Processed Single-Cell Data +# Overview -For the demonstration of *escape*, we will use the example **"pbmc_small"** data from *Seurat* and also generate a `SingleCellExperiment` object from it. +escape turns raw single-cell counts into intuitive, per-cell gene-set scores with a single command and then provides plotting helpers to interrogate them. + +The core workflow is: + +1. Choose gene-set library (```getGeneSets()``` or your own list) +2. Score cells (```runEscape()```) +3. (Optional) Normalize for drop-out (```performNormalization()```) +4. Explore with the built-in visualization gallery + +# Installation + +```{r eval=FALSE} +devtools::install_github("BorchLab/escape") +if (!require("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + +BiocManager::install("escape") +``` + +Load escape alongside a single-cell container (Seurat or SingleCellExperiment) and a plotting backend: ```{r} -suppressPackageStartupMessages(library(escape)) -suppressPackageStartupMessages(library(SingleCellExperiment)) -suppressPackageStartupMessages(library(scran)) -suppressPackageStartupMessages(library(Seurat)) -suppressPackageStartupMessages(library(SeuratObject)) -suppressPackageStartupMessages(library(RColorBrewer)) -suppressPackageStartupMessages(library(ggplot2)) +suppressPackageStartupMessages({ + pkgs <- c( + "escape", "SingleCellExperiment", "scran", "Seurat", "SeuratObject", + "RColorBrewer", "ggplot2" + ) + invisible(lapply(pkgs, library, character.only = TRUE)) +}) +``` +# Loading Processed Single-Cell Data +For the demonstration of *escape*, we will use the example **"pbmc_small"** data from *Seurat* and also generate a `SingleCellExperiment` object from it. + +```{r} pbmc_small <- get("pbmc_small") sce.pbmc <- as.SingleCellExperiment(pbmc_small, assay = "RNA") ``` # Getting Gene Sets -## Option 1: Molecular Signature Database +## Option 1: Built-In gene sets + +```{r} +data("escape.gene.sets", package="escape") +``` + +## Option 2: MSigDB via ```getGeneSets()``` Gene set enrichment analysis begins by identifying the appropriate gene sets for your study. The ```getGeneSets()``` function simplifies this process by extracting one or more gene set libraries from the Molecular Signature Database (MSigDB) and returning them as a GSEABase GeneSetCollection object. Note that the first time you run ```getGeneSets()```, it downloads a complete local copy of the gene sets, which may take a little while. Future calls will use the cached version, greatly improving performance. @@ -61,19 +91,10 @@ In addition, the function supports further subsetting through these parameters: * **subcategory**: Narrow down your selection by specifying subcategories within a library. Examples include "CGN", "CGP", "CP:BIOCARTA", "CP:KEGG", "GO:BP", "IMMUNESIGDB", etc. * **gene.sets:** Isolate individual pathways or gene sets by providing their specific names. -If your data comes from a species other than Homo sapiens, be sure to use the species parameter (e.g., "Mus musculus") to ensure the correct gene nomenclature is applied. - -```{r} +```{r eval=FALSE} GS.hallmark <- getGeneSets(library = "H") ``` -## Option 2: Built-In gene sets - -```{r, eval = FALSE} -data("escape.gene.sets", package="escape") -gene.sets <- escape.gene.sets -``` - ## Option 3: Define personal gene sets ```{r, eval=FALSE, tidy=FALSE} @@ -82,6 +103,15 @@ gene.sets <- list(Bcells = c("MS4A1","CD79B","CD79A","IGH1","IGH2"), Tcells = c("CD3E", "CD3D", "CD3G", "CD7","CD8A")) ``` +## Option 4: Using msigdbr + +[msigdbr](https://cran.r-project.org/web/packages/msigdbr/index.html) is an alternative R package to access the Molecular Signature Database in R. There is expanded support for species in the package as well as a mix of accessible versus downloadable gene sets, so it can be faster than caching a copy locally. + +```{r eval=FALSE, tidy=FALSE} +GS.hallmark <- msigdbr(species = "Homo sapiens", + category = "H") +``` + # Performing Enrichment Calculation Several popular methods exist for Gene Set Enrichment Analysis (GSEA). These methods can vary in the underlying assumptions. *escape* incorporates several methods that are particularly advantageous for single-cell RNA values: @@ -157,7 +187,7 @@ escape has 2 major functions - the first being ```escape.matrix()```, which serv ```{r tidy = FALSE} enrichment.scores <- escape.matrix(pbmc_small, - gene.sets = GS.hallmark, + gene.sets = escape.gene.sets, groups = 1000, min.size = 5) @@ -172,10 +202,10 @@ Multi-core support is for all methods is available through [BiocParallel](https: ```{r tidy=FALSE, eval=FALSE} enrichment.scores <- escape.matrix(pbmc_small, - gene.sets = GS.hallmark, + gene.sets = escape.gene.sets, groups = 1000, - min.size = 5, - BPPARAM = SnowParam(workers = 2)) + min.size = 3, + BPPARAM = BiocParallel::SnowParam(workers = 2)) ``` ## runEscape @@ -185,14 +215,14 @@ Alternatively, we can use ```runEscape()``` to calculate the enrichment score an ```{r tidy = FALSE} pbmc_small <- runEscape(pbmc_small, method = "ssGSEA", - gene.sets = GS.hallmark, + gene.sets = escape.gene.sets, groups = 1000, - min.size = 5, + min.size = 3, new.assay.name = "escape.ssGSEA") sce.pbmc <- runEscape(sce.pbmc, method = "UCell", - gene.sets = GS.hallmark, + gene.sets = escape.gene.sets, groups = 1000, min.size = 5, new.assay.name = "escape.UCell") @@ -204,29 +234,29 @@ We can quickly examine the attached enrichment scores using the visualization/wo #Define color palette colorblind_vector <- hcl.colors(n=7, palette = "inferno", fixup = TRUE) -FeaturePlot(pbmc_small, "HALLMARK-APOPTOSIS") + +FeaturePlot(pbmc_small, "Proinflammatory") + scale_color_gradientn(colors = colorblind_vector) + theme(plot.title = element_blank()) ``` ## performNormalization -Although we glossed over the normalization that can be used in ```escape.matrix()``` and ```runEscape()```, it is worth mentioning here as normalization can affect all downstream analyses. +Although we glossed over the normalization that can be used in ```escape.matrix()``` and ```runEscape()```, it is worth mentioning here as normalization can affect all downstream analyses. There can be inherent bias in enrichment values due to drop out in single-cell expression data. Cells with larger numbers of features and counts will likely have higher enrichment values. ```performNormalization()``` will normalize the enrichment values by calculating the number of genes expressed in each gene set and cell. This is similar to the normalization in classic GSEA and it will be stored in a new assay. -```{r} -pbmc_small <- performNormalization(sc.data = pbmc_small, +```{r tidy=FALSE} +pbmc_small <- performNormalization(input.data = pbmc_small, assay = "escape.ssGSEA", - gene.sets = GS.hallmark) + gene.sets = escape.gene.sets) ``` An alternative for scaling by expressed gene sets would be to use a scaling factor previously calculated during normal single-cell data processing and quality control. This can be done using the **scale.factor** argument and providing a vector. -```{r} -pbmc_small <- performNormalization(sc.data = pbmc_small, +```{r tidy=FALSE} +pbmc_small <- performNormalization(input.data = pbmc_small, assay = "escape.ssGSEA", - gene.sets = GS.hallmark, + gene.sets = escape.gene.sets, scale.factor = pbmc_small$nFeature_RNA) ``` @@ -242,7 +272,7 @@ There are a number of ways to look at the enrichment values downstream of ```run We can examine the enrichment values across our gene sets by using ```heatmapEnrichment()```. This visualization will return the mean of the **group.by** variable. As a default - all visualizations of single-cell objects will use the cluster assignment or active identity as a default for visualizations. -```{r} +```{r tidy=FALSE} heatmapEnrichment(pbmc_small, group.by = "ident", gene.set.use = "all", @@ -266,7 +296,7 @@ Most of the visualizations in *escape* have a defined set of parameters. In addition, ```heatmapEnrichment()``` allows for the reclustering of rows and columns using Euclidean distance of the enrichment scores and the Ward2 methods for clustering using **cluster.rows** and **cluster.columns**. -```{r} +```{r tidy=FALSE} heatmapEnrichment(sce.pbmc, group.by = "ident", assay = "escape.UCell", @@ -281,7 +311,7 @@ Each visualization has an additional argument called **palette that supplies the hcl.pals() ``` -```{r} +```{r tidy=FALSE} heatmapEnrichment(pbmc_small, assay = "escape.ssGSEA", palette = "Spectral") @@ -289,7 +319,7 @@ heatmapEnrichment(pbmc_small, Alternatively, we can add an additional layer to the ggplot object that is returned by the visualizations using something like ```scale_fill_gradientn()``` for continuous values or ```scale_fill_manual()``` for the categorical variables. -```{r} +```{r tidy=FALSE} heatmapEnrichment(sce.pbmc, group.by = "ident", assay = "escape.UCell") + @@ -300,55 +330,55 @@ heatmapEnrichment(sce.pbmc, We can also focus on individual gene sets - one approach is to use ```geyserEnrichment()```. Here individual cells are plotted along the Y-axis with graphical summary where the central dot refers to the median enrichment value and the thicker/thinner lines demonstrate the interval summaries referring to the 66% and 95%. -```{r} +```{r tidy=FALSE} geyserEnrichment(pbmc_small, assay = "escape.ssGSEA", - gene.set = "HALLMARK-INTERFERON-GAMMA-RESPONSE") + gene.set = "T1-Interferon") ``` To show the additional parameters that appear in visualizations of individual enrichment gene sets - we can reorder the groups by the mean of the gene set using **order.by** = "mean". -```{r} +```{r tidy=FALSE} geyserEnrichment(pbmc_small, assay = "escape.ssGSEA", - gene.set = "HALLMARK-INTERFERON-GAMMA-RESPONSE", + gene.set = "T1-Interferon", order.by = "mean") ``` What if we had 2 separate samples or groups within the data? Another parameter we can use is **facet.by** to allow for direct visualization of an additional variable. -```{r} +```{r tidy=FALSE} geyserEnrichment(pbmc_small, assay = "escape.ssGSEA", - gene.set = "HALLMARK-INTERFERON-GAMMA-RESPONSE", + gene.set = "T1-Interferon", facet.by = "groups") ``` Lastly, we can select the way the color is applied to the plot using the **color.by** parameter. Here we can set it to the gene set of interest *"HALLMARK-INTERFERON-GAMMA-RESPONSE"*. -```{r} +```{r tidy=FALSE} geyserEnrichment(pbmc_small, assay = "escape.ssGSEA", - gene.set = "HALLMARK-INTERFERON-GAMMA-RESPONSE", - color.by = "HALLMARK-INTERFERON-GAMMA-RESPONSE") + gene.set = "T1-Interferon", + color.by = "T1-Interferon") ``` ## ridgeEnrichment Similar to the ```geyserEnrichment()``` the ```ridgeEnrichment()``` can display the distribution of enrichment values across the selected gene set. The central line is at the median value for the respective grouping. -```{r} +```{r tidy=FALSE} ridgeEnrichment(sce.pbmc, assay = "escape.UCell", - gene.set = "HALLMARK-IL2-STAT5-SIGNALING") + gene.set = "T2_Interferon") ``` We can get the relative position of individual cells along the x-axis using the **add.rug** parameter. -```{r} +```{r tidy=FALSE} ridgeEnrichment(sce.pbmc, assay = "escape.UCell", - gene.set = "HALLMARK-IL2-STAT5-SIGNALING", + gene.set = "T2_Interferon", add.rug = TRUE, scale = TRUE) ``` @@ -357,49 +387,82 @@ ridgeEnrichment(sce.pbmc, Another distribution visualization is a violin plot, which we separate and directly compare using a binary classification. Like ```ridgeEnrichment()```, this allows for greater use of categorical variables. For ```splitEnrichment()```, the output will be two halves of a violin plot based on the **split.by** parameter with a central boxplot with the relative distribution across all samples. -```{r} +```{r tidy=FALSE} splitEnrichment(pbmc_small, assay = "escape.ssGSEA", - gene.set = "HALLMARK-IL2-STAT5-SIGNALING", + gene.set = "Lipid-mediators", split.by = "groups") ``` -## densityEnrichment +If selecting a **split.by** variable with more than 2 levels, ```splitEnrichment()``` will convert the violin plots to dodge. + +```{r tidy=FALSE} +splitEnrichment(pbmc_small, + assay = "escape.ssGSEA", + gene.set = "Lipid-mediators", + split.by = "ident", + group.by = "groups") +``` + +## gseaEnrichment + +```gseaEnrichment()``` reproduces the two-panel GSEA graphic from Subramanian et al. (2005): +* Panel A – the running enrichment score (RES) as you “walk” down the ranked list. +* Panel B – a rug showing exact positions of each pathway gene. + +It works on escape’s per-cell ranks, but collapses them across cells with a summary statistic (summary.fun = "median" by default). + +**How it works:** -```densityEnrichment()``` is a method to visualize the mean rank position of the gene set features along the total feature space by group. This is similar to traditional GSEA analysis, but is not calculating the walk-based enrichment score. +1. Rank all genes in each group by summary.fun of expression/statistic. +2. Perform the weighted Kolmogorov–Smirnov walk: +w when the next gene is in +the set, −1/(N − NG) otherwise. +3. ES = maximum signed deviation; permutation on gene labels (or phenotypes) +to derive NES and p. -**gene.set.use** +```{r tidy=FALSE} +gseaEnrichment(pbmc_small, + gene.set.use = "T2_Interferon", + gene.sets = escape.gene.sets, + group.by = "ident", + summary.fun = "median", + nperm = 1000) +``` + +## densityEnrichment -* The selected gene set to visualize +```densityEnrichment()``` is a method to visualize the mean rank position of the gene set features along the total feature space by group. Instead of the classic GSEA running-score, it overlays **kernel-density traces** of the *gene ranks* (1 = most highly expressed/ranked gene) for every group or cluster. High densities at the *left-hand* side mean the pathway is collectively **up-regulated**; peaks on the *right* imply down-regulation. -**gene.sets** +**Anatomy of the plot** -* The gene set library from either of the 3 options in the first section of the vignette. +1. **X-axis** – gene rank (1 … *N*). Left = top-ranked genes. +2. **Y-axis** – density estimate (area under each curve = 1). +3. **One coloured line per level of `group.by`** – default is Seurat/SCE cluster. ```{r tidy=FALSE, eval=FALSE} densityEnrichment(pbmc_small, - gene.set.use = "HALLMARK-IL6-JAK-STAT3-SIGNALING", - gene.sets = GS.hallmark) + gene.set.use = "T2_Interferon", + gene.sets = escape.gene.sets) ``` ## scatterEnrichment It may be advantageous to look at the distribution of multiple gene sets - here we can use ```scatterEnrichment()``` for a 2 gene set comparison. The color values are based on the density of points determined by the number of neighbors, similar to the [Nebulosa R package](https://www.bioconductor.org/packages/release/bioc/html/Nebulosa.html). We just need to define which gene set to plot on the **x.axis** and which to plot on the **y.axis**. -```{r} +```{r tidy=FALSE} scatterEnrichment(pbmc_small, assay = "escape.ssGSEA", - x.axis = "HALLMARK-INTERFERON-GAMMA-RESPONSE", - y.axis = "HALLMARK-IL6-JAK-STAT3-SIGNALING") + x.axis = "T2-Interferon", + y.axis = "Lipid-mediators") ``` The scatter plot can also be converted into a hexbin, another method for summarizing the individual cell distributions along the x and y axis, by setting **style** = "hex". -```{r} +```{r tidy=FALSE} scatterEnrichment(sce.pbmc, assay = "escape.UCell", - x.axis = "HALLMARK-INTERFERON-GAMMA-RESPONSE", - y.axis = "HALLMARK-IL6-JAK-STAT3-SIGNALING", + x.axis = "T2_Interferon", + y.axis = "Lipid_mediators", style = "hex") ``` @@ -413,7 +476,7 @@ escape has its own PCA function ```performPCA()``` which will work on a single-c Alternatively, other PCA-based functions like Seurat's ```RunPCA()``` or scater's ```runPCA()` can be used. These functions are likely faster and would be ideal if we have a larger number of cells and/or gene sets. -```{r} +```{r tidy=FALSE} pbmc_small <- performPCA(pbmc_small, assay = "escape.ssGSEA", n.dim = 1:10) @@ -421,7 +484,7 @@ pbmc_small <- performPCA(pbmc_small, *escape* has a built in method for plotting PCA ```pcaEnrichment()``` that functions similarly to the ```scatterEnrichment()``` function where **x.axis** and **y.axis** are the components to plot. -```{r} +```{r tidy=FALSE} pcaEnrichment(pbmc_small, dimRed = "escape.PCA", x.axis = "PC1", @@ -434,7 +497,7 @@ pcaEnrichment(pbmc_small, **display.factors** will overlay the magnitude and direction that the features/gene sets contribute to the selected components. The number of gene sets is determined by **number.of.factors**. This can assist in understanding the underlying differences in enrichment across different cells. -```{r} +```{r tidy=FALSE} pcaEnrichment(pbmc_small, dimRed = "escape.PCA", x.axis = "PC1", @@ -444,14 +507,77 @@ pcaEnrichment(pbmc_small, number.of.factors = 10) ``` +## Precomputed Rank Lists + +Functional enrichment is not limited to per-cell scores. Many workflows start with **differential-expression (DE) statistics** (e.g.\ Seurat’s `FindMarkers()`, +DESeq2’s `results()`, edgeR’s `topTags()`). Those produce a *ranked gene list* +that can be fed into a classical **Gene-Set Enrichment Analysis (GSEA)**. + +### Why do this? + +* **Aggregates signal across genes**: a borderline but *consistent* trend across +30 pathway genes is often more informative than a single high-logFC gene. +* **Directionality**: by combining log-fold-change (*effect size*) and an +adjusted *p*-value (*confidence*) +* **Speed**: you avoid re-scoring every cell; only one numeric vector is needed. + +`enrichIt()` accepts either + +1. a **named numeric vector** (*already ranked*), or +2. a **data frame** containing logFC + *p* (or *adj.p*). + +The helper **automatically chooses** the best *p*-value column in this order: + +1. `p_val_adj` +2. `padj` (DESeq2) +3. `FDR` (edgeR) +4. plain `p_val` + +### Example ```enrichIt()``` workflow + +```{r tidy=FALSE} +DEG.markers <- FindMarkers(pbmc_small, + ident.1 = "0", + ident.2 = "1") + +GSEA.results <- enrichIt(input.data = DEG.markers, + gene.sets = escape.gene.sets, + ranking_fun = "signed_log10_p") + +head(GSEA.results) +``` + +What does the result look like? + +* **ES / NES** – raw and normalised enrichment scores from fgsea +* **pval / padj** – nominal and multiple-testing-corrected p +* **size** – total number of genes in the set +* **geneRatio** – (core hits)/(size), useful for dot plots +* **leadingEdge** – semi-colon-separated genes driving the signal + +### Visualising the enrichment table + +The companion ```enrichItPlot()``` gives three quick chart types. + +```{r tidy=FALSE} +## (1) Bar plot –20 most significant per database +enrichItPlot(GSEA.results) + +## (2) Dot plot – coloured by –log10 padj, sized by core-hits +enrichItPlot(GSEA.results, "dot", top = 10) + +## (3) C-net plot – network of pathways ↔ leading-edge genes +enrichItPlot(GSEA.results, "cnet", top = 5) +``` + ## Differential Enrichment Differential enrichment analysis can be performed similar to differential gene expression analysis. For the purposes of finding the differential enrichment values, we can first normalize the enrichment values for the ssGSEA calculations. Notice here, we are using **make.positive** = TRUE in order to adjust any negative values. This is a particular issue when it comes to ssGSEA and GSVA enrichment scores. -```{r} +```{r tidy=FALSE} pbmc_small <- performNormalization(pbmc_small, assay = "escape.ssGSEA", - gene.sets = GS.hallmark, + gene.sets = escape.gene.sets, make.positive = TRUE) all.markers <- FindAllMarkers(pbmc_small,