Skip to content

Commit 41cbe68

Browse files
Merge pull request #166 from toobiwankenobi/geyserSummary
fix color.by bug and introduce summarise.by feature for geyserEnrichment
2 parents b013e24 + 7b09b86 commit 41cbe68

12 files changed

Lines changed: 174 additions & 57 deletions

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: escape
22
Title: Easy single cell analysis platform for enrichment
3-
Version: 2.5.4
3+
Version: 2.5.5
44
Authors@R: c(
55
person(given = "Nick", family = "Borcherding", role = c("aut", "cre"), email = "ncborch@gmail.com"),
66
person(given = "Jared", family = "Andrews", role = c("aut"), email = "jared.andrews07@gmail.com"),

NEWS.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# 2.5.5 (2025-06-11)
2+
3+
## Bug fix & enhanced functionality
4+
* Enable ```color.by``` for both metadata columns and features (other gene sets)
5+
* Introduce ```summarise.by``` argument for ```geyserEnrichment()```
6+
* Enable scaling if color.by is another gene.set. Enable scaling for ```dgCMatrix```
7+
18
# 2.5.4 (2025-06-05)
29

310
## Bug fixes

R/geyserEnrichment.R

Lines changed: 64 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
#' *`"group"`* – natural sort of group labels;
2222
#' *`NULL`* – keep original ordering.
2323
#' @param facet.by Optional metadata column used to facet the plot.
24+
#' @param summarise.by Optional metadata column used to summarise data.
25+
#' @param summary.stat Optional method used to summarize expression within each
26+
#' group defined by \code{summarise.by}. One of: \code{"mean"} (default),
27+
#' \code{"median"}, \code{"max"}, \code{"sum"}, or \code{"geometric"}.
2428
#' @param scale Logical; if `TRUE` scores are centered/scaled (Z‑score) prior
2529
#' to plotting.
2630
#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}.
@@ -50,6 +54,8 @@ geyserEnrichment <- function(input.data,
5054
order.by = NULL,
5155
scale = FALSE,
5256
facet.by = NULL,
57+
summarise.by = NULL,
58+
summary.stat = "mean",
5359
palette = "inferno") {
5460
## ---- 0) Sanity checks -----------------------------------------------------
5561
if (missing(gene.set) || length(gene.set) != 1L)
@@ -61,24 +67,69 @@ geyserEnrichment <- function(input.data,
6167
if (identical(color.by, "group"))
6268
color.by <- group.by
6369

64-
## ---- 1) Build tidy data.frame -------------------------------------------
70+
if (!is.null(summarise.by) && (identical(summarise.by, group.by) ||
71+
identical(summarise.by, facet.by)))
72+
stop("'summarise.by' cannot be the same as 'group.by' or 'facet.by'.
73+
Please choose a different metadata column.")
74+
75+
# ---- 1) helper to match summary function -------------------------
76+
summary_fun <- .match_summary_fun(summary.stat)
77+
78+
## ---- 2) Build tidy data.frame -------------------------------------------
6579
enriched <- .prepData(input.data, assay, gene.set, group.by,
66-
split.by = NULL, facet.by = facet.by)
80+
split.by = summarise.by, facet.by = facet.by, color.by = color.by)
81+
82+
# Define all grouping variables that must be metadata columns
83+
grouping_vars <- unique(c(summarise.by, group.by, facet.by))
84+
85+
# Determine if color.by is a feature
86+
all_features <- rownames(.cntEval(input.data, assay = assay, type = "data"))
87+
88+
# Determine if color.by is a feature
89+
is_feature_color <- !is.null(color.by) &&
90+
(color.by %in% all_features)
91+
92+
## Optionally summarise data with **base aggregate()** ----------------------
93+
if (!is.null(summarise.by)) {
94+
95+
# add color.by to summarise_vars if it is a feautre, otherwise add to grouping_vars
96+
summarise_vars <- unique(c(gene.set, if (is_feature_color) color.by))
97+
grouping_vars <- unique(c(grouping_vars, if (!is_feature_color) color.by))
98+
99+
# Perform aggregation
100+
enriched <- aggregate(enriched[summarise_vars],
101+
by = enriched[grouping_vars],
102+
FUN = summary_fun,
103+
simplify = TRUE)
104+
}
67105

68106
## Optionally Z‑transform ----------------------------------------------------
69-
if (scale)
70-
enriched[[gene.set]] <- as.numeric(scale(enriched[[gene.set]]))
107+
if (scale) {
108+
enriched[[gene.set]] <- scale(as.numeric(enriched[[gene.set]]))
109+
110+
# Also scale color.by if it's a feature
111+
if (is_feature_color) {
112+
enriched[[color.by]] <- scale(enriched[[color.by]])
113+
}
114+
}
71115

72116
## Optionally reorder groups -------------------------------------------------
73117
if (!is.null(order.by))
74118
enriched <- .orderFunction(enriched, order.by, group.by)
75119

76-
## ---- 2) Plot --------------------------------------------------------------
77-
plt <- ggplot(enriched, aes(x = .data[[group.by]],
78-
y = .data[[gene.set]],
79-
colour = .data[[color.by]])) +
120+
## ---- 3) Plot --------------------------------------------------------------
121+
if (!is.null(color.by))
122+
plt <- ggplot(enriched, aes(x = .data[[group.by]],
123+
y = .data[[gene.set]],
124+
group = .data[[group.by]],
125+
colour = .data[[color.by]]))
126+
else
127+
plt <- ggplot(enriched, aes(x = .data[[group.by]],
128+
y = .data[[gene.set]]),
129+
group = .data[[group.by]])
130+
80131
# Raw points --------------------------------------------------------------
81-
geom_jitter(width = 0.25, size = 1.5, alpha = 0.6, na.rm = TRUE) +
132+
plt <- plt + geom_jitter(width = 0.25, size = 1.5, alpha = 0.6, na.rm = TRUE) +
82133

83134
# White base interval + median point -------------------------------------
84135
stat_pointinterval(interval_size_range = c(2, 3), fatten_point = 1.4,
@@ -97,10 +148,11 @@ geyserEnrichment <- function(input.data,
97148
theme(legend.direction = "horizontal",
98149
legend.position = "bottom")
99150

100-
## ---- 3) Colour scale ------------------------------------------------------
101-
plt <- .colorby(enriched, plt, color.by, palette, type = "color")
151+
## ---- 4) Colour scale ------------------------------------------------------
152+
if (!is.null(color.by))
153+
plt <- .colorby(enriched, plt, color.by, palette, type = "color")
102154

103-
## ---- 4) Facetting ---------------------------------------------------------
155+
## ---- 5) Facetting ---------------------------------------------------------
104156
if (!is.null(facet.by))
105157
plt <- plt + facet_grid(as.formula(paste(".~", facet.by)))
106158

R/gseaEnrichment.R

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ gseaEnrichment <- function(input.data,
178178
ggplot2::geom_step(linewidth = 0.8) +
179179
ggplot2::geom_hline(yintercept = 0) +
180180
ggplot2::scale_colour_manual(values = cols, name = NULL) +
181-
ggplot2::labs(y = "Running Enrichment Score") +
181+
ggplot2::labs(y = paste0(gene.set.use, "\nRunning Enrichment Score")) +
182182
ggplot2::theme_classic() +
183183
ggplot2::theme(axis.title.x = element_blank(),
184184
axis.text.x = element_blank(),
@@ -194,7 +194,7 @@ gseaEnrichment <- function(input.data,
194194
axis.text.y = element_blank(),
195195
axis.ticks.y = element_blank(),
196196
panel.border = element_rect(fill = NA, colour = "black", linewidth = 0.5))
197-
198-
p_top / p_mid + patchwork::plot_layout(heights = c(3, 0.4))
197+
198+
patchwork::wrap_plots(p_top, p_mid, ncol = 1, heights = c(3, 0.4))
199199
}
200200

R/heatmapEnrichment.R

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717
#' @param facet.by Optional metadata column used to facet the plot.
1818
#' @param scale If \code{TRUE}, Z‑transforms each gene‑set column **after**
1919
#' summarization.
20-
#' @param summary.stat Method used to summarize expression within each
21-
#* group: one of `"mean"` (default), `"median"`, `"max"`,
22-
#*`"sum"`, or `"geometric"`
20+
#' @param summary.stat Optional method used to summarize expression within each
21+
#' group. One of: \code{"mean"} (default), \code{"median"}, \code{"max"},
22+
#' \code{"sum"}, or \code{"geometric"}.
2323
#' @param palette Character. Any palette from \code{\link[grDevices]{hcl.pals}}.
2424
#'
2525
#' @return A \code{ggplot2} object.
@@ -47,30 +47,15 @@ heatmapEnrichment <- function(input.data,
4747
palette = "inferno")
4848
{
4949
# ---------- 1. helper to match summary function -------------------------
50-
.match_summary_fun <- function(fun) {
51-
if (is.function(fun)) return(fun)
52-
if (!is.character(fun) || length(fun) != 1)
53-
stop("'summary.stat' must be a single character keyword or a function")
54-
kw <- tolower(fun)
55-
fn <- switch(kw,
56-
mean = base::mean,
57-
median = stats::median,
58-
sum = base::sum,
59-
sd = stats::sd,
60-
max = base::max,
61-
min = base::min,
62-
geometric = function(x) exp(mean(log(x + 1e-6))),
63-
stop("Unsupported summary keyword: ", fun))
64-
fn
65-
}
6650
summary_fun <- .match_summary_fun(summary.stat)
6751

6852
# ---------- 2. pull / tidy data -----------------------------------------
6953
if (is.null(group.by)) group.by <- "ident"
7054
df <- .prepData(input.data, assay, gene.set.use,
7155
group.by = group.by,
7256
split.by = NULL,
73-
facet.by = facet.by)
57+
facet.by = facet.by,
58+
color.by = NULL)
7459

7560
# Which columns contain gene-set scores?
7661
if (identical(gene.set.use, "all"))

R/ridgeEnrichment.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ ridgeEnrichment <- function(input.data,
6161

6262
## ---- 1 build long data.frame ---------------------------------------
6363
df <- .prepData(input.data, assay, gene.set.use, group.by,
64-
split.by = NULL, facet.by = facet.by)
64+
split.by = NULL, facet.by = facet.by, color.by = color.by)
6565

6666
## optional scaling (Z-transform per gene-set) -------------------------
6767
if (scale)

R/scatterEnrichment.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ scatterEnrichment <- function(input.data,
6969
gene.set <- c(x.axis, y.axis)
7070

7171
## ---- 1 Assemble long data-frame -----------------------------------------
72-
enriched <- .prepData(input.data, assay, gene.set, group.by, NULL, facet.by)
72+
enriched <- .prepData(input.data, assay, gene.set, group.by, NULL, facet.by,
73+
color.by = NULL)
7374

7475
if (scale) {
7576
enriched[, gene.set] <- apply(enriched[, gene.set, drop = FALSE], 2, scale)

R/splitEnrichment.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ splitEnrichment <- function(input.data,
5656
if (is.null(group.by)) group.by <- "ident"
5757

5858
# Prepare tidy data with relevant metadata columns
59-
enriched <- .prepData(input.data, assay, gene.set.use, group.by, split.by, facet.by)
59+
enriched <- .prepData(input.data, assay, gene.set.use, group.by, split.by,
60+
facet.by, color.by = NULL)
6061

6162
# Determine the number of levels in the splitting variable
6263
split.levels <- unique(enriched[[split.by]])

R/utils.R

Lines changed: 76 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -41,40 +41,80 @@
4141
# DATA.frame BUILDERS ---------------------------------------------------------
4242
# -----------------------------------------------------------------------------
4343
.makeDFfromSCO <- function(input.data, assay = "escape", gene.set = NULL,
44-
group.by = NULL, split.by = NULL, facet.by = NULL) {
44+
group.by = NULL, split.by = NULL, facet.by = NULL, color.by = NULL) {
4545
if (is.null(assay))
4646
stop("Please provide assay name")
47-
cols <- unique(c(group.by, split.by, facet.by))
47+
48+
# Pull count matrix (features) and metadata
4849
cnts <- .cntEval(input.data, assay = assay, type = "data")
50+
features <- rownames(cnts)
51+
meta <- .grabMeta(input.data)
52+
meta.cols <- colnames(meta)
4953

50-
if (length(gene.set) == 1 && gene.set == "all")
51-
gene.set <- rownames(cnts)
54+
# All potential column-like arguments
55+
cols <- unique(c(group.by, split.by, facet.by, color.by))
5256

53-
meta <- .grabMeta(input.data)
54-
meta <- meta[, cols, drop = FALSE]
57+
# Check that each is either metadata or a feature
58+
bad.cols <- cols[!(cols %in% meta.cols | cols %in% features)]
59+
if (length(bad.cols) > 0) {
60+
stop("The following variables are not found in either metadata or features: ", paste(bad.cols, collapse = ", "))
61+
}
62+
63+
# Determine if color.by is a feature or meta
64+
is_feature_color <- !is.null(color.by) && color.by %in% features
65+
is_meta_color <- !is.null(color.by) && color.by %in% meta.cols
5566

67+
# Prepare metadata subset
68+
meta <- meta[, intersect(cols, meta.cols), drop = FALSE]
69+
70+
# Convert gene.set if "all"
71+
if (length(gene.set) == 1 && gene.set == "all") {
72+
gene.set <- features
73+
}
74+
75+
# Build data frame with expression values
5676
if (length(gene.set) == 1) {
5777
df <- cbind(value = cnts[gene.set, ], meta)
5878
colnames(df)[1] <- gene.set
5979
} else {
6080
df <- cbind(Matrix::t(cnts[gene.set, , drop = FALSE]), meta)
6181
}
62-
df
82+
83+
# Add color.by feature expression if it's a gene but not in gene.set
84+
if (is_feature_color && !(color.by %in% gene.set)) {
85+
df[[color.by]] <- cnts[color.by, ]
86+
}
87+
88+
return(df)
6389
}
6490

65-
.prepData <- function(input.data, assay, gene.set, group.by, split.by, facet.by) {
91+
92+
.prepData <- function(input.data, assay, gene.set, group.by, split.by, facet.by, color.by) {
6693
if (.is_seurat_or_sce(input.data)) {
67-
df <- .makeDFfromSCO(input.data, assay, gene.set, group.by, split.by, facet.by)
94+
df <- .makeDFfromSCO(input.data, assay, gene.set, group.by, split.by, facet.by, color.by)
95+
96+
if (identical(gene.set, "all")) {
97+
meta_cols <- c(group.by, split.by, facet.by)
98+
# Do not remove color.by if it's also a feature
99+
non_gene_color <- if (!is.null(color.by) && color.by %in% colnames(df) && !(color.by %in% gene.set)) color.by else NULL
100+
gene.set <- setdiff(colnames(df), c(meta_cols, non_gene_color))
101+
}
102+
103+
} else {
104+
all.cols <- unique(c(gene.set, group.by, split.by, facet.by, color.by))
105+
missing.cols <- setdiff(all.cols, colnames(input.data))
106+
if (length(missing.cols) > 0) {
107+
stop("The following columns are missing in the input data: ", paste(missing.cols, collapse = ", "))
108+
}
109+
68110
if (identical(gene.set, "all")) {
69-
gene.set <- setdiff(colnames(df), c(group.by, split.by, facet.by))
111+
gene.set <- setdiff(colnames(input.data), c(group.by, split.by, facet.by, color.by))
70112
}
71-
} else { # assume plain data.frame / matrix
72-
if (identical(gene.set, "all"))
73-
gene.set <- setdiff(colnames(input.data), c(group.by, split.by, facet.by))
74-
df <- input.data[, c(gene.set, group.by, split.by, facet.by), drop = FALSE]
113+
114+
df <- input.data[, unique(c(gene.set, group.by, split.by, facet.by, color.by)), drop = FALSE]
75115
}
76-
colnames(df) <- c(gene.set, group.by, split.by, facet.by)
77-
df
116+
117+
return(df)
78118
}
79119

80120
# -----------------------------------------------------------------------------
@@ -443,4 +483,24 @@ utils::globalVariables(c(
443483
"gene.set.query", "index"
444484
))
445485

486+
# helper to match summary function
487+
.match_summary_fun <- function(fun) {
488+
if (is.function(fun)) return(fun)
489+
if (!is.character(fun) || length(fun) != 1)
490+
stop("'summary.stat' must be a single character keyword or a function")
491+
kw <- tolower(fun)
492+
fn <- switch(kw,
493+
mean = base::mean,
494+
median = stats::median,
495+
sum = base::sum,
496+
sd = stats::sd,
497+
max = base::max,
498+
min = base::min,
499+
geometric = function(x) exp(mean(log(x + 1e-6))),
500+
stop("Unsupported summary keyword: ", fun))
501+
502+
# Attach keyword as attribute
503+
attr(fn, "keyword") <- kw
504+
fn
505+
}
446506

man/geyserEnrichment.Rd

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)