Skip to content

Commit 5cb4eea

Browse files
Merge pull request #141 from ncborcherding/dev
v2.2.3
2 parents c8079f1 + 6530e5c commit 5cb4eea

5 files changed

Lines changed: 50 additions & 48 deletions

File tree

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: escape
22
Title: Easy single cell analysis platform for enrichment
3-
Version: 2.2.2
3+
Version: 2.2.3
44
Authors@R: c(
55
person(given = "Nick", family = "Borcherding", role = c("aut", "cre"), email = "ncborch@gmail.com"),
66
person(given = "Jared", family = "Andrews", role = c("aut"), email = "jared.andrews07@gmail.com"),

NEWS.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
# escape VERSION 2.2.3
2+
3+
## UNDERLYING CHANGES
4+
5+
* fixed handling of *groups* parameter and data splitting in ```escape.matrix()```
6+
* improved efficiency of internal ```.split_data.matrix()```
7+
18
# escape VERSION 2.2.2
29

310
## UNDERLYING CHANGES

R/runEscape.R

Lines changed: 34 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -70,39 +70,42 @@ escape.matrix <- function(input.data,
7070
length(splits), 'times.'))
7171
split.data <- .split_data.matrix(matrix=cnts, chunk.size=groups)
7272

73+
all_gene_sets <- names(egc) # Collect all gene set names
7374

7475
for (i in seq_along(splits)) {
75-
last <- min(ncol(cnts), i+groups-1)
76-
if(method == "GSVA") {
77-
parameters <- .gsva.setup(split.data[[i]], egc)
78-
} else if (method == "ssGSEA") {
79-
parameters <- .ssGSEA.setup(split.data[[i]], egc)
80-
}
81-
if(method %in% c("ssGSEA", "GSVA")) {
82-
a <- suppressWarnings(gsva(param = parameters,
83-
verbose = FALSE,
84-
BPPARAM = BPPARAM,
85-
...))
86-
} else if(method == "UCell") {
87-
a <- t(suppressWarnings(
88-
ScoreSignatures_UCell(matrix = split.data[[i]],
89-
features=egc,
90-
name = NULL,
91-
BPPARAM = BPPARAM,
92-
...)))
93-
} else if (method == "AUCell") {
94-
rankings <- AUCell_buildRankings(split.data[[i]],
95-
plotStats = FALSE,
96-
verbose = FALSE)
97-
a <- assay(AUCell_calcAUC(geneSets = egc,
98-
rankings,
99-
normAUC = TRUE,
100-
aucMaxRank = ceiling(0.2 * nrow(split.data[[i]])),
101-
verbose = FALSE,
102-
...))
103-
104-
}
105-
scores[[i]] <- a
76+
if (method == "GSVA") {
77+
parameters <- .gsva.setup(split.data[[i]], egc)
78+
} else if (method == "ssGSEA") {
79+
parameters <- .ssGSEA.setup(split.data[[i]], egc)
80+
}
81+
if (method %in% c("ssGSEA", "GSVA")) {
82+
a <- suppressWarnings(gsva(param = parameters,
83+
verbose = FALSE,
84+
BPPARAM = BPPARAM,
85+
...))
86+
} else if (method == "UCell") {
87+
a <- t(suppressWarnings(
88+
ScoreSignatures_UCell(matrix = split.data[[i]],
89+
features = egc,
90+
name = NULL,
91+
BPPARAM = BPPARAM,
92+
...)))
93+
} else if (method == "AUCell") {
94+
rankings <- AUCell_buildRankings(split.data[[i]],
95+
plotStats = FALSE,
96+
verbose = FALSE)
97+
a <- assay(AUCell_calcAUC(geneSets = egc,
98+
rankings,
99+
normAUC = TRUE,
100+
aucMaxRank = ceiling(0.2 * nrow(split.data[[i]])),
101+
verbose = FALSE,
102+
...))
103+
}
104+
105+
# Ensure consistent row names (all_gene_sets) across splits
106+
a <- as.data.frame(a)
107+
a <- a[match(all_gene_sets, rownames(a), nomatch = NA), , drop = FALSE]
108+
scores[[i]] <- a
106109
}
107110
scores <- do.call(cbind, scores)
108111
output <- t(as.matrix(scores))

R/utils.R

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -133,22 +133,16 @@ is_seurat_or_se_object <- function(obj) {
133133
}
134134

135135
#split data matrix into cell chunks
136-
#stole this from https://github.com/carmonalab/UCell
137-
.split_data.matrix <- function(matrix, chunk.size=1000) {
136+
#modified this from https://github.com/carmonalab/UCell
137+
.split_data.matrix <- function(matrix, chunk.size = 1000) {
138138
ncols <- dim(matrix)[2]
139-
nchunks <- (ncols-1) %/% chunk.size + 1
139+
nchunks <- ceiling(ncols / chunk.size) # Total number of chunks
140140

141-
split.data <- list()
142-
min <- 1
141+
split.data <- vector("list", nchunks) # Preallocate list for efficiency
143142
for (i in seq_len(nchunks)) {
144-
if (i == nchunks-1) { #make last two chunks of equal size
145-
left <- ncols-(i-1)*chunk.size
146-
max <- min+round(left/2)-1
147-
} else {
148-
max <- min(i*chunk.size, ncols)
149-
}
150-
split.data[[i]] <- matrix[,min:max]
151-
min <- max+1 #for next chunk
143+
min <- (i - 1) * chunk.size + 1
144+
max <- min(i * chunk.size, ncols)
145+
split.data[[i]] <- matrix[, min:max, drop = FALSE] # Ensure consistent structure
152146
}
153147
return(split.data)
154148
}

vignettes/vignette.Rmd

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,7 @@ ggplot(data = as.data.frame(enrichment.scores),
162162
theme_classic() +
163163
theme(axis.title = element_blank())
164164
```
165+
165166
Multi-core support for all methods is available through [BiocParallel](https://bioconductor.org/packages/release/bioc/html/BiocParallel.html). To add more cores, use the argument **BPPARAM** to ```escape.matrix()```. Here we will use ```SnowParam()``` for its support across platforms and explicitly call 2 workers (or cores).
166167

167168
```{r tidy=FALSE, eval=FALSE}
@@ -176,7 +177,6 @@ enrichment.scores <- escape.matrix(pbmc_small,
176177

177178
Alternatively, we can use ```runEscape()``` to calculate the enrichment score and directly attach the output to a single-cell object. The additional parameter for ```runEscape()``` is **new.assay.name**, which sets the name of the custom assay in which the enrichment scores are saved in the single-cell object.
178179

179-
180180
```{r tidy = FALSE}
181181
pbmc_small <- runEscape(pbmc_small,
182182
method = "ssGSEA",
@@ -209,8 +209,6 @@ Although we glossed over the normalization that can be used in ```escape.matrix(
209209

210210
There can be inherent bias in enrichment values due to dropout in single-cell expression data. Cells with larger numbers of features and counts will likely have higher enrichment values. ```performNormalization()``` will normalize the enrichment values by calculating the number of genes expressed in each gene set and cell. This is similar to the normalization in classic GSEA and it will be stored in a new assay.
211211

212-
213-
214212
```{r}
215213
pbmc_small <- performNormalization(sc.data = pbmc_small,
216214
assay = "escape.ssGSEA",

0 commit comments

Comments
 (0)