Skip to content

Commit

Permalink
Merge pull request #108 from hpages/master
Browse files Browse the repository at this point in the history
Repair and optimize PsiNorm()
  • Loading branch information
drisso authored Feb 19, 2025
2 parents 92a6cd8 + 39212eb commit 120e114
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 18 deletions.
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: scone
Version: 1.31.0
Version: 1.31.1
Title: Single Cell Overview of Normalized Expression data
Description: SCONE is an R package for comparing and ranking the performance of
different normalization schemes for single-cell RNA-seq and other
Expand Down Expand Up @@ -44,7 +44,8 @@ Imports:
MatrixGenerics,
SingleCellExperiment,
DelayedMatrixStats,
sparseMatrixStats
sparseMatrixStats,
SparseArray (>= 1.7.6)
Suggests:
BiocStyle,
DT,
Expand All @@ -59,6 +60,7 @@ Suggests:
scRNAseq,
shiny,
testthat,
DelayedArray,
visNetwork,
doParallel,
batchtools,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,15 @@ import(SingleCellExperiment)
import(SummarizedExperiment)
import(gplots)
import(methods)
importClassesFrom(SparseArray,SparseMatrix)
importClassesFrom(SummarizedExperiment,SummarizedExperiment)
importFrom(DelayedMatrixStats,colMins)
importFrom(DelayedMatrixStats,colSums2)
importFrom(MatrixGenerics,colMins)
importFrom(MatrixGenerics,colSums2)
importFrom(RColorBrewer,brewer.pal)
importFrom(RUVSeq,RUVg)
importFrom(SparseArray,nzwhich)
importFrom(aroma.light,normalizeQuantileRank.matrix)
importFrom(boot,inv.logit)
importFrom(boot,logit)
Expand Down
5 changes: 3 additions & 2 deletions R/SCONE_DEFAULTS.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ SCRAN_FN = function(ei){
#' eo <- PSINORM_FN(ei)
#'
PSINORM_FN = function(ei){
inv_sf <- pareto.MLE(ei+1)
eo = t(t(ei) / mean(inv_sf) * inv_sf)
tei <- t(ei)
sf <- compute_transposed_PsiNorm_sf(tei)
eo <- t(mat_v_div(tei, mean(1 / sf) * sf))
return(eo)
}
75 changes: 66 additions & 9 deletions R/pareto_norm.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,79 @@ setMethod(
#' @export
#' @importFrom DelayedMatrixStats colSums2 colMins
#' @importFrom sparseMatrixStats colSums2 colMins
#' @importClassesFrom SparseArray SparseMatrix
#' @importFrom SparseArray nzwhich
setMethod(
f = "PsiNorm",
signature = signature(x = "ANY"),
definition = function(x){
inv_sf <- pareto.MLE(x+1)
t(t(x) * inv_sf)
## Uses only two transpostions (instead of four in original implementation).
## Preserves sparsity.
tx <- t(x)
sf <- compute_transposed_PsiNorm_sf(tx)
t(mat_v_div(tx, sf))
})

# can we simplify the function and assume min is always 1?
pareto.MLE <- function(sce) {
n <- nrow(sce)
m <- MatrixGenerics::colMins(sce)
a <- n/MatrixGenerics::colSums2(t(t(log(sce)) - log(m)))
return(a)
## Preserves sparsity.
compute_transposed_PsiNorm_sf <- function(tx) {
## Temporary workaround until operations from the Math group (e.g. log1p)
## are supported on a SparseMatrix object of type "integer".
## See https://github.com/Bioconductor/SparseArray/blob/devel/TODO
if (type(tx) != "double")
type(tx) <- "double"
x2 <- log1p(tx)
m2 <- MatrixGenerics::rowMins(x2)
x2 <- mat_v_sub(x2, m2)
MatrixGenerics::rowSums2(x2) / ncol(tx)
}

## Preserves sparsity.
computePsiNormSF <- function(x) {
1/pareto.MLE(x+1)
compute_transposed_PsiNorm_sf(t(x))
}

.build_linear_index <- function(nr, nc, i) {
stopifnot(is.integer(nr), length(nr) == 1L,
is.integer(nc), length(nc) == 1L, is.integer(i))
rep.int(nr * (seq_len(nc) - 1L), rep.int(length(i), nc)) + i
}

## Implements optimized substraction between a matrix-like object 'mat' and
## an ordinary vector 'v' with mostly zeros that accepts a SparseMatrix
## object (in which case it also returns a SparseMatrix object). Note that
## the SparseArray package does not support this operation out-of-the-box at
## the moment.
mat_v_sub <- function(mat, v) {
mat_dim <- dim(mat)
stopifnot(length(mat_dim) == 2L, is.vector(v), length(v) == mat_dim[[1L]])
nzidx <- nzwhich(v) # indices of nonzero values in 'v'
if (length(nzidx) == 0L)
return(mat) # no-op
if (!is(mat, "SparseMatrix"))
return(mat - v)
## Works well if 'nzidx' is short (i.e. 'v' contains mostly zeros).
Lidx <- .build_linear_index(mat_dim[[1L]], mat_dim[[2L]], nzidx)
mat[Lidx] <- mat[Lidx] - v[nzidx]
mat
}

## Implements division between a matrix-like object 'mat' and an ordinary
## vector 'v' with mostly nonzero/non-NA values that accepts a SparseMatrix
## object (in which case it also returns a SparseMatrix object). Note that
## the SparseArray package only supports this operation when 'v' contains
## no zeros or NAs at the moment.
mat_v_div <- function(mat, v) {
mat_dim <- dim(mat)
stopifnot(length(mat_dim) == 2L, is.vector(v), length(v) == mat_dim[[1L]])
idx <- which(is.na(v) | v == 0)
if (length(idx) == 0L || !is(mat, "SparseMatrix"))
return(mat / v)
## Works well if 'idx' is short (i.e. if 'v' contains very few zeros/NAs).
v2 <- v
v2[idx] <- 1L
mat <- mat / v2
Lidx <- .build_linear_index(mat_dim[[1L]], mat_dim[[2L]], idx)
mat[Lidx] <- mat[Lidx] / v[idx]
mat
}

29 changes: 24 additions & 5 deletions tests/testthat/test-pareto_norm.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,33 @@
test_that("PSiNorm works with all input classes", {
m <- matrix(c(1,0,2,0,2,9,3,0), ncol=2)

m <- matrix(0L, nrow=4, ncol=5)
m[c(1,3) , 1] <- 1:2
m[1:3, 3] <- c(2L, 9L, 3L)
m[ , 4] <- 2:1
m[2, 5] <- 1L

out1 <- PsiNorm(m)


svt <- as(m, "SVT_SparseMatrix")
out_svt <- PsiNorm(svt)
expect_true(is(out_svt, "SVT_SparseMatrix"))
expect_equal(out1, as.matrix(out_svt))

dm1 <- DelayedArray::DelayedArray(m)
out_dm1 <- PsiNorm(dm1)
expect_true(is(out_dm1, "DelayedMatrix"))
expect_equal(out1, as.matrix(out_dm1))

dm2 <- DelayedArray::DelayedArray(svt)
out_dm2 <- PsiNorm(dm2)
expect_true(is(out_dm2, "DelayedMatrix"))
expect_equal(out1, as.matrix(out_dm2))

se <- SummarizedExperiment(m)
sce <- SingleCellExperiment(m)

out_se <- PsiNorm(se)
expect_equal(out1, assay(out_se, "PsiNorm"))

out_sce <- PsiNorm(sce, whichAssay = 1)
out2 <- t(t(m)/sizeFactors(out_sce))
expect_equal(out1, out2)
Expand Down

0 comments on commit 120e114

Please sign in to comment.