Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@

* `workflows/differential_expression/pseudobulk_deseq2`: Workflow for generating pseudobulk samples from single-cell data followed by DESeq2 differential expression analysis (PR #1044)

## BUG FIX

* `differential_expression/create_pseudobulks`: Fixed the check to verify that the raw counts layer was passed (PR #1072).

# openpipelines 3.0.0

## BREAKING CHANGES
Expand Down
11 changes: 10 additions & 1 deletion src/differential_expression/create_pseudobulk/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,21 @@


def is_normalized(layer):
exp_layer = np.expm1(layer) # Inverse of log1p

if sp.issparse(layer):
row_sums = np.array(layer.sum(axis=1)).flatten()
exp_row_sums = np.array(exp_layer.sum(axis=1)).flatten()
else:
row_sums = layer.sum(axis=1)
exp_row_sums = exp_layer.sum(axis=1)

is_normalized = np.allclose(row_sums, 1)
is_log1p_normalized = np.isfinite(exp_row_sums).all() and np.allclose(
exp_row_sums, exp_row_sums[0]
)

return np.allclose(row_sums, 1)
return is_normalized or is_log1p_normalized


def count_obs(adata, pb_adata, obs_cols):
Expand Down
28 changes: 28 additions & 0 deletions src/differential_expression/create_pseudobulk/test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import mudata as mu
import sys
import re
import subprocess
import pytest

## VIASH START
Expand Down Expand Up @@ -41,6 +43,32 @@ def test_simple_execution(run_component, random_h5mu_path):
assert adata.shape[0] == 8, "Expected a total of 8 pseudobulk samples in the output"


def test_log_normalized_counts(run_component, random_h5mu_path):
output_path = random_h5mu_path()
with pytest.raises(subprocess.CalledProcessError) as err:
run_component(
[
"--input",
input_path,
"--output",
output_path,
"--input_layer",
"log_normalized",
"--obs_label",
"cell_type",
"--obs_groups",
"treatment",
"--output_compression",
"gzip",
]
)

assert re.search(
r"ValueError: Input layer must contain raw counts.",
err.value.stdout.decode("utf-8"),
)


def test_multiple_factors(run_component, random_h5mu_path):
output_path = random_h5mu_path()

Expand Down
15 changes: 0 additions & 15 deletions src/differential_expression/deseq2/script.R
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,6 @@ h5mu_to_h5ad <- function(h5mu_path, modality_name) {
tmp_path
}

# Check if expression data is normalized (row sums =~ 1)
is_normalized <- function(layer) {
row_sums <- if (is(layer, "sparseMatrix") || is(layer, "dgCMatrix")) {
Matrix::rowSums(layer)
} else {
rowSums(layer)
}

all(abs(row_sums - 1) < 1e-6, na.rm = TRUE)
}

# Extract design factors from formula
parse_design_formula <- function(design_formula) {
if (!grepl("^~\\s*\\w+(\\s*\\+\\s*\\w+)*$", design_formula)) {
Expand Down Expand Up @@ -293,10 +282,6 @@ main <- function() {
mod$X
}

if (is_normalized(layer)) {
stop("Input layer must contain raw counts.")
}

# Prepare analysis components
cat("Preparing design formula\n")
design_factors <- parse_design_formula(par$design_formula)
Expand Down
2 changes: 0 additions & 2 deletions src/differential_expression/deseq2/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,6 @@ def test_invalid_contrast_column(run_component, tmp_path, pseudobulk_test_data_p
]
)

# Updated regex to match actual R error format (no square brackets)
assert re.search(
r"Missing required columns in metadata: nonexistent_column",
err.value.stdout.decode("utf-8"),
Expand Down Expand Up @@ -296,7 +295,6 @@ def test_invalid_design_column(run_component, tmp_path, pseudobulk_test_data_pat
]
)

# Updated regex to match actual R error format (no square brackets)
assert re.search(
r"Invalid design formula: 'malformed formula'",
err.value.stdout.decode("utf-8"),
Expand Down
Loading