From 42931b9f011c5ed8519848a85f414124aae57657 Mon Sep 17 00:00:00 2001 From: dorien-er Date: Mon, 8 Sep 2025 18:15:55 +0200 Subject: [PATCH 1/3] Fix normalization check - ignore linter warnings --- .../create_pseudobulk/script.py | 13 +++++++-- .../create_pseudobulk/test.py | 28 +++++++++++++++++++ src/differential_expression/deseq2/script.R | 15 ---------- src/differential_expression/deseq2/test.py | 3 -- 4 files changed, 39 insertions(+), 20 deletions(-) diff --git a/src/differential_expression/create_pseudobulk/script.py b/src/differential_expression/create_pseudobulk/script.py index 257893c6e69..cbcfc22b70f 100644 --- a/src/differential_expression/create_pseudobulk/script.py +++ b/src/differential_expression/create_pseudobulk/script.py @@ -9,7 +9,7 @@ par = { "input": "resources_test/annotation_test_data/TS_Blood_filtered.h5mu", "modality": "rna", - "input_layer": None, + "input_layer": "log_normalized", "obs_label": "cell_type", "obs_groups": ["treatment", "donor_id", "disease"], "obs_cell_count": "n_cells", @@ -27,12 +27,21 @@ def is_normalized(layer): + exp_layer = np.expm1(layer) # Inverse of log1p + if sp.issparse(layer): row_sums = np.array(layer.sum(axis=1)).flatten() + exp_row_sums = np.array(exp_layer.sum(axis=1)).flatten() else: row_sums = layer.sum(axis=1) + exp_row_sums = exp_layer.sum(axis=1) + + is_normalized = np.allclose(row_sums, 1) + is_log1p_normalized = np.isfinite(exp_row_sums).all() and np.allclose( + exp_row_sums, exp_row_sums[0] + ) - return np.allclose(row_sums, 1) + return is_normalized or is_log1p_normalized def count_obs(adata, pb_adata, obs_cols): diff --git a/src/differential_expression/create_pseudobulk/test.py b/src/differential_expression/create_pseudobulk/test.py index 859be8874b1..5db7f821990 100644 --- a/src/differential_expression/create_pseudobulk/test.py +++ b/src/differential_expression/create_pseudobulk/test.py @@ -1,6 +1,8 @@ import os import mudata as mu import sys +import re +import subprocess import pytest ## VIASH 
START @@ -41,6 +43,32 @@ def test_simple_execution(run_component, random_h5mu_path): assert adata.shape[0] == 8, "Expected a total of 8 pseudobulk samples in the output" +def test_log_normalized_counts(run_component, random_h5mu_path): + output_path = random_h5mu_path() + with pytest.raises(subprocess.CalledProcessError) as err: + run_component( + [ + "--input", + input_path, + "--output", + output_path, + "--input_layer", + "log_normalized", + "--obs_label", + "cell_type", + "--obs_groups", + "treatment", + "--output_compression", + "gzip", + ] + ) + + assert re.search( + r"ValueError: Input layer must contain raw counts.", + err.value.stdout.decode("utf-8"), + ) + + def test_multiple_factors(run_component, random_h5mu_path): output_path = random_h5mu_path() diff --git a/src/differential_expression/deseq2/script.R b/src/differential_expression/deseq2/script.R index a6bde13b473..b6d30cfb55e 100644 --- a/src/differential_expression/deseq2/script.R +++ b/src/differential_expression/deseq2/script.R @@ -59,17 +59,6 @@ h5mu_to_h5ad <- function(h5mu_path, modality_name) { tmp_path } -# Check if expression data is normalized (row sums =~ 1) -is_normalized <- function(layer) { - row_sums <- if (is(layer, "sparseMatrix") || is(layer, "dgCMatrix")) { - Matrix::rowSums(layer) - } else { - rowSums(layer) - } - - all(abs(row_sums - 1) < 1e-6, na.rm = TRUE) -} - # Extract design factors from formula parse_design_formula <- function(design_formula) { if (!grepl("^~\\s*\\w+(\\s*\\+\\s*\\w+)*$", design_formula)) { @@ -293,10 +282,6 @@ main <- function() { mod$X } - if (is_normalized(layer)) { - stop("Input layer must contain raw counts.") - } - # Prepare analysis components cat("Preparing design formula\n") design_factors <- parse_design_formula(par$design_formula) diff --git a/src/differential_expression/deseq2/test.py b/src/differential_expression/deseq2/test.py index 63a05f0f4e6..fe53dcc3564 100644 --- a/src/differential_expression/deseq2/test.py +++ 
b/src/differential_expression/deseq2/test.py @@ -4,7 +4,6 @@ import pandas as pd import re - ## VIASH START meta = {"resources_dir": "resources_test/"} ## VIASH END @@ -267,7 +266,6 @@ def test_invalid_contrast_column(run_component, tmp_path, pseudobulk_test_data_p ] ) - # Updated regex to match actual R error format (no square brackets) assert re.search( r"Missing required columns in metadata: nonexistent_column", err.value.stdout.decode("utf-8"), @@ -296,7 +294,6 @@ def test_invalid_design_column(run_component, tmp_path, pseudobulk_test_data_pat ] ) - # Updated regex to match actual R error format (no square brackets) assert re.search( r"Invalid design formula: 'malformed formula'", err.value.stdout.decode("utf-8"), From a598ee03db637d8152dbedccd8980d86de69615b Mon Sep 17 00:00:00 2001 From: dorien-er Date: Mon, 8 Sep 2025 18:47:00 +0200 Subject: [PATCH 2/3] update changelog --- CHANGELOG.md | 4 +++ .../from_h5mu_or_h5ad_to_seurat/aim_amp.yaml | 31 +++++++++++++++++++ .../create_pseudobulk/script.py | 2 +- src/differential_expression/deseq2/test.py | 1 + 4 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 src/convert/from_h5mu_or_h5ad_to_seurat/aim_amp.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 8367a8aff6e..b848fba1cdb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,10 @@ * `workflows/differential_expression/pseudobulk_deseq2`: Workflow for generating pseudobulk samples from single-cell data followed by DESeq2 differential expression analysis (PR #1044) +## BUG FIX + +* `differential_expression/create_pseudobulk`: Fixed the check to verify that the raw counts layer was passed (PR #1072). 
+ # openpipelines 3.0.0 ## BREAKING CHANGES diff --git a/src/convert/from_h5mu_or_h5ad_to_seurat/aim_amp.yaml b/src/convert/from_h5mu_or_h5ad_to_seurat/aim_amp.yaml new file mode 100644 index 00000000000..1885742b8d3 --- /dev/null +++ b/src/convert/from_h5mu_or_h5ad_to_seurat/aim_amp.yaml @@ -0,0 +1,31 @@ +- id: Xenium_RA-SYN +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00150__0027107__slidearray0097__20241023__195946.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00150__0030961__slidearray0093__20241011__193120.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00150__0031015__slidearray0085__20241023__195946.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00150__0031100__slidearray0095__20241011__193120.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00392__0027093__slidearray0099__20241113__210855.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00392__0027248__slidearray0091__20241011__193809.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00392__0027249__slidearray0089__20241011__193808.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00392__0030554__slidearray0081__20241018__192235.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00392__0030701__slidearray0083__20241018__192235.zip + +- id: Xenium_SLE-KDY +s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00056__0030582__slidearray0073__20240724__224332/output-XETG00056__0030582__slidearray0073__20240724__224332 
+s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00056__0030594__slidearray0077__20240812__180808/output-XETG00056__0030594__slidearray077__20240812__180808 +s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00056__0030629__slidearray0079__20240812__180808/output-XETG00056__0030629__slidearray079__20240812__180808 +s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00056__0030973__slidearray0075__20240724__224332/output-XETG00056__0030973__slidearray0075__20240724__224332 + +- id: Xenium_SLE-SKN +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_SLE-SKN/Level_1_Xenium Output/output-XETG00150__0043958__8073578235-01-02__20250130__211258.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_SLE-SKN/Level_1_Xenium Output/output-XETG00150__0045328__8073579012-01-02__20250206__204720.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_SLE-SKN/Level_1_Xenium Output/output-XETG00150__0045329__8073578531-01-02__20250206__204719.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_SLE-SKN/Level_1_Xenium Output/output-XETG00150__0305066__8073578716-01-02__20250130__211258.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_SLE-SKN/Level_1_Xenium Output/output-XETG00392__0030543__8073578310-01-02__20250206__204225.zip +s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_SLE-SKN/Level_1_Xenium Output/output-XETG00392__0043961__8073578597-01-02__20250206__204225.zip + +- id: Xenium_SjD-SGL +s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00382_0045595_8073578486-02-02_20250430_161333/output-XETG00382_0045595_Batch_3_8073578486_20250430_161333/ +s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00382_0045628_8073578937-02-02_20250430_161333/output-XETG00382_0045628_Batch_4_8073578937_20250430_161333/ 
+s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00382_0067418_8073578515-02-02_20250523_161643/output-XETG00382_0067418_Batch_1_8073578515_20250523_161643/ +s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00382_0067432_8073578922-02-02_20250523_161643/output-XETG00382_0067432_Batch_2_8073578922_20250523_161643/ + \ No newline at end of file diff --git a/src/differential_expression/create_pseudobulk/script.py b/src/differential_expression/create_pseudobulk/script.py index cbcfc22b70f..b65b9b5bef9 100644 --- a/src/differential_expression/create_pseudobulk/script.py +++ b/src/differential_expression/create_pseudobulk/script.py @@ -9,7 +9,7 @@ par = { "input": "resources_test/annotation_test_data/TS_Blood_filtered.h5mu", "modality": "rna", - "input_layer": "log_normalized", + "input_layer": None, "obs_label": "cell_type", "obs_groups": ["treatment", "donor_id", "disease"], "obs_cell_count": "n_cells", diff --git a/src/differential_expression/deseq2/test.py b/src/differential_expression/deseq2/test.py index fe53dcc3564..54bab701b81 100644 --- a/src/differential_expression/deseq2/test.py +++ b/src/differential_expression/deseq2/test.py @@ -4,6 +4,7 @@ import pandas as pd import re + ## VIASH START meta = {"resources_dir": "resources_test/"} ## VIASH END From f607323b9b4f7af5d1fe17d85919e02cd93ac4fc Mon Sep 17 00:00:00 2001 From: dorien-er Date: Mon, 8 Sep 2025 18:48:37 +0200 Subject: [PATCH 3/3] cleanup --- .../from_h5mu_or_h5ad_to_seurat/aim_amp.yaml | 31 ------------------- 1 file changed, 31 deletions(-) delete mode 100644 src/convert/from_h5mu_or_h5ad_to_seurat/aim_amp.yaml diff --git a/src/convert/from_h5mu_or_h5ad_to_seurat/aim_amp.yaml b/src/convert/from_h5mu_or_h5ad_to_seurat/aim_amp.yaml deleted file mode 100644 index 1885742b8d3..00000000000 --- a/src/convert/from_h5mu_or_h5ad_to_seurat/aim_amp.yaml +++ /dev/null @@ -1,31 +0,0 @@ -- id: Xenium_RA-SYN 
-s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00150__0027107__slidearray0097__20241023__195946.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00150__0030961__slidearray0093__20241011__193120.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00150__0031015__slidearray0085__20241023__195946.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00150__0031100__slidearray0095__20241011__193120.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00392__0027093__slidearray0099__20241113__210855.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00392__0027248__slidearray0091__20241011__193809.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00392__0027249__slidearray0089__20241011__193808.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00392__0030554__slidearray0081__20241018__192235.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_RA-SYN/Level_1_Xenium Output/output-XETG00392__0030701__slidearray0083__20241018__192235.zip - -- id: Xenium_SLE-KDY -s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00056__0030582__slidearray0073__20240724__224332/output-XETG00056__0030582__slidearray0073__20240724__224332 -s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00056__0030594__slidearray0077__20240812__180808/output-XETG00056__0030594__slidearray077__20240812__180808 -s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00056__0030629__slidearray0079__20240812__180808/output-XETG00056__0030629__slidearray079__20240812__180808 
-s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00056__0030973__slidearray0075__20240724__224332/output-XETG00056__0030973__slidearray0075__20240724__224332 - -- id: Xenium_SLE-SKN -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_SLE-SKN/Level_1_Xenium Output/output-XETG00150__0043958__8073578235-01-02__20250130__211258.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_SLE-SKN/Level_1_Xenium Output/output-XETG00150__0045328__8073579012-01-02__20250206__204720.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_SLE-SKN/Level_1_Xenium Output/output-XETG00150__0045329__8073578531-01-02__20250206__204719.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_SLE-SKN/Level_1_Xenium Output/output-XETG00150__0305066__8073578716-01-02__20250130__211258.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_SLE-SKN/Level_1_Xenium Output/output-XETG00392__0030543__8073578310-01-02__20250206__204225.zip -s3://itx-acd-xfer-internal/mchambe3/AMP_AIM/Xenium/Xenium_SLE-SKN/Level_1_Xenium Output/output-XETG00392__0043961__8073578597-01-02__20250206__204225.zip - -- id: Xenium_SjD-SGL -s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00382_0045595_8073578486-02-02_20250430_161333/output-XETG00382_0045595_Batch_3_8073578486_20250430_161333/ -s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00382_0045628_8073578937-02-02_20250430_161333/output-XETG00382_0045628_Batch_4_8073578937_20250430_161333/ -s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00382_0067418_8073578515-02-02_20250523_161643/output-XETG00382_0067418_Batch_1_8073578515_20250523_161643/ -s3://itx-del-data-pipelines/droosen1/AMP_AIM/Xenium/raw_output_bundles/output-XETG00382_0067432_8073578922-02-02_20250523_161643/output-XETG00382_0067432_Batch_2_8073578922_20250523_161643/ - \ No newline at end of file