From f7e1ff6bd6c59cfab1a3edd57f3577717b9ea3eb Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Mon, 10 Mar 2025 10:15:43 +0100
Subject: [PATCH 01/19] add params, update test data

---
 resources_test_scripts/qc_sample_data.sh      | 50 +++++++++++++++++--
 .../generate_report/config.vsh.yaml           | 14 ++++++
 src/ingestion_qc/generate_report/main.nf      | 21 ++++++--
 src/ingestion_qc/generate_report/test.sh      |  6 +--
 .../h5mu_to_qc_json/config.vsh.yaml           |  4 +-
 src/ingestion_qc/h5mu_to_qc_json/test.py      |  4 +-
 6 files changed, 84 insertions(+), 15 deletions(-)

diff --git a/resources_test_scripts/qc_sample_data.sh b/resources_test_scripts/qc_sample_data.sh
index d88c436..993a6e1 100755
--- a/resources_test_scripts/qc_sample_data.sh
+++ b/resources_test_scripts/qc_sample_data.sh
@@ -5,7 +5,7 @@ OUT_DIR=resources_test/qc_sample_data
 [ ! -d "$OUT_DIR" ] && mkdir -p "$OUT_DIR"
 
 # fetch/create h5mu from somewhere
-cat > /tmp/params.yaml <<EOF
+cat > /tmp/params_create_h5mu.yaml <<EOF
 param_list:
   - id: sample_one
     input_id: sample_one
@@ -24,13 +24,55 @@ nextflow run openpipelines-bio/openpipeline \
   -r 2.0.0 \
   -main-script target/nextflow/metadata/add_id/main.nf \
   -profile docker \
-  -params-file /tmp/params.yaml \
+  -params-file /tmp/params_create_h5mu.yaml \
+  -resume
+
+cat > /tmp/params_subset.yaml <<EOF
+param_list:
+  - id: sample_one
+    input: resources_test/qc_sample_data/sample_one.qc.h5mu
+  - id: sample_two
+    input: resources_test/qc_sample_data/sample_two.qc.h5mu
+output: '\$id.qc.h5mu'
+number_of_observations: 10000
+output_compression: gzip
+publish_dir: "$OUT_DIR"
+EOF
+
+# subset h5mus
+nextflow run openpipelines-bio/openpipeline \
+  -latest \
+  -r 2.0.0 \
+  -main-script target/nextflow/filter/subset_h5mu/main.nf \
+  -profile docker \
+  -params-file /tmp/params_subset.yaml \
+  -resume
+
+# generate cellbender out for testing
+cat > /tmp/params_cellbender.yaml <<EOF
+param_list:
+  - id: sample_one
+    input: resources_test/qc_sample_data/sample_one.qc.h5mu
+  - id: sample_two
+    input: resources_test/qc_sample_data/sample_two.qc.h5mu
+output: '\$id.qc.cellbender.h5mu'
+epochs: 5
+output_compression: gzip
+publish_dir: "$OUT_DIR"
+EOF
+
+nextflow run openpipelines-bio/openpipeline \
+  -latest \
+  -r 2.0.0 \
+  -main-script target/nextflow/correction/cellbender_remove_background/main.nf \
+  -profile docker \
+  -params-file /tmp/params_cellbender.yaml \
   -resume
 
 # generate json for testing
 viash run src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml --engine docker -- \
-  --input "$OUT_DIR"//sample_one.qc.h5mu \
-  --input "$OUT_DIR"/sample_two.qc.h5mu \
+  --input "$OUT_DIR"/sample_one.qc.cellbender.h5mu \
+  --input "$OUT_DIR"/sample_two.qc.cellbender.h5mu \
   --output "$OUT_DIR"/dataset.json
 
 # copy to s3
diff --git a/src/ingestion_qc/generate_report/config.vsh.yaml b/src/ingestion_qc/generate_report/config.vsh.yaml
index 23db8a6..57878ae 100644
--- a/src/ingestion_qc/generate_report/config.vsh.yaml
+++ b/src/ingestion_qc/generate_report/config.vsh.yaml
@@ -59,6 +59,17 @@ argument_groups:
         default: "ribosomal"
         description: |
           In which .var slot to store a boolean array corresponding the ribosomal genes.
+      - mame: "--obs_cell_probability"
+        type: string
+        required: false
+        default: "cellbender_cell_probability"
+        description: |
+          In which .obs slot to store the cell probability.
+      - name: "--cellbender_epochs"
+        type: integer
+        required: false
+        description: Number of epochs to train cellbender. 
+        default: 150
 
   - name: Outputs
     arguments:
@@ -78,6 +89,9 @@ dependencies:
   - name: workflows/qc/qc
     alias: qc_wf
     repository: openpipeline
+  - name: correction/cellbender_remove_background
+    alias: cellbender
+    repository: openpipeline
   - name: ingestion_qc/h5mu_to_qc_json
   - name: ingestion_qc/generate_html
 runners:
diff --git a/src/ingestion_qc/generate_report/main.nf b/src/ingestion_qc/generate_report/main.nf
index e8c7964..f6260c8 100644
--- a/src/ingestion_qc/generate_report/main.nf
+++ b/src/ingestion_qc/generate_report/main.nf
@@ -8,14 +8,25 @@ workflow run_wf {
       [id, state + [_meta: [join_id: id]]]
     }
 
+    // run cellbender
+    | cellbender.run(
+      fromState: [
+        id: "id",
+        input: "input",
+        obs_cell_probability: "obs_cell_probability",
+        epochs: "cellbender_epochs",
+      ],
+      toState: ["output"]
+    )
+
     // run qc on each sample
     | qc_wf.run(
       fromState: [
-        "id",
-        "input",
-        "var_gene_names",
-        "var_name_mitochondrial_genes",
-        "var_name_ribosomal_genes"
+        id: "id",
+        input: "output",
+        var_gene_names: "var_gene_names",
+        var_name_mitochondrial_genes: "var_name_mitochondrial_genes",
+        var_name_ribosomal_genes: "var_name_ribosomal_genes"
       ],
       toState: ["output"]
     )
diff --git a/src/ingestion_qc/generate_report/test.sh b/src/ingestion_qc/generate_report/test.sh
index e0e81aa..f681159 100755
--- a/src/ingestion_qc/generate_report/test.sh
+++ b/src/ingestion_qc/generate_report/test.sh
@@ -4,17 +4,17 @@ viash ns build --setup cb --parallel
 
 cat > /tmp/params.yaml <<EOF
 param_list:
-  - input: resources_test/sample_data/sample_1.qc.output.h5mu
+  - input: resources_test/qc_sample_data/sample_one.qc.h5mu
     id: sample_one
-  - input: resources_test/sample_data/sample_2.qc.output.h5mu
+  - input: resources_test/qc_sample_data/sample_two.qc.h5mu
     id: sample_two
 output_qc_json: output_qc.json
 output_html: output_report.html
 EOF
 
+
 nextflow run . \
   -main-script target/nextflow/ingestion_qc/generate_report/main.nf \
   -params-file /tmp/params.yaml \
   -profile docker \
   --publish_dir test_results \
-  --resume
diff --git a/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml b/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
index 5aa68fd..6e01e2e 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
+++ b/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
@@ -50,7 +50,9 @@ argument_groups:
         type: string
         multiple: true
         description: The keys in the h5mu .obs to include in the output JSON
-        default: ["total_counts", "num_nonzero_vars", "fraction_mitochondrial", "fraction_ribosomal"]
+        default: ["total_counts", "num_nonzero_vars", "fraction_mitochondrial", "fraction_ribosomal",
+                  "cellbender_background_fraction", "cellbender_cell_probability", "cellbender_cell_size",
+                  "cellbender_droplet_efficiency"]
       - name: --cellranger_metrics_uns_key
         type: string
         description: The key in the h5mu file .uns that contains the cellranger metrics
diff --git a/src/ingestion_qc/h5mu_to_qc_json/test.py b/src/ingestion_qc/h5mu_to_qc_json/test.py
index f5bddd4..a655f74 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/test.py
+++ b/src/ingestion_qc/h5mu_to_qc_json/test.py
@@ -16,8 +16,8 @@ def test_simple_execution(run_component, tmp_path):
     
     run_component(
         [
-            "--input", meta["resources_dir"] + "/resources_test/qc_sample_data/sample_one.qc.h5mu",
-            "--input", meta["resources_dir"] + "/resources_test/qc_sample_data/sample_two.qc.h5mu",
+            "--input", meta["resources_dir"] + "/resources_test/qc_sample_data/sample_one.qc.cellbender.h5mu",
+            "--input", meta["resources_dir"] + "/resources_test/qc_sample_data/sample_two.qc.cellbender.h5mu",
             "--output", output_json_path,
         ]
     )

From 2adbb699da6c70fcd02a68a888d204da14193fcc Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Mon, 10 Mar 2025 10:20:46 +0100
Subject: [PATCH 02/19] add executable runner

---
 src/ingestion_qc/generate_report/config.vsh.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ingestion_qc/generate_report/config.vsh.yaml b/src/ingestion_qc/generate_report/config.vsh.yaml
index 57878ae..48bb970 100644
--- a/src/ingestion_qc/generate_report/config.vsh.yaml
+++ b/src/ingestion_qc/generate_report/config.vsh.yaml
@@ -95,4 +95,5 @@ dependencies:
   - name: ingestion_qc/h5mu_to_qc_json
   - name: ingestion_qc/generate_html
 runners:
+  - type: executable
   - type: nextflow

From dc9f4cee833cc7021059a84b4ebfd62c282f0329 Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Mon, 10 Mar 2025 10:40:24 +0100
Subject: [PATCH 03/19] fix typo

---
 src/ingestion_qc/generate_report/config.vsh.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/ingestion_qc/generate_report/config.vsh.yaml b/src/ingestion_qc/generate_report/config.vsh.yaml
index 48bb970..eab9da2 100644
--- a/src/ingestion_qc/generate_report/config.vsh.yaml
+++ b/src/ingestion_qc/generate_report/config.vsh.yaml
@@ -59,7 +59,7 @@ argument_groups:
         default: "ribosomal"
         description: |
           In which .var slot to store a boolean array corresponding the ribosomal genes.
-      - mame: "--obs_cell_probability"
+      - name: "--obs_cell_probability"
         type: string
         required: false
         default: "cellbender_cell_probability"
@@ -95,5 +95,4 @@ dependencies:
   - name: ingestion_qc/h5mu_to_qc_json
   - name: ingestion_qc/generate_html
 runners:
-  - type: executable
   - type: nextflow

From a44993c4df07781a9e91d2c43c3ec3b3fb97a487 Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Mon, 10 Mar 2025 11:24:05 +0100
Subject: [PATCH 04/19] update unit tests

---
 src/ingestion_qc/h5mu_to_qc_json/test.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/ingestion_qc/h5mu_to_qc_json/test.py b/src/ingestion_qc/h5mu_to_qc_json/test.py
index a655f74..955749d 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/test.py
+++ b/src/ingestion_qc/h5mu_to_qc_json/test.py
@@ -30,7 +30,9 @@ def test_simple_execution(run_component, tmp_path):
     assert output_json_dict.keys() == {"cell_rna_stats", "sample_summary_stats", "metrics_cellranger_stats"}
     
     column_names = [col["name"] for col in output_json_dict["cell_rna_stats"]["columns"]]
-    assert column_names == ["sample_id", "total_counts", "num_nonzero_vars", "fraction_mitochondrial", "fraction_ribosomal"]
+    assert column_names == ["sample_id", "total_counts", "num_nonzero_vars", "fraction_mitochondrial", "fraction_ribosomal",
+                            "cellbender_background_fraction", "cellbender_cell_probability", "cellbender_cell_size",
+                            "cellbender_droplet_efficiency"]
         
     for key in output_json_dict.keys():
         assert output_json_dict[key].keys() == {"num_rows", "num_cols", "columns"}
@@ -43,8 +45,8 @@ def test_set_filters(run_component, tmp_path):
     
     run_component(
         [
-            "--input", meta["resources_dir"] + "/resources_test/qc_sample_data/sample_one.qc.h5mu",
-            "--input", meta["resources_dir"] + "/resources_test/qc_sample_data/sample_two.qc.h5mu",
+            "--input", meta["resources_dir"] + "/resources_test/qc_sample_data/sample_one.qc.cellbender.h5mu",
+            "--input", meta["resources_dir"] + "/resources_test/qc_sample_data/sample_two.qc.cellbender.h5mu",
             "--output", output_json_path,
             "--sample_id_key", "sample_id",
             "--min_total_counts", "10",

From 95f884fd06bbbf5197684b7b03f424f557077763 Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Thu, 13 Mar 2025 15:29:12 +0100
Subject: [PATCH 05/19] updated data parsing

---
 .../h5mu_to_qc_json/config.vsh.yaml           |  8 +++--
 src/ingestion_qc/h5mu_to_qc_json/script.py    | 33 ++++++++++++++++---
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml b/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
index 6e01e2e..825f0d8 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
+++ b/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
@@ -50,8 +50,12 @@ argument_groups:
         type: string
         multiple: true
         description: The keys in the h5mu .obs to include in the output JSON
-        default: ["total_counts", "num_nonzero_vars", "fraction_mitochondrial", "fraction_ribosomal",
-                  "cellbender_background_fraction", "cellbender_cell_probability", "cellbender_cell_size",
+        default: ["total_counts", "num_nonzero_vars", "fraction_mitochondrial", "fraction_ribosomal"]
+      - name: --cellbender_obs_keys
+        type: string
+        multiple: true
+        description: The cellbender keys in the h5mu .obs to include in the output JSON
+        default: ["cellbender_background_fraction", "cellbender_cell_probability", "cellbender_cell_size",
                   "cellbender_droplet_efficiency"]
       - name: --cellranger_metrics_uns_key
         type: string
diff --git a/src/ingestion_qc/h5mu_to_qc_json/script.py b/src/ingestion_qc/h5mu_to_qc_json/script.py
index 34210c3..7161c5d 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/script.py
+++ b/src/ingestion_qc/h5mu_to_qc_json/script.py
@@ -7,7 +7,7 @@
 ## VIASH START
 # inputs = list(Path("data/sample_data/sample_data").glob("*.h5mu"))
 # output = "data/sample-data.json"
-inputs = list(Path("resources_test/qc_sample_data").glob("*.h5mu"))
+inputs = list(Path("resources_test/qc_sample_data").glob("*.qc.cellbender.h5mu"))
 output = "tmp.json"
 par = {
     "input": sorted([str(x) for x in inputs]),
@@ -21,8 +21,13 @@
         "num_nonzero_vars",
         "fraction_mitochondrial",
         "fraction_ribosomal",
-        "pct_of_counts_in_top_50_vars",
     ],
+    "cellbender_obs_keys": [
+        "cellbender_background_fraction",
+        "cellbender_cell_probability",
+        "cellbender_cell_size",
+        "cellbender_droplet_efficiency",
+    ],        
     "cellranger_metrics_uns_key": "metrics_cellranger",
 }
 i = 0
@@ -62,6 +67,7 @@ def transform_df(df):
 
 def main(par):
     cell_stats_dfs = []
+    cellbender_cell_stats_dfs = []
     sample_stats_dfs = []
     metrics_cellranger_dfs = []
 
@@ -93,6 +99,7 @@ def main(par):
         missing_keys = [key for key in par["obs_keys"] if key not in mod_obs.columns]
         if missing_keys:
             raise ValueError(f"Missing keys in obs: {', '.join(missing_keys)}")
+        
 
         sample_id = (
             mod_obs[par["sample_id_key"]].tolist()
@@ -106,7 +113,7 @@ def main(par):
                 **{key: mod_obs[key] for key in par["obs_keys"]},
             }
         )
-
+        
         sample_summary_stats = pd.DataFrame(
             {
                 "sample_id": pd.Categorical([sample_id[0]]),
@@ -147,18 +154,36 @@ def main(par):
             metrics["sample_id"] = [sample_id[0]]
             metrics_cellranger_dfs.append(metrics)
 
+        if par["cellbender_obs_keys"]:
+            missing_cellbender_keys = [key for key in par["cellbender_obs_keys"] if key not in mod_obs.columns]
+            if missing_cellbender_keys:
+                raise ValueError(f"Missing keys in obs: {', '.join(missing_cellbender_keys)}. Run cellbenbder first.")
+            
+            cellbender_rna_stats = pd.DataFrame(
+                {
+                    "sample_id": pd.Categorical(sample_id),
+                    **{key: mod_obs[key] for key in par["cellbender_obs_keys"]},
+                }
+            )
+            
+        else:
+            cellbender_rna_stats = pd.DataFrame()
+        
         cell_stats_dfs.append(cell_rna_stats)
+        cellbender_cell_stats_dfs.append(cellbender_rna_stats)
         sample_stats_dfs.append(sample_summary_stats)
 
     combined_cell_stats = pd.concat(cell_stats_dfs, ignore_index=True)
+    combined_cellbender_stats = pd.concat(cellbender_cell_stats_dfs, ignore_index=True)
     combined_sample_stats = pd.concat(sample_stats_dfs, ignore_index=True)
     combined_metrics_cellranger = pd.concat(metrics_cellranger_dfs, ignore_index=True)
 
-    for df in [combined_cell_stats, combined_sample_stats, combined_metrics_cellranger]:
+    for df in [combined_cell_stats, combined_cellbender_stats, combined_sample_stats, combined_metrics_cellranger]:
         df["sample_id"] = pd.Categorical(df["sample_id"])
 
     output = {
         "cell_rna_stats": transform_df(combined_cell_stats),
+        "cellbender_rna_stats": transform_df(combined_cellbender_stats),
         "sample_summary_stats": transform_df(combined_sample_stats),
         "metrics_cellranger_stats": transform_df(combined_metrics_cellranger),
     }

From 844968f144c55bfcbd56e664cc21ebfe343125de Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Thu, 13 Mar 2025 16:20:57 +0100
Subject: [PATCH 06/19] update tests

---
 src/ingestion_qc/h5mu_to_qc_json/test.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/ingestion_qc/h5mu_to_qc_json/test.py b/src/ingestion_qc/h5mu_to_qc_json/test.py
index 955749d..eb84ceb 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/test.py
+++ b/src/ingestion_qc/h5mu_to_qc_json/test.py
@@ -27,12 +27,13 @@ def test_simple_execution(run_component, tmp_path):
     with open(output_json_path, "r") as f:
         output_json_dict = json.load(f)
     
-    assert output_json_dict.keys() == {"cell_rna_stats", "sample_summary_stats", "metrics_cellranger_stats"}
+    assert output_json_dict.keys() == {"cell_rna_stats", "sample_summary_stats", "cellbender_rna_stats", "metrics_cellranger_stats"}
     
-    column_names = [col["name"] for col in output_json_dict["cell_rna_stats"]["columns"]]
-    assert column_names == ["sample_id", "total_counts", "num_nonzero_vars", "fraction_mitochondrial", "fraction_ribosomal",
-                            "cellbender_background_fraction", "cellbender_cell_probability", "cellbender_cell_size",
-                            "cellbender_droplet_efficiency"]
+    column_names_cell = [col["name"] for col in output_json_dict["cell_rna_stats"]["columns"]]
+    assert column_names_cell == ["sample_id", "total_counts", "num_nonzero_vars", "fraction_mitochondrial", "fraction_ribosomal"]
+    
+    column_names_cellbender = [col["name"] for col in output_json_dict["cellbender_rna_stats"]["columns"]]
+    assert column_names_cellbender == ["sample_id", "cellbender_background_fraction", "cellbender_cell_probability", "cellbender_cell_size", "cellbender_droplet_efficiency"]
         
     for key in output_json_dict.keys():
         assert output_json_dict[key].keys() == {"num_rows", "num_cols", "columns"}
@@ -61,7 +62,7 @@ def test_set_filters(run_component, tmp_path):
     with open(output_json_path, "r") as f:
         output_json_dict = json.load(f)
     
-    assert output_json_dict.keys() == {"cell_rna_stats", "sample_summary_stats", "metrics_cellranger_stats"}
+    assert output_json_dict.keys() == {"cell_rna_stats", "sample_summary_stats", "cellbender_rna_stats", "metrics_cellranger_stats"}
     
     column_names = [col["name"] for col in output_json_dict["cell_rna_stats"]["columns"]]
     assert column_names == ["sample_id", "total_counts", "num_nonzero_vars"]

From c570a5482819197e4000d1924d0236c8d913a4d9 Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Tue, 18 Mar 2025 17:31:01 +0100
Subject: [PATCH 07/19] PR updates

---
 _viash.yaml                                      | 2 +-
 src/ingestion_qc/generate_report/config.vsh.yaml | 9 ++++++++-
 src/ingestion_qc/generate_report/main.nf         | 1 +
 src/ingestion_qc/generate_report/test.sh         | 4 +++-
 src/ingestion_qc/h5mu_to_qc_json/script.py       | 2 +-
 5 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/_viash.yaml b/_viash.yaml
index 56e5e0c..251e5f3 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -14,7 +14,7 @@ repositories:
   - name: openpipeline
     repo: openpipelines-bio/openpipeline
     type: github
-    tag: main_build
+    tag: 2.0.0
 
 info:
   test_resources:
diff --git a/src/ingestion_qc/generate_report/config.vsh.yaml b/src/ingestion_qc/generate_report/config.vsh.yaml
index eab9da2..a4149bd 100644
--- a/src/ingestion_qc/generate_report/config.vsh.yaml
+++ b/src/ingestion_qc/generate_report/config.vsh.yaml
@@ -70,7 +70,11 @@ argument_groups:
         required: false
         description: Number of epochs to train cellbender. 
         default: 150
-
+      - name: "--run_cellbender"
+        type: boolean
+        required: false
+        description: Whether to run cellbender or not.
+        default: false
   - name: Outputs
     arguments:
       - name: --output
@@ -79,10 +83,12 @@ argument_groups:
         direction: output
         description: The output HTML report
         example: path/to/file.html
+
 resources:
   - type: nextflow_script
     entrypoint: run_wf
     path: main.nf
+
 dependencies:
   - name: metadata/add_id
     repository: openpipeline
@@ -94,5 +100,6 @@ dependencies:
     repository: openpipeline
   - name: ingestion_qc/h5mu_to_qc_json
   - name: ingestion_qc/generate_html
+
 runners:
   - type: nextflow
diff --git a/src/ingestion_qc/generate_report/main.nf b/src/ingestion_qc/generate_report/main.nf
index f6260c8..1b16d26 100644
--- a/src/ingestion_qc/generate_report/main.nf
+++ b/src/ingestion_qc/generate_report/main.nf
@@ -10,6 +10,7 @@ workflow run_wf {
 
     // run cellbender
     | cellbender.run(
+      runIf: {id, state -> state.run_cellbender},
       fromState: [
         id: "id",
         input: "input",
diff --git a/src/ingestion_qc/generate_report/test.sh b/src/ingestion_qc/generate_report/test.sh
index f681159..08f77c0 100755
--- a/src/ingestion_qc/generate_report/test.sh
+++ b/src/ingestion_qc/generate_report/test.sh
@@ -8,6 +8,8 @@ param_list:
     id: sample_one
   - input: resources_test/qc_sample_data/sample_two.qc.h5mu
     id: sample_two
+cellbender_epochs: 5
+run_cellbender: true
 output_qc_json: output_qc.json
 output_html: output_report.html
 EOF
@@ -17,4 +19,4 @@ nextflow run . \
   -main-script target/nextflow/ingestion_qc/generate_report/main.nf \
   -params-file /tmp/params.yaml \
   -profile docker \
-  --publish_dir test_results \
+  --publish_dir test_results
diff --git a/src/ingestion_qc/h5mu_to_qc_json/script.py b/src/ingestion_qc/h5mu_to_qc_json/script.py
index 7161c5d..65874f2 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/script.py
+++ b/src/ingestion_qc/h5mu_to_qc_json/script.py
@@ -157,7 +157,7 @@ def main(par):
         if par["cellbender_obs_keys"]:
             missing_cellbender_keys = [key for key in par["cellbender_obs_keys"] if key not in mod_obs.columns]
             if missing_cellbender_keys:
-                raise ValueError(f"Missing keys in obs: {', '.join(missing_cellbender_keys)}. Run cellbenbder first.")
+                raise ValueError(f"Missing keys in obs: {', '.join(missing_cellbender_keys)}. Run cellbender first.")
             
             cellbender_rna_stats = pd.DataFrame(
                 {

From 329bc5cdbd41b04a43986e16f124590c559a676d Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Wed, 19 Mar 2025 13:41:52 +0100
Subject: [PATCH 08/19] update optional cellbender step

---
 .../generate_html/config.vsh.yaml             |  2 +-
 .../generate_report/config.vsh.yaml           | 16 ++++------
 src/ingestion_qc/generate_report/main.nf      |  1 -
 .../h5mu_to_qc_json/config.vsh.yaml           |  1 +
 src/ingestion_qc/h5mu_to_qc_json/script.py    | 31 +++++++------------
 src/ingestion_qc/h5mu_to_qc_json/test.py      | 11 +++----
 src/utils/setup_logger.py                     | 12 +++++++
 7 files changed, 36 insertions(+), 38 deletions(-)
 create mode 100644 src/utils/setup_logger.py

diff --git a/src/ingestion_qc/generate_html/config.vsh.yaml b/src/ingestion_qc/generate_html/config.vsh.yaml
index fa79d60..d86d57b 100644
--- a/src/ingestion_qc/generate_html/config.vsh.yaml
+++ b/src/ingestion_qc/generate_html/config.vsh.yaml
@@ -45,7 +45,7 @@ engines:
       - type: docker
         run: |
           npm install -g pnpm@latest-10 \
-          && cd /opt && git clone https://github.com/openpipelines-bio/incubator_ingestion_qc.git \
+          && cd /opt && git clone -b ambient-rna https://github.com/openpipelines-bio/incubator_ingestion_qc.git \
           && cd incubator_ingestion_qc && pnpm install
 runners:
   - type: executable
diff --git a/src/ingestion_qc/generate_report/config.vsh.yaml b/src/ingestion_qc/generate_report/config.vsh.yaml
index a4149bd..1c3e810 100644
--- a/src/ingestion_qc/generate_report/config.vsh.yaml
+++ b/src/ingestion_qc/generate_report/config.vsh.yaml
@@ -59,22 +59,18 @@ argument_groups:
         default: "ribosomal"
         description: |
           In which .var slot to store a boolean array corresponding the ribosomal genes.
-      - name: "--obs_cell_probability"
-        type: string
+  - name: Cellbender options
+    arguments:
+      - name: "--run_cellbender"
+        type: boolean
         required: false
-        default: "cellbender_cell_probability"
-        description: |
-          In which .obs slot to store the cell probability.
+        description: Whether to run cellbender or not.
+        default: false
       - name: "--cellbender_epochs"
         type: integer
         required: false
         description: Number of epochs to train cellbender. 
         default: 150
-      - name: "--run_cellbender"
-        type: boolean
-        required: false
-        description: Whether to run cellbender or not.
-        default: false
   - name: Outputs
     arguments:
       - name: --output
diff --git a/src/ingestion_qc/generate_report/main.nf b/src/ingestion_qc/generate_report/main.nf
index 1b16d26..213caa7 100644
--- a/src/ingestion_qc/generate_report/main.nf
+++ b/src/ingestion_qc/generate_report/main.nf
@@ -14,7 +14,6 @@ workflow run_wf {
       fromState: [
         id: "id",
         input: "input",
-        obs_cell_probability: "obs_cell_probability",
         epochs: "cellbender_epochs",
       ],
       toState: ["output"]
diff --git a/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml b/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
index 825f0d8..7e49651 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
+++ b/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
@@ -64,6 +64,7 @@ argument_groups:
 resources:
   - type: python_script
     path: script.py
+  - path: /src/utils/setup_logger.py
 test_resources:
   - type: python_script
     path: test.py
diff --git a/src/ingestion_qc/h5mu_to_qc_json/script.py b/src/ingestion_qc/h5mu_to_qc_json/script.py
index 65874f2..5992bb9 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/script.py
+++ b/src/ingestion_qc/h5mu_to_qc_json/script.py
@@ -3,6 +3,7 @@
 from pathlib import Path
 import anndata as ad
 import h5py
+import sys
 
 ## VIASH START
 # inputs = list(Path("data/sample_data/sample_data").glob("*.h5mu"))
@@ -34,6 +35,10 @@
 mudata_file = par["input"][i]
 ## VIASH END
 
+sys.path.append(meta["resources_dir"])
+from setup_logger import setup_logger
+
+logger = setup_logger()
 
 def transform_df(df):
     """Transform a DataFrame into the annotation object format."""
@@ -67,7 +72,6 @@ def transform_df(df):
 
 def main(par):
     cell_stats_dfs = []
-    cellbender_cell_stats_dfs = []
     sample_stats_dfs = []
     metrics_cellranger_dfs = []
 
@@ -100,6 +104,10 @@ def main(par):
         if missing_keys:
             raise ValueError(f"Missing keys in obs: {', '.join(missing_keys)}")
         
+        if par["cellbender_obs_keys"]:
+            missing_cellbender_keys = [key for key in par["cellbender_obs_keys"] if key not in mod_obs.columns]
+            if missing_cellbender_keys:
+                logger.info(f"Missing keys in obs: {', '.join(missing_cellbender_keys)}. Run cellbender first to include these metrics.")
 
         sample_id = (
             mod_obs[par["sample_id_key"]].tolist()
@@ -111,6 +119,7 @@ def main(par):
             {
                 "sample_id": pd.Categorical(sample_id),
                 **{key: mod_obs[key] for key in par["obs_keys"]},
+                **{key: mod_obs[key] for key in (par["cellbender_obs_keys"] or []) if key in mod_obs.columns},  # skip cellbender keys absent from .obs (only logged above)
             }
         )
         
@@ -153,37 +162,19 @@ def main(par):
                 metrics[col] = pd.to_numeric(metrics[col], errors="coerce")
             metrics["sample_id"] = [sample_id[0]]
             metrics_cellranger_dfs.append(metrics)
-
-        if par["cellbender_obs_keys"]:
-            missing_cellbender_keys = [key for key in par["cellbender_obs_keys"] if key not in mod_obs.columns]
-            if missing_cellbender_keys:
-                raise ValueError(f"Missing keys in obs: {', '.join(missing_cellbender_keys)}. Run cellbender first.")
-            
-            cellbender_rna_stats = pd.DataFrame(
-                {
-                    "sample_id": pd.Categorical(sample_id),
-                    **{key: mod_obs[key] for key in par["cellbender_obs_keys"]},
-                }
-            )
-            
-        else:
-            cellbender_rna_stats = pd.DataFrame()
         
         cell_stats_dfs.append(cell_rna_stats)
-        cellbender_cell_stats_dfs.append(cellbender_rna_stats)
         sample_stats_dfs.append(sample_summary_stats)
 
     combined_cell_stats = pd.concat(cell_stats_dfs, ignore_index=True)
-    combined_cellbender_stats = pd.concat(cellbender_cell_stats_dfs, ignore_index=True)
     combined_sample_stats = pd.concat(sample_stats_dfs, ignore_index=True)
     combined_metrics_cellranger = pd.concat(metrics_cellranger_dfs, ignore_index=True)
 
-    for df in [combined_cell_stats, combined_cellbender_stats, combined_sample_stats, combined_metrics_cellranger]:
+    for df in [combined_cell_stats, combined_sample_stats, combined_metrics_cellranger]:
         df["sample_id"] = pd.Categorical(df["sample_id"])
 
     output = {
         "cell_rna_stats": transform_df(combined_cell_stats),
-        "cellbender_rna_stats": transform_df(combined_cellbender_stats),
         "sample_summary_stats": transform_df(combined_sample_stats),
         "metrics_cellranger_stats": transform_df(combined_metrics_cellranger),
     }
diff --git a/src/ingestion_qc/h5mu_to_qc_json/test.py b/src/ingestion_qc/h5mu_to_qc_json/test.py
index eb84ceb..c9349ca 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/test.py
+++ b/src/ingestion_qc/h5mu_to_qc_json/test.py
@@ -27,13 +27,12 @@ def test_simple_execution(run_component, tmp_path):
     with open(output_json_path, "r") as f:
         output_json_dict = json.load(f)
     
-    assert output_json_dict.keys() == {"cell_rna_stats", "sample_summary_stats", "cellbender_rna_stats", "metrics_cellranger_stats"}
+    assert output_json_dict.keys() == {"cell_rna_stats", "sample_summary_stats", "metrics_cellranger_stats"}
     
     column_names_cell = [col["name"] for col in output_json_dict["cell_rna_stats"]["columns"]]
-    assert column_names_cell == ["sample_id", "total_counts", "num_nonzero_vars", "fraction_mitochondrial", "fraction_ribosomal"]
-    
-    column_names_cellbender = [col["name"] for col in output_json_dict["cellbender_rna_stats"]["columns"]]
-    assert column_names_cellbender == ["sample_id", "cellbender_background_fraction", "cellbender_cell_probability", "cellbender_cell_size", "cellbender_droplet_efficiency"]
+    assert column_names_cell == ["sample_id", "total_counts", "num_nonzero_vars", "fraction_mitochondrial", "fraction_ribosomal",
+                                 "cellbender_background_fraction", "cellbender_cell_probability", "cellbender_cell_size",
+                                 "cellbender_droplet_efficiency"]
         
     for key in output_json_dict.keys():
         assert output_json_dict[key].keys() == {"num_rows", "num_cols", "columns"}
@@ -62,7 +61,7 @@ def test_set_filters(run_component, tmp_path):
     with open(output_json_path, "r") as f:
         output_json_dict = json.load(f)
     
-    assert output_json_dict.keys() == {"cell_rna_stats", "sample_summary_stats", "cellbender_rna_stats", "metrics_cellranger_stats"}
+    assert output_json_dict.keys() == {"cell_rna_stats", "sample_summary_stats", "metrics_cellranger_stats"}
     
     column_names = [col["name"] for col in output_json_dict["cell_rna_stats"]["columns"]]
     assert column_names == ["sample_id", "total_counts", "num_nonzero_vars"]
diff --git a/src/utils/setup_logger.py b/src/utils/setup_logger.py
new file mode 100644
index 0000000..3ca1cdb
--- /dev/null
+++ b/src/utils/setup_logger.py
@@ -0,0 +1,12 @@
+def setup_logger():
+    """Set the root logger to INFO, add a formatted stdout handler, return it."""
+    import logging
+    from sys import stdout
+    # NOTE(review): every call adds one more stdout handler to the root logger.
+    logger = logging.getLogger()
+    logger.setLevel(logging.INFO)
+    console_handler = logging.StreamHandler(stdout)
+    logFormatter = logging.Formatter("%(asctime)s %(levelname)-8s %(message)s")
+    console_handler.setFormatter(logFormatter)
+    logger.addHandler(console_handler)
+    return logger

From a6be8db3dd9d36f09fecde92f6dea5a12c742ed6 Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Wed, 19 Mar 2025 13:48:38 +0100
Subject: [PATCH 09/19] update unit test

---
 src/ingestion_qc/h5mu_to_qc_json/test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ingestion_qc/h5mu_to_qc_json/test.py b/src/ingestion_qc/h5mu_to_qc_json/test.py
index c9349ca..908f763 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/test.py
+++ b/src/ingestion_qc/h5mu_to_qc_json/test.py
@@ -64,7 +64,8 @@ def test_set_filters(run_component, tmp_path):
     assert output_json_dict.keys() == {"cell_rna_stats", "sample_summary_stats", "metrics_cellranger_stats"}
     
     column_names = [col["name"] for col in output_json_dict["cell_rna_stats"]["columns"]]
-    assert column_names == ["sample_id", "total_counts", "num_nonzero_vars"]
+    assert column_names == ["sample_id", "total_counts", "num_nonzero_vars", "cellbender_background_fraction",
+                            "cellbender_cell_probability", "cellbender_cell_size", "cellbender_droplet_efficiency"]
         
     for key in output_json_dict.keys():
         assert output_json_dict[key].keys() == {"num_rows", "num_cols", "columns"}

From d4034fa43b601d3365400d5f4bac2f1e2e289dad Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Wed, 19 Mar 2025 14:09:48 +0100
Subject: [PATCH 10/19] remove dryrun

---
 resources_test_scripts/qc_sample_data.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources_test_scripts/qc_sample_data.sh b/resources_test_scripts/qc_sample_data.sh
index 993a6e1..59b6b79 100755
--- a/resources_test_scripts/qc_sample_data.sh
+++ b/resources_test_scripts/qc_sample_data.sh
@@ -80,4 +80,4 @@ aws s3 sync \
   --profile di \
   resources_test/qc_sample_data \
   s3://openpipelines-bio/openpipeline_incubator/resources_test/qc_sample_data \
-  --delete --dryrun
+  --delete

From 955af4ee4e1bc1094db62a4597d2d8819a8cd314 Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Wed, 19 Mar 2025 14:17:18 +0100
Subject: [PATCH 11/19] ignore tests conversion component

---
 src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml b/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
index 7e49651..f1f1f1b 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
+++ b/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
@@ -65,11 +65,11 @@ resources:
   - type: python_script
     path: script.py
   - path: /src/utils/setup_logger.py
-test_resources:
-  - type: python_script
-    path: test.py
-  - type: file
-    path: /resources_test
+# test_resources:
+#   - type: python_script
+#     path: test.py
+#   - type: file
+#     path: /resources_test
 engines:
   - type: docker
     image: python:3.12-slim

From d6307a59da05e4215617a284980b27f4de078702 Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Tue, 25 Mar 2025 10:58:13 +0100
Subject: [PATCH 12/19] add directives

---
 src/ingestion_qc/generate_html/config.vsh.yaml   |  2 ++
 src/ingestion_qc/generate_report/main.nf         | 12 ++++++------
 src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml |  2 ++
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/ingestion_qc/generate_html/config.vsh.yaml b/src/ingestion_qc/generate_html/config.vsh.yaml
index d86d57b..2a744bf 100644
--- a/src/ingestion_qc/generate_html/config.vsh.yaml
+++ b/src/ingestion_qc/generate_html/config.vsh.yaml
@@ -50,3 +50,5 @@ engines:
 runners:
   - type: executable
   - type: nextflow
+    directives:
+      label: [lowmem, lowdisk]
diff --git a/src/ingestion_qc/generate_report/main.nf b/src/ingestion_qc/generate_report/main.nf
index 213caa7..952d98e 100644
--- a/src/ingestion_qc/generate_report/main.nf
+++ b/src/ingestion_qc/generate_report/main.nf
@@ -8,6 +8,12 @@ workflow run_wf {
       [id, state + [_meta: [join_id: id]]]
     }
 
+    // add sample ids to each state
+    | add_id.run(
+      fromState: [input_id: "id", input: "output"],
+      toState: ["output"]
+    )
+    
     // run cellbender
     | cellbender.run(
       runIf: {id, state -> state.run_cellbender},
@@ -31,12 +37,6 @@ workflow run_wf {
       toState: ["output"]
     )
 
-    // add sample ids to each state
-    | add_id.run(
-      fromState: [input_id: "id", input: "output"],
-      toState: ["output"]
-    )
-
     // combine files into one state
     | joinStates { ids, states ->
       def newId = "combined"
diff --git a/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml b/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
index f1f1f1b..e9f9bc8 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
+++ b/src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml
@@ -92,3 +92,5 @@ engines:
 runners:
   - type: executable
   - type: nextflow
+    directives:
+      label: [midmem, middisk]

From c16be2c1b492d031f190798281b1f77c1c5a64d7 Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Tue, 25 Mar 2025 11:12:33 +0100
Subject: [PATCH 13/19] move add_id

---
 src/ingestion_qc/generate_report/main.nf | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/ingestion_qc/generate_report/main.nf b/src/ingestion_qc/generate_report/main.nf
index 952d98e..213caa7 100644
--- a/src/ingestion_qc/generate_report/main.nf
+++ b/src/ingestion_qc/generate_report/main.nf
@@ -8,12 +8,6 @@ workflow run_wf {
       [id, state + [_meta: [join_id: id]]]
     }
 
-    // add sample ids to each state
-    | add_id.run(
-      fromState: [input_id: "id", input: "output"],
-      toState: ["output"]
-    )
-    
     // run cellbender
     | cellbender.run(
       runIf: {id, state -> state.run_cellbender},
@@ -37,6 +31,12 @@ workflow run_wf {
       toState: ["output"]
     )
 
+    // add sample ids to each state
+    | add_id.run(
+      fromState: [input_id: "id", input: "output"],
+      toState: ["output"]
+    )
+
     // combine files into one state
     | joinStates { ids, states ->
       def newId = "combined"

From a67a150c025c99209e2d10bfd6cf034b8c9cfad2 Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Tue, 25 Mar 2025 17:54:10 +0100
Subject: [PATCH 14/19] get op components from main

---
 _viash.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_viash.yaml b/_viash.yaml
index 251e5f3..947519a 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -14,7 +14,7 @@ repositories:
   - name: openpipeline
     repo: openpipelines-bio/openpipeline
     type: github
-    tag: 2.0.0
+    tag: main
 
 info:
   test_resources:

From 87d934f9774ade2e83b87e97a52286fd17e20811 Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Tue, 25 Mar 2025 18:25:39 +0100
Subject: [PATCH 15/19] remove tag

---
 _viash.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_viash.yaml b/_viash.yaml
index 947519a..926a45f 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -14,7 +14,7 @@ repositories:
   - name: openpipeline
     repo: openpipelines-bio/openpipeline
     type: github
-    tag: main
+    # tag: main
 
 info:
   test_resources:

From e031b2ff25104467b65d583976227239af362154 Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Tue, 25 Mar 2025 19:13:36 +0100
Subject: [PATCH 16/19] op main_build tag

---
 _viash.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_viash.yaml b/_viash.yaml
index 926a45f..56e5e0c 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -14,7 +14,7 @@ repositories:
   - name: openpipeline
     repo: openpipelines-bio/openpipeline
     type: github
-    # tag: main
+    tag: main_build
 
 info:
   test_resources:

From 9fc517c3a5557d8cc39762c04a348c25553cc2ee Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Wed, 26 Mar 2025 10:32:42 +0100
Subject: [PATCH 17/19] trigger container rebuild

---
 src/ingestion_qc/generate_html/config.vsh.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/ingestion_qc/generate_html/config.vsh.yaml b/src/ingestion_qc/generate_html/config.vsh.yaml
index 2a744bf..c290f69 100644
--- a/src/ingestion_qc/generate_html/config.vsh.yaml
+++ b/src/ingestion_qc/generate_html/config.vsh.yaml
@@ -46,7 +46,8 @@ engines:
         run: |
           npm install -g pnpm@latest-10 \
           && cd /opt && git clone -b ambient-rna https://github.com/openpipelines-bio/incubator_ingestion_qc.git \
-          && cd incubator_ingestion_qc && pnpm install
+          && cd incubator_ingestion_qc && pnpm install \
+          && true
 runners:
   - type: executable
   - type: nextflow

From 155413334d1fbecb3a5e6eb975029624eb4e2d91 Mon Sep 17 00:00:00 2001
From: jakubmajercik <jakub.majercik@gmail.com>
Date: Wed, 26 Mar 2025 22:53:39 +0100
Subject: [PATCH 18/19] remove from docker setup

---
 src/ingestion_qc/generate_html/config.vsh.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/ingestion_qc/generate_html/config.vsh.yaml b/src/ingestion_qc/generate_html/config.vsh.yaml
index c290f69..2a744bf 100644
--- a/src/ingestion_qc/generate_html/config.vsh.yaml
+++ b/src/ingestion_qc/generate_html/config.vsh.yaml
@@ -46,8 +46,7 @@ engines:
         run: |
           npm install -g pnpm@latest-10 \
           && cd /opt && git clone -b ambient-rna https://github.com/openpipelines-bio/incubator_ingestion_qc.git \
-          && cd incubator_ingestion_qc && pnpm install \
-          && true
+          && cd incubator_ingestion_qc && pnpm install
 runners:
   - type: executable
   - type: nextflow

From aa59d28d37a313a50b126125e2fb7e909e82d222 Mon Sep 17 00:00:00 2001
From: Dorien <41797896+dorien-er@users.noreply.github.com>
Date: Fri, 28 Mar 2025 14:35:22 +0100
Subject: [PATCH 19/19] Update src/ingestion_qc/h5mu_to_qc_json/script.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 src/ingestion_qc/h5mu_to_qc_json/script.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/ingestion_qc/h5mu_to_qc_json/script.py b/src/ingestion_qc/h5mu_to_qc_json/script.py
index 5992bb9..3580070 100644
--- a/src/ingestion_qc/h5mu_to_qc_json/script.py
+++ b/src/ingestion_qc/h5mu_to_qc_json/script.py
@@ -119,7 +119,7 @@ def main(par):
             {
                 "sample_id": pd.Categorical(sample_id),
                 **{key: mod_obs[key] for key in par["obs_keys"]},
-                **{key: mod_obs[key] for key in par["cellbender_obs_keys"] if par["cellbender_obs_keys"]},
+                **{key: mod_obs[key] for key in par["cellbender_obs_keys"] if key in mod_obs.columns},
             }
         )