openproblems-bio · scottgigante-immunai · Dec 3, 2022 · Nov 26, 2022 · Nov 26, 2022 · Nov 26, 2022
diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/README.md b/openproblems/tasks/_batch_integration/batch_integration_embed/README.md
@@ -66,6 +66,7 @@ Datasets should contain the following attributes:
 * `adata.obsm['X_uni']` with a pre-integration embedding (PCA)
 * `adata.layers['log_normalized']` with log-normalized data
 * `adata.X` with log-normalized data
+* `adata.uns['organism']` with either `'mouse'` or `'human'`
 
 Methods should assign output to `adata.obsm['X_emb']`.
 

diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/api.py b/openproblems/tasks/_batch_integration/batch_integration_embed/api.py
@@ -11,13 +11,17 @@ def check_dataset(adata):
     assert "batch" in adata.obs
     assert "labels" in adata.obs
     assert "log_normalized" in adata.layers
+    assert "organism" in adata.uns
+    assert adata.uns["organism"] in ["mouse", "human"]
 
     return True
 
 
 def check_method(adata, is_baseline=False):
     """Check that method output fits expected API."""
     assert "X_emb" in adata.obsm
+    # methods must leave adata.uns["organism"] in place: the cell cycle metric reads it
+    assert "organism" in adata.uns
     return True
 
 
@@ -27,6 +31,7 @@ def sample_dataset():
     import scanpy as sc
 
     adata = load_sample_data()
+    adata.uns["organism"] = "human"
 
     adata.var.index = adata.var.gene_short_name.astype(str)
     sc.pp.normalize_total(adata)

diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py b/openproblems/tasks/_batch_integration/batch_integration_embed/methods/baseline.py
@@ -76,3 +76,29 @@ def batch_random_integration(adata, test=False):
     )
     adata.uns["method_code_version"] = check_version("openproblems")
     return adata
+
+
+@method(
+    method_name="Cell Cycle Score Embedding",
+    paper_name="Cell Cycle Score Embedding (baseline)",
+    paper_url="https://openproblems.bio",
+    paper_year=2022,
+    code_url="https://github.com/openproblems-bio/openproblems",
+    image="openproblems-python-batch-integration",
+    is_baseline=True,
+)
+def cell_cycle_integration(adata, test=False):
+    """Baseline: use the per-cell cell-cycle scores as the embedding.
+
+    Scores the cell cycle with scib for the organism recorded in
+    ``adata.uns["organism"]`` and stores the ``S_score`` and ``G2M_score``
+    columns of ``adata.obs`` as a two-column ``adata.obsm["X_emb"]``.
+    ``test`` is accepted for API compatibility and is not used here.
+    """
+    from scib.preprocessing import score_cell_cycle
+
+    # expected to populate obs["S_score"] and obs["G2M_score"], which are read below
+    score_cell_cycle(adata, organism=adata.uns["organism"])
+    adata.obsm["X_emb"] = adata.obs[["S_score", "G2M_score"]].to_numpy()
+    adata.uns["method_code_version"] = check_version("openproblems")
+    return adata
diff --git a/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/cc_score.py b/openproblems/tasks/_batch_integration/batch_integration_embed/metrics/cc_score.py
@@ -21,14 +21,16 @@
 @metric(
     metric_name="Cell Cycle Score",
     maximize=True,
-    image="openproblems-python-batch-integration",  # only if required
+    image="openproblems-python-batch-integration",
 )
 def cc_score(adata, test=False):
     from ._utils import _get_split
     from scib.metrics import cell_cycle
 
     try:
-        cc = cell_cycle(*_get_split(adata), "batch", embed="X_emb", organism="human")
+        cc = cell_cycle(
+            *_get_split(adata), "batch", embed="X_emb", organism=adata.uns["organism"]
+        )
 
     except ValueError:
         cc = 0

diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/datasets/immune.py b/openproblems/tasks/_batch_integration/batch_integration_graph/datasets/immune.py
@@ -15,6 +15,7 @@ def immune_batch(test=False):
     import scanpy as sc
 
     adata = load_immune(test)
+    adata.uns["organism"] = "human"
     adata.obs["labels"] = adata.obs["final_annotation"]
 
     sc.pp.filter_genes(adata, min_counts=1)

diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/datasets/pancreas.py b/openproblems/tasks/_batch_integration/batch_integration_graph/datasets/pancreas.py
@@ -15,6 +15,7 @@ def pancreas_batch(test=False):
     import scanpy as sc
 
     adata = load_pancreas(test)
+    adata.uns["organism"] = "human"
     adata.obs["labels"] = adata.obs["celltype"]
     adata.obs["batch"] = adata.obs["tech"]
 

diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/_utils.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/_utils.py
@@ -4,6 +4,8 @@ def hvg_batch(adata, batch_key, target_genes, adataOut):
     if adata.n_vars < 2000:
         return adata
     else:
+        # uns and var get trampled
+        uns = adata.uns.copy()
         var = adata.var.copy()
         adata = hvg_batch(
             adata,
@@ -13,13 +15,17 @@ def hvg_batch(adata, batch_key, target_genes, adataOut):
             adataOut=adataOut,
         )
         adata.var = var.loc[adata.var.index]
+        adata.uns = uns
         return adata
 
 
 def scale_batch(adata, batch_key):
     from scib.preprocessing import scale_batch
 
+    # the object returned by scib's scale_batch loses uns and var; keep copies to restore
+    uns = adata.uns.copy()
     var = adata.var.copy()
     adata = scale_batch(adata, batch_key)
     adata.var = var.loc[adata.var_names]
+    adata.uns = uns
     return adata
diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/mnn.py
@@ -18,7 +18,10 @@ def _mnn(adata):
     from scib.integration import runMNN
     from scib.preprocessing import reduce_data
 
+    # mnn clears adata.uns
+    uns = adata.uns
     adata = runMNN(adata, "batch")
+    adata.uns = uns
     reduce_data(adata, umap=False)
     adata.obsm["X_emb"] = adata.obsm["X_pca"]
     adata.uns["method_code_version"] = check_version("mnnpy")

diff --git a/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py b/openproblems/tasks/_batch_integration/batch_integration_graph/methods/scanorama.py
@@ -18,10 +18,12 @@ def _scanorama(adata, use_rep, pca):
     from scib.integration import scanorama
     from scib.preprocessing import reduce_data
 
-    # scanorama clears adata.layers
+    # scanorama clears adata.layers and uns
     layers = adata.layers
+    uns = adata.uns
     adata = scanorama(adata, "batch")
     adata.layers = layers
+    adata.uns = uns
     reduce_data(adata, umap=False, use_rep=use_rep, pca=pca)
     adata.uns["method_code_version"] = check_version("scanorama")
     return adata