Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cell cycle score baseline #706

Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
020d907
add cc_score baseline
scottgigante-immunai Nov 26, 2022
fe14551
document
scottgigante-immunai Nov 26, 2022
5f257fd
Merge branch 'main' into batch_integration_embed/baseline/cc_score
scottgigante-immunai Nov 26, 2022
f81f946
Merge branch 'main' into batch_integration_embed/baseline/cc_score
scottgigante-immunai Nov 27, 2022
838da80
Merge branch 'main' into batch_integration_embed/baseline/cc_score
scottgigante-immunai Nov 27, 2022
5596e45
Make sure method didn't remove uns
scottgigante-immunai Nov 27, 2022
cd3f88b
Combat tramples uns
scottgigante-immunai Nov 27, 2022
9e85204
Revert
scottgigante-immunai Nov 27, 2022
da437b5
Scale and hvg trample uns
scottgigante-immunai Nov 27, 2022
eadbc8d
scanorama clears uns
scottgigante-immunai Nov 27, 2022
5fff381
mnn tramples uns
scottgigante-immunai Nov 27, 2022
f755ae7
just copy uns
scottgigante-immunai Nov 28, 2022
ec14bc2
Merge branch 'main' into batch_integration_embed/baseline/cc_score
scottgigante-immunai Nov 28, 2022
ecb88b2
just copy uns
scottgigante-immunai Nov 28, 2022
d273c9f
Merge branch 'batch_integration_embed/baseline/cc_score' of github.co…
scottgigante-immunai Nov 28, 2022
9c70c22
don't set X_emb if missing; it shouldn't ever be missing
scottgigante-immunai Nov 28, 2022
09adbba
Merge branch 'main' into batch_integration_embed/baseline/cc_score
scottgigante-immunai Nov 28, 2022
fdd6ea8
Merge remote-tracking branch 'base/main' into batch_integration_embed…
scottgigante-immunai Dec 1, 2022
d981d42
use true features as embedding
scottgigante-immunai Dec 1, 2022
0d2dd69
compute PCA per batch
scottgigante-immunai Dec 2, 2022
fa4ec33
Merge branch 'main' into batch_integration_embed/baseline/cc_score
scottgigante-immunai Dec 2, 2022
a69f851
Set code version
scottgigante-immunai Dec 3, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ Datasets should contain the following attributes:
* `adata.obsm['X_uni']` with a pre-integration embedding (PCA)
* `adata.layers['log_normalized']` with log-normalized data
* `adata.X` with log-normalized data
* `adata.uns['organism']` with the organism the dataset comes from, either `"mouse"` or `"human"`

Methods should assign output to `adata.obsm['X_emb']`.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,17 @@ def check_dataset(adata):
assert "batch" in adata.obs
assert "labels" in adata.obs
assert "log_normalized" in adata.layers
assert "organism" in adata.uns
assert adata.uns["organism"] in ["mouse", "human"]

return True


def check_method(adata, is_baseline=False):
    """Validate that a method's output matches the task API.

    The output must carry an embedding in ``adata.obsm["X_emb"]`` and must
    still have the ``"organism"`` entry in ``adata.uns``.

    Parameters
    ----------
    adata
        Object returned by a method; only ``obsm`` and ``uns`` are inspected.
    is_baseline
        Accepted for API compatibility; not consulted by these checks.

    Returns
    -------
    bool
        ``True`` when every check passes.

    Raises
    ------
    AssertionError
        If the embedding or the organism entry is missing.
    """
    has_embedding = "X_emb" in adata.obsm
    assert has_embedding
    # downstream code reads adata.uns["organism"], so a method must not drop it
    organism_kept = "organism" in adata.uns
    assert organism_kept
    return True


Expand All @@ -27,6 +31,7 @@ def sample_dataset():
import scanpy as sc

adata = load_sample_data()
adata.uns["organism"] = "human"

adata.var.index = adata.var.gene_short_name.astype(str)
sc.pp.normalize_total(adata)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,3 +76,21 @@ def batch_random_integration(adata, test=False):
)
adata.uns["method_code_version"] = check_version("openproblems")
return adata


@method(
    # The name previously duplicated the "Random Integration by Batch" baseline,
    # which made the two baselines indistinguishable by display name.
    method_name="Cell Cycle Integration",
    paper_name="Cell Cycle Integration (baseline)",
    paper_url="https://openproblems.bio",
    paper_year=2022,
    code_url="https://github.com/openproblems-bio/openproblems",
    image="openproblems-python-batch-integration",
    is_baseline=True,
)
def cell_cycle_integration(adata, test=False):
    """Baseline that uses the cell cycle scores themselves as the embedding.

    Runs scib's ``score_cell_cycle`` for the organism recorded in
    ``adata.uns["organism"]`` and stores the resulting ``S_score`` and
    ``G2M_score`` columns of ``adata.obs`` as a two-column
    ``adata.obsm["X_emb"]``.

    Parameters
    ----------
    adata
        Dataset to embed; must provide ``adata.uns["organism"]``.
    test
        Accepted for API compatibility; not used by this baseline.

    Returns
    -------
    The same ``adata`` object, with ``obsm["X_emb"]`` and
    ``uns["method_code_version"]`` set.
    """
    from scib.preprocessing import score_cell_cycle

    # score_cell_cycle is expected to add S_score / G2M_score to adata.obs
    score_cell_cycle(adata, organism=adata.uns["organism"])
    adata.obsm["X_emb"] = adata.obs[["S_score", "G2M_score"]].to_numpy()
    adata.uns["method_code_version"] = check_version("openproblems")
    return adata
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,16 @@
@metric(
metric_name="Cell Cycle Score",
maximize=True,
image="openproblems-python-batch-integration", # only if required
image="openproblems-python-batch-integration",
)
def cc_score(adata, test=False):
from ._utils import _get_split
from scib.metrics import cell_cycle

try:
cc = cell_cycle(*_get_split(adata), "batch", embed="X_emb", organism="human")
cc = cell_cycle(
*_get_split(adata), "batch", embed="X_emb", organism=adata.uns["organism"]
)

except ValueError:
cc = 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def immune_batch(test=False):
import scanpy as sc

adata = load_immune(test)
adata.uns["organism"] = "human"
adata.obs["labels"] = adata.obs["final_annotation"]

sc.pp.filter_genes(adata, min_counts=1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def pancreas_batch(test=False):
import scanpy as sc

adata = load_pancreas(test)
adata.uns["organism"] = "human"
adata.obs["labels"] = adata.obs["celltype"]
adata.obs["batch"] = adata.obs["tech"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ def hvg_batch(adata, batch_key, target_genes, adataOut):
if adata.n_vars < 2000:
return adata
else:
# uns and var get trampled
uns = adata.uns.copy()
var = adata.var.copy()
adata = hvg_batch(
adata,
Expand All @@ -13,13 +15,17 @@ def hvg_batch(adata, batch_key, target_genes, adataOut):
adataOut=adataOut,
)
adata.var = var.loc[adata.var.index]
adata.uns = uns
return adata


def scale_batch(adata, batch_key):
    """Apply scib's per-batch scaling while keeping ``var`` and ``uns`` intact.

    Parameters
    ----------
    adata
        Dataset to scale.
    batch_key
        Forwarded to ``scib.preprocessing.scale_batch`` as the batch key.

    Returns
    -------
    The object returned by scib, with ``var`` restored (reindexed to the
    returned ``var_names``) and ``uns`` restored from the input.
    """
    from scib.preprocessing import scale_batch as _scib_scale_batch

    # scib tramples var and uns, so snapshot both before calling it
    saved_var = adata.var.copy()
    saved_uns = adata.uns.copy()

    scaled = _scib_scale_batch(adata, batch_key)

    # put the saved metadata back, aligned to whatever genes scib returned
    scaled.var = saved_var.loc[scaled.var_names]
    scaled.uns = saved_uns
    return scaled
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ def _mnn(adata):
from scib.integration import runMNN
from scib.preprocessing import reduce_data

# mnn clears adata.uns
uns = adata.uns
adata = runMNN(adata, "batch")
adata.uns = uns
reduce_data(adata, umap=False)
adata.obsm["X_emb"] = adata.obsm["X_pca"]
adata.uns["method_code_version"] = check_version("mnnpy")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ def _scanorama(adata, use_rep, pca):
from scib.integration import scanorama
from scib.preprocessing import reduce_data

# scanorama clears adata.layers
# scanorama clears adata.layers and uns
layers = adata.layers
uns = adata.uns
adata = scanorama(adata, "batch")
adata.layers = layers
adata.uns = uns
reduce_data(adata, umap=False, use_rep=use_rep, pca=pca)
adata.uns["method_code_version"] = check_version("scanorama")
return adata
Expand Down