Skip to content

openproblems interoperability POC #883

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions _viash.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,9 @@ config_mods: |
.resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}
.runners[.type == 'nextflow'].directives.tag := '$id'
.runners[.type == 'nextflow'].config.script := 'includeConfig("nextflow_labels.config")'

# External Viash repositories. Components refer to an entry by its `name`
# through the `repository` field of their dependencies.
repositories:
  - type: github
    name: openproblems_task_dimensionality_reduction
    repo: openproblems-bio/task_dimensionality_reduction
    tag: build/main
95 changes: 95 additions & 0 deletions src/dimred/openproblems_dr/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Workflow component that runs a single OpenProblems dimensionality reduction
# method (selected with --method_id) on one modality of an h5mu file.
name: "openproblems_dr"
namespace: "dimred"
argument_groups:
  - name: "Inputs"
    arguments:
      - name: "--input"
        required: true
        type: file
        description: Path to the sample.
        example: dataset.h5mu

      - name: "--input_modality"
        type: string
        description: Which modality of the input file to process.
        default: "rna"
        required: false

      - name: "--input_layer_counts"
        type: string
        description: In which layer to find the raw counts. If not provided, the raw counts are assumed to be in the .X object.
        required: false

      - name: "--input_layer_normalized"
        type: string
        description: In which layer to find the log normalized counts. If not provided, the log normalized counts are assumed to be in the .X object.
        required: false
        default: "log_normalized"

      - name: "--input_var_hvg_score"
        type: string
        description: In which .var column to find the HVG scores. If not provided, the HVG scores are not passed.
        required: false

  - name: Method settings
    arguments:
      - name: "--method_id"
        description: ID of the method to use.
        type: string
        required: true
        # Must stay in sync with the method dependencies listed below.
        choices:
          - densmap
          - diffusion_map
          - ivis
          - lmds
          - neuralee
          - pca
          - phate
          - pymde
          - simlr
          - tsne
          - umap

  - name: "Outputs"
    arguments:
      - name: "--output"
        type: file
        required: true
        direction: output
        description: Destination path to the output.
        example: output.h5mu

      - name: "--output_obsm_key"
        description: 'Key in the .obsm object in which to store the embedding. If not provided, "X_{method_id}" is used.'
        type: string
        required: false

dependencies:
  # Local conversion components.
  - name: dimred/openproblems_dr_h5mu_to_h5ad
  - name: dimred/openproblems_dr_h5ad_to_h5mu
  # Method components from the external OpenProblems task repository.
  - name: methods/densmap
    repository: openproblems_task_dimensionality_reduction
  - name: methods/diffusion_map
    repository: openproblems_task_dimensionality_reduction
  - name: methods/ivis
    repository: openproblems_task_dimensionality_reduction
  - name: methods/lmds
    repository: openproblems_task_dimensionality_reduction
  - name: methods/neuralee
    repository: openproblems_task_dimensionality_reduction
  - name: methods/pca
    repository: openproblems_task_dimensionality_reduction
  - name: methods/phate
    repository: openproblems_task_dimensionality_reduction
  - name: methods/pymde
    repository: openproblems_task_dimensionality_reduction
  - name: methods/simlr
    repository: openproblems_task_dimensionality_reduction
  - name: methods/tsne
    repository: openproblems_task_dimensionality_reduction
  - name: methods/umap
    repository: openproblems_task_dimensionality_reduction

resources:
  - type: nextflow_script
    path: main.nf
    entrypoint: run_wf

runners:
  - type: nextflow
70 changes: 70 additions & 0 deletions src/dimred/openproblems_dr/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Method components that run_wf can dispatch to; the one whose config name
// equals the `method_id` in the state is executed.
methods = [
  densmap,
  diffusion_map,
  ivis,
  lmds,
  neuralee,
  pca,
  phate,
  pymde,
  simlr,
  tsne,
  umap
]

// Runs one dimensionality reduction method on an h5mu input:
// h5mu -> h5ad conversion, the selected method, then the method output is
// merged back into the h5mu under the requested .obsm key.
workflow run_wf {
  take:
  input_ch

  main:
  output_ch = input_ch

    // Convert the input to h5ad; the converted file is kept in the state
    // as `method_input`.
    | openproblems_dr_h5mu_to_h5ad.run(
      fromState: [
        "input",
        "input_modality",
        "input_layer_counts",
        "input_layer_normalized",
        "input_var_hvg_score"
      ],
      toState: ["method_input": "output"]
    )

    // Only the component whose name matches `method_id` passes the filter.
    | runEach(
      components: methods,
      filter: { id, state, comp -> state.method_id == comp.config.name },
      fromState: ["input": "method_input"],
      toState: ["method_output": "output"]
    )

    // Combine the original dataset with the method output.
    | openproblems_dr_h5ad_to_h5mu.run(
      fromState: { id, state ->
        [
          "input_dataset": state.input,
          "input_output": state.method_output,
          "input_modality": state.input_modality,
          // Fall back to "X_<method_id>" when no key was given.
          "output_obsm_key": state.output_obsm_key ?: ("X_" + state.method_id)
        ]
      },
      toState: ["output": "output"]
    )

    // Keep only the final output in the emitted state.
    | setState(["output"])

  emit:
  output_ch
}
11 changes: 11 additions & 0 deletions src/dimred/openproblems_dr/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

# Manual run of the built dimred/openproblems_dr Nextflow workflow on the
# pbmc_1k_protein_v3 test resource, using the `pymde` method.
# NOTE(review): the commented-out command below is presumably the build step
# that must be run first to produce target/nextflow/... — confirm.
# viash ns build --parallel -q openproblems --setup cb

# Uses the docker profile, resumes cached work, and publishes to output/foo.
nextflow run . \
-main-script target/nextflow/dimred/openproblems_dr/main.nf \
-profile docker \
-resume \
--input resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu \
--method_id pymde \
--publish_dir output/foo
66 changes: 66 additions & 0 deletions src/dimred/openproblems_dr_h5ad_to_h5mu/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copies the embedding produced by a dimensionality reduction method (h5ad)
# into the .obsm slot of one modality of the original h5mu file.
name: openproblems_dr_h5ad_to_h5mu
namespace: "dimred"

argument_groups:
  - name: Inputs
    arguments:
      - name: "--input_dataset"
        type: file
        description: Input h5mu file
        direction: input
        required: true
        example: input.h5mu

      - name: "--input_output"
        type: file
        description: Method output h5ad file
        direction: input
        required: true
        example: input.h5ad

      - name: "--input_modality"
        type: string
        description: Modality of the input h5mu file to which the embedding is added.
        default: "rna"
        required: false

  - name: Outputs
    arguments:
      - name: "--output"
        alternatives: ["-o"]
        type: file
        description: Output h5mu file.
        direction: output
        required: true
        example: output.h5mu

      - name: "--output_obsm_key"
        description: Key in the .obsm object of the output modality in which to store the embedding.
        type: string
        required: false

resources:
  - type: python_script
    path: script.py

# test_resources:
#   - type: python_script
#     path: test.py
#   - path: /resources_test/pbmc_1k_protein_v3

engines:
  - type: docker
    image: python:3.12-slim
    setup:
      - type: apt
        packages:
          - procps
      - type: python
        __merge__: [/src/base/requirements/anndata_mudata.yaml, .]
    # NOTE(review): nesting reconstructed; this merge is assumed to sit at
    # engine level (sibling of `setup`), since two `__merge__` keys in the
    # same mapping would be duplicates — confirm.
    __merge__: [/src/base/requirements/python_test_setup.yaml, .]

runners:
  - type: executable
  - type: nextflow
    directives:
      label: [highcpu, midmem]
22 changes: 22 additions & 0 deletions src/dimred/openproblems_dr_h5ad_to_h5mu/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import mudata as mu
import anndata as ad

## VIASH START
par = {
    "input_dataset": "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
    "input_output": "work/90/56062df29c88150755a174b63bdb82/_viash_par/input_output_1/run.pymde.output.h5ad",
    "input_modality": "rna",
    "output_obsm_key": "X_dr",
    "output": "output.h5mu"
}
## VIASH END

# Copy the embedding stored under .obsm["X_emb"] of the method output (h5ad)
# into .obsm[output_obsm_key] of the chosen modality of the input h5mu, then
# write the result as h5mu.

# The destination key is used directly as an .obsm key below, so fail early
# with a clear message when it is missing.
if not par.get("output_obsm_key"):
    raise ValueError("--output_obsm_key must be provided")

print("Reading h5mu file", flush=True)
mdata = mu.read_h5mu(par["input_dataset"])
adata = ad.read_h5ad(par["input_output"])

adata_dest = mdata.mod[par["input_modality"]]
# NOTE(review): assumes the method output keeps the observations in the same
# order as the input modality — confirm.
adata_dest.obsm[par["output_obsm_key"]] = adata.obsm["X_emb"]

# The output is an h5mu file; the previous message said h5ad.
print("Writing h5mu file", flush=True)
mdata.write_h5mu(par["output"])
69 changes: 69 additions & 0 deletions src/dimred/openproblems_dr_h5mu_to_h5ad/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Extracts one modality of an h5mu file into an h5ad file with `counts` and
# `normalized` layers, for use as input to a dimensionality reduction method.
name: openproblems_dr_h5mu_to_h5ad
namespace: "dimred"

argument_groups:
  - name: Inputs
    arguments:
      - name: "--input"
        type: file
        description: Input h5mu file
        direction: input
        required: true
        example: input.h5mu

      - name: "--input_modality"
        type: string
        description: Which modality of the input file to convert.
        default: "rna"
        required: false

      - name: "--input_layer_counts"
        type: string
        description: In which layer to find the raw counts. If not provided, the raw counts are assumed to be in the .X object.
        required: false

      - name: "--input_layer_normalized"
        type: string
        description: In which layer to find the log normalized counts. If not provided, the log normalized counts are assumed to be in the .X object.
        required: false
        default: "log_normalized"

      - name: "--input_var_hvg_score"
        type: string
        description: In which .var column to find the HVG scores. If not provided, the HVG scores are not passed.
        required: false

  - name: Outputs
    arguments:
      - name: "--output"
        alternatives: ["-o"]
        type: file
        description: Output h5ad file.
        direction: output
        required: true
        example: output.h5ad

resources:
  - type: python_script
    path: script.py

# test_resources:
#   - type: python_script
#     path: test.py
#   - path: /resources_test/pbmc_1k_protein_v3

engines:
  - type: docker
    image: python:3.12-slim
    setup:
      - type: apt
        packages:
          - procps
      - type: python
        __merge__: [/src/base/requirements/anndata_mudata.yaml, .]
    # NOTE(review): nesting reconstructed; this merge is assumed to sit at
    # engine level (sibling of `setup`), since two `__merge__` keys in the
    # same mapping would be duplicates — confirm.
    __merge__: [/src/base/requirements/python_test_setup.yaml, .]

runners:
  - type: executable
  - type: nextflow
    directives:
      label: [highcpu, midmem]
46 changes: 46 additions & 0 deletions src/dimred/openproblems_dr_h5mu_to_h5ad/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import mudata as mu
import anndata as ad

## VIASH START
par = {
    "input": "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
    "input_modality": "rna",
    "input_layer_counts": "log_normalized",
    "input_layer_normalized": "log_normalized",
    "input_var_hvg_score": None,
    "output": "output.h5ad"
}
## VIASH END

# Build an AnnData object with `counts` and `normalized` layers from one
# modality of the input h5mu and write it as a gzip-compressed h5ad file.

print("Reading h5mu file", flush=True)
mdata = mu.read_h5mu(par["input"])

print("Transforming to anndata", flush=True)
def get_matrix(mdata, modality, layer):
    """Return the matrix in `layer` of `modality`, or `.X` when `layer` is None."""
    if layer is None:
        return mdata.mod[modality].X
    return mdata.mod[modality].layers[layer]

# create var: keep only the index of the source .var. Copy it so that adding
# the hvg_score column below does not assign into a slice of the source frame.
source_var = mdata.mod[par["input_modality"]].var
var = source_var[[]].copy()

hvg_key = par["input_var_hvg_score"]
if hvg_key is not None:
    # Look the column up in the source .var: `var` was created without any
    # columns, so checking `var.columns` would reject every key.
    if hvg_key not in source_var.columns:
        raise ValueError(f"Variable {hvg_key} not found in var")
    var["hvg_score"] = source_var[hvg_key]

# create anndata
adata = ad.AnnData(
    layers={
        "counts": get_matrix(mdata, par["input_modality"], par["input_layer_counts"]),
        "normalized": get_matrix(mdata, par["input_modality"], par["input_layer_normalized"])
    },
    obs=mdata.mod[par["input_modality"]].obs[[]],
    var=var,
    # Placeholder metadata values.
    uns={
        "dataset_id": "dummy",
        "normalization_id": "dummy"
    }
)

print("Writing h5ad file", flush=True)
adata.write_h5ad(par["output"], compression="gzip")