diff --git a/_viash.yaml b/_viash.yaml
index 6706c0fe93e..d43500ec99e 100644
--- a/_viash.yaml
+++ b/_viash.yaml
@@ -27,3 +27,9 @@ config_mods: |
   .resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}
   .runners[.type == 'nextflow'].directives.tag := '$id'
   .runners[.type == 'nextflow'].config.script := 'includeConfig("nextflow_labels.config")'
+
+repositories:
+  - type: github
+    name: openproblems_task_dimensionality_reduction
+    repo: openproblems-bio/task_dimensionality_reduction
+    tag: build/main
diff --git a/src/dimred/openproblems_dr/config.vsh.yaml b/src/dimred/openproblems_dr/config.vsh.yaml
new file mode 100644
index 00000000000..ae6c88c62f2
--- /dev/null
+++ b/src/dimred/openproblems_dr/config.vsh.yaml
@@ -0,0 +1,95 @@
+name: "openproblems_dr"
+namespace: "dimred"
+argument_groups:
+  - name: "Inputs"
+    arguments:
+      - name: "--input"
+        required: true
+        type: file
+        description: Path to the sample.
+        example: dataset.h5mu
+
+      - name: "--input_modality"
+        type: string
+        default: "rna"
+        required: false
+
+      - name: "--input_layer_counts"
+        type: string
+        description: In which layer to find the raw counts. If not provided, the raw counts are assumed to be in the .X object.
+        required: false
+
+      - name: "--input_layer_normalized"
+        type: string
+        description: In which layer to find the log normalized counts. If not provided, the log normalized counts are assumed to be in the .X object.
+        required: false
+        default: "log_normalized"
+
+      - name: "--input_var_hvg_score"
+        type: string
+        description: In which .var column to find the HVG scores. If not provided, the HVG scores are not passed.
+        required: false
+
+  - name: Method settings
+    arguments:
+      - name: "--method_id"
+        description: ID of the method to use.
+        type: string
+        required: true
+        choices: [
+          densmap,
+          diffusion_map,
+          ivis,
+          lmds,
+          neuralee,
+          pca,
+          phate,
+          pymde,
+          simlr,
+          tsne,
+          umap
+        ]
+  - name: "Outputs"
+    arguments:
+      - name: "--output"
+        type: file
+        required: true
+        direction: output
+        description: Destination path to the output.
+        example: output.h5mu
+
+      - name: "--output_obsm_key"
+        description: Key in the .obsm object in which to store the embedding. If not provided, "X_{method_id}" is used.
+        type: string
+        required: false
+dependencies:
+  - name: dimred/openproblems_dr_h5mu_to_h5ad
+  - name: dimred/openproblems_dr_h5ad_to_h5mu
+  - name: methods/densmap
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/diffusion_map
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/ivis
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/lmds
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/neuralee
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/pca
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/phate
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/pymde
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/simlr
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/tsne
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/umap
+    repository: openproblems_task_dimensionality_reduction
+resources:
+  - type: nextflow_script
+    path: main.nf
+    entrypoint: run_wf
+runners:
+  - type: nextflow
\ No newline at end of file
diff --git a/src/dimred/openproblems_dr/main.nf b/src/dimred/openproblems_dr/main.nf
new file mode 100644
index 00000000000..d7ae75f3fcf
--- /dev/null
+++ b/src/dimred/openproblems_dr/main.nf
@@ -0,0 +1,74 @@
+// Method components that can be selected with --method_id.
+methods = [
+  densmap,
+  diffusion_map,
+  ivis,
+  lmds,
+  neuralee,
+  pca,
+  phate,
+  pymde,
+  simlr,
+  tsne,
+  umap
+]
+
+workflow run_wf {
+  take:
+    input_ch
+
+  main:
+    output_ch = input_ch
+
+      // Export the selected modality of the input h5mu as the h5ad file handed to the method.
+      | openproblems_dr_h5mu_to_h5ad.run(
+        fromState: [
+          "input",
+          "input_modality",
+          "input_layer_counts",
+          "input_layer_normalized",
+          "input_var_hvg_score"
+        ],
+        toState: [
+          "method_input": "output"
+        ]
+      )
+
+      // Run only the component whose config name equals --method_id.
+      | runEach(
+        components: methods,
+        filter: { id, state, comp ->
+          state.method_id == comp.config.name
+        },
+        fromState: [
+          "input": "method_input"
+        ],
+        toState: [
+          "method_output": "output"
+        ]
+      )
+
+      // Store the method output in the original h5mu; the .obsm key defaults to "X_" + method_id.
+      | openproblems_dr_h5ad_to_h5mu.run(
+        fromState: { id, state ->
+          def output_obsm_key = state.output_obsm_key
+          if (!output_obsm_key) {
+            output_obsm_key = "X_" + state.method_id
+          }
+          [
+            "input_dataset": state.input,
+            "input_output": state.method_output,
+            "input_modality": state.input_modality,
+            "output_obsm_key": output_obsm_key
+          ]
+        },
+        toState: [
+          "output": "output"
+        ]
+      )
+
+      | setState(["output"])
+
+  emit:
+    output_ch
+}
diff --git a/src/dimred/openproblems_dr/test.sh b/src/dimred/openproblems_dr/test.sh
new file mode 100755
index 00000000000..73c3daaed14
--- /dev/null
+++ b/src/dimred/openproblems_dr/test.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# viash ns build --parallel -q openproblems --setup cb
+
+nextflow run . \
+  -main-script target/nextflow/dimred/openproblems_dr/main.nf \
+  -profile docker \
+  -resume \
+  --input resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu \
+  --method_id pymde \
+  --publish_dir output/foo
diff --git a/src/dimred/openproblems_dr_h5ad_to_h5mu/config.vsh.yaml b/src/dimred/openproblems_dr_h5ad_to_h5mu/config.vsh.yaml
new file mode 100644
index 00000000000..230fba4f63a
--- /dev/null
+++ b/src/dimred/openproblems_dr_h5ad_to_h5mu/config.vsh.yaml
@@ -0,0 +1,66 @@
+name: openproblems_dr_h5ad_to_h5mu
+namespace: "dimred"
+
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: "--input_dataset"
+        type: file
+        description: Input h5mu file
+        direction: input
+        required: true
+        example: input.h5mu
+
+      - name: "--input_output"
+        type: file
+        description: Method output h5ad file
+        direction: input
+        required: true
+        example: input.h5ad
+
+      - name: "--input_modality"
+        type: string
+        default: "rna"
+        required: false
+
+  - name: Outputs
+    arguments:
+      - name: "--output"
+        alternatives: ["-o"]
+        type: file
+        description: Output h5mu file.
+        direction: output
+        required: true
+        example: output.h5mu
+
+      - name: "--output_obsm_key"
+        description: Key in the .obsm object in which to store the embedding.
+        type: string
+        required: false
+
+resources:
+  - type: python_script
+    path: script.py
+
+# test_resources:
+#   - type: python_script
+#     path: test.py
+#   - path: /resources_test/pbmc_1k_protein_v3
+
+engines:
+  - type: docker
+    image: python:3.12-slim
+    setup:
+      - type: apt
+        packages:
+          - procps
+      - type: python
+        __merge__: [/src/base/requirements/anndata_mudata.yaml, .]
+    __merge__: [/src/base/requirements/python_test_setup.yaml, .]
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [highcpu, midmem]
diff --git a/src/dimred/openproblems_dr_h5ad_to_h5mu/script.py b/src/dimred/openproblems_dr_h5ad_to_h5mu/script.py
new file mode 100644
index 00000000000..1788a923018
--- /dev/null
+++ b/src/dimred/openproblems_dr_h5ad_to_h5mu/script.py
@@ -0,0 +1,26 @@
+import mudata as mu
+import anndata as ad
+
+## VIASH START
+par = {
+    "input_dataset": "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
+    "input_output": "work/90/56062df29c88150755a174b63bdb82/_viash_par/input_output_1/run.pymde.output.h5ad",
+    "input_modality": "rna",
+    "output_obsm_key": "X_dr",
+    "output": "output.h5mu"
+}
+## VIASH END
+
+print("Reading h5mu file", flush=True)
+mdata = mu.read_h5mu(par["input_dataset"])
+
+print("Reading h5ad file", flush=True)
+adata = ad.read_h5ad(par["input_output"])
+
+# Copy the embedding stored under "X_emb" in the h5ad file into the requested
+# .obsm key of the selected modality.
+adata_dest = mdata.mod[par["input_modality"]]
+adata_dest.obsm[par["output_obsm_key"]] = adata.obsm["X_emb"]
+
+print("Writing h5mu file", flush=True)
+mdata.write_h5mu(par["output"])
diff --git a/src/dimred/openproblems_dr_h5mu_to_h5ad/config.vsh.yaml b/src/dimred/openproblems_dr_h5mu_to_h5ad/config.vsh.yaml
new file mode 100644
index 00000000000..70c639b3d30
--- /dev/null
+++ b/src/dimred/openproblems_dr_h5mu_to_h5ad/config.vsh.yaml
@@ -0,0 +1,69 @@
+name: openproblems_dr_h5mu_to_h5ad
+namespace: "dimred"
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: "--input"
+        type: file
+        description: Input h5mu file
+        direction: input
+        required: true
+        example: input.h5mu
+
+      - name: "--input_modality"
+        type: string
+        default: "rna"
+        required: false
+
+      - name: "--input_layer_counts"
+        type: string
+        description: In which layer to find the raw counts. If not provided, the raw counts are assumed to be in the .X object.
+        required: false
+
+      - name: "--input_layer_normalized"
+        type: string
+        description: In which layer to find the log normalized counts. If not provided, the log normalized counts are assumed to be in the .X object.
+        required: false
+        default: "log_normalized"
+
+      - name: "--input_var_hvg_score"
+        type: string
+        description: In which .var column to find the HVG scores. If not provided, the HVG scores are not passed.
+        required: false
+
+  - name: Outputs
+    arguments:
+      - name: "--output"
+        alternatives: ["-o"]
+        type: file
+        description: Output h5ad file.
+        direction: output
+        required: true
+        example: output.h5ad
+
+resources:
+  - type: python_script
+    path: script.py
+
+# test_resources:
+#   - type: python_script
+#     path: test.py
+#   - path: /resources_test/pbmc_1k_protein_v3
+
+engines:
+  - type: docker
+    image: python:3.12-slim
+    setup:
+      - type: apt
+        packages:
+          - procps
+      - type: python
+        __merge__: [/src/base/requirements/anndata_mudata.yaml, .]
+    __merge__: [/src/base/requirements/python_test_setup.yaml, .]
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [highcpu, midmem]
diff --git a/src/dimred/openproblems_dr_h5mu_to_h5ad/script.py b/src/dimred/openproblems_dr_h5mu_to_h5ad/script.py
new file mode 100644
index 00000000000..687927c50c4
--- /dev/null
+++ b/src/dimred/openproblems_dr_h5mu_to_h5ad/script.py
@@ -0,0 +1,52 @@
+import mudata as mu
+import anndata as ad
+
+## VIASH START
+par = {
+    "input": "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
+    "input_modality": "rna",
+    "input_layer_counts": "log_normalized",
+    "input_layer_normalized": "log_normalized",
+    "input_var_hvg_score": None,
+    "output": "output.h5ad"
+}
+## VIASH END
+
+print("Reading h5mu file", flush=True)
+mdata = mu.read_h5mu(par["input"])
+
+print("Transforming to anndata", flush=True)
+def get_matrix(mdata, modality, layer):
+    """Return the matrix in `layer` of `modality`, or .X if `layer` is None."""
+    if layer is None:
+        return mdata.mod[modality].X
+    return mdata.mod[modality].layers[layer]
+
+# modality to export
+input_adata = mdata.mod[par["input_modality"]]
+
+# create var: keep only the index, as a copy so a column can be added safely
+var = input_adata.var[[]].copy()
+
+if par["input_var_hvg_score"] is not None:
+    # look the column up in the source .var; `var` itself has no columns
+    if par["input_var_hvg_score"] not in input_adata.var.columns:
+        raise ValueError(f"Variable {par['input_var_hvg_score']} not found in var")
+    var["hvg_score"] = input_adata.var[par["input_var_hvg_score"]]
+
+# create anndata
+adata = ad.AnnData(
+    layers={
+        "counts": get_matrix(mdata, par["input_modality"], par["input_layer_counts"]),
+        "normalized": get_matrix(mdata, par["input_modality"], par["input_layer_normalized"])
+    },
+    obs=input_adata.obs[[]],
+    var=var,
+    uns={
+        "dataset_id": "dummy",
+        "normalization_id": "dummy"
+    }
+)
+
+print("Writing h5ad file", flush=True)
+adata.write_h5ad(par["output"], compression="gzip")