openpipelines-bio · rcannood · Sep 20, 2024 · Sep 20, 2024 · Sep 21, 2024
diff --git a/_viash.yaml b/_viash.yaml
@@ -27,3 +27,9 @@ config_mods: |
   .resources += {path: '/src/workflows/utils/labels.config', dest: 'nextflow_labels.config'}
   .runners[.type == 'nextflow'].directives.tag := '$id'
   .runners[.type == 'nextflow'].config.script := 'includeConfig("nextflow_labels.config")'
+
+repositories:  # external viash repositories that components can depend on
+  - name: openproblems_task_dimensionality_reduction  # referenced by `repository:` in dependencies
+    type: github
+    repo: openproblems-bio/task_dimensionality_reduction
+    tag: build/main
diff --git a/src/dimred/openproblems_dr/config.vsh.yaml b/src/dimred/openproblems_dr/config.vsh.yaml
@@ -0,0 +1,100 @@
+# Workflow that runs one OpenProblems dimensionality reduction method on a
+# single modality of an h5mu file and stores the embedding in the output h5mu.
+name: "openproblems_dr"
+namespace: "dimred"
+argument_groups:
+  - name: "Inputs"
+    arguments:
+      - name: "--input"
+        required: true
+        type: file
+        description: Path to the sample.
+        example: dataset.h5mu
+
+      - name: "--input_modality"
+        type: string
+        description: Which modality of the input file to process.
+        default: "rna"
+        required: false
+
+      - name: "--input_layer_counts"
+        type: string
+        description: In which layer to find the raw counts. If not provided, the raw counts are assumed to be in the .X object.
+        required: false
+
+      - name: "--input_layer_normalized"
+        type: string
+        description: In which layer to find the log normalized counts. If not provided, the log normalized counts are assumed to be in the .X object.
+        required: false
+        default: "log_normalized"
+
+      - name: "--input_var_hvg_score"
+        type: string
+        description: In which .var column to find the HVG scores. If not provided, the HVG scores are not passed.
+        required: false
+
+  - name: Method settings
+    arguments:
+      - name: "--method_id"
+        description: ID of the method to use.
+        type: string
+        required: true
+        # Keep in sync with the method dependencies below and the `methods` list in main.nf.
+        choices: [
+          densmap,
+          diffusion_map,
+          ivis,
+          lmds,
+          neuralee,
+          pca,
+          phate,
+          pymde,
+          simlr,
+          tsne,
+          umap
+        ]
+  - name: "Outputs"
+    arguments:
+      - name: "--output"
+        type: file
+        required: true
+        direction: output
+        description: Destination path to the output.
+        example: output.h5mu
+
+      - name: "--output_obsm_key"
+        description: Key in the .obsm object under which to store the embedding. If not provided, "X_{method_id}" is used.
+        type: string
+        required: false
+# The two converters live in this project; the methods come from the repository declared in _viash.yaml.
+dependencies:
+  - name: dimred/openproblems_dr_h5mu_to_h5ad
+  - name: dimred/openproblems_dr_h5ad_to_h5mu
+  - name: methods/densmap
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/diffusion_map
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/ivis
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/lmds
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/neuralee
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/pca
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/phate
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/pymde
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/simlr
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/tsne
+    repository: openproblems_task_dimensionality_reduction
+  - name: methods/umap
+    repository: openproblems_task_dimensionality_reduction
+resources:
+  - type: nextflow_script
+    path: main.nf
+    entrypoint: run_wf
+runners:
+  - type: nextflow
diff --git a/src/dimred/openproblems_dr/main.nf b/src/dimred/openproblems_dr/main.nf
@@ -0,0 +1,55 @@
+// Method components the workflow can dispatch to; for each input only the
+// component whose config name equals the requested method_id is run.
+methods = [
+  densmap, diffusion_map, ivis, lmds, neuralee, pca,
+  phate, pymde, simlr, tsne, umap
+]
+
+workflow run_wf {
+  take:
+  input_ch
+
+  main:
+  output_ch = input_ch
+
+    // Step 1: convert the h5mu input into the h5ad file handed to the method.
+    | openproblems_dr_h5mu_to_h5ad.run(
+      fromState: [
+        "input",
+        "input_modality",
+        "input_layer_counts",
+        "input_layer_normalized",
+        "input_var_hvg_score"
+      ],
+      toState: ["method_input": "output"]
+    )
+
+    // Step 2: run the method selected via method_id on the converted file.
+    | runEach(
+      components: methods,
+      filter: { id, state, comp -> state.method_id == comp.config.name },
+      fromState: ["input": "method_input"],
+      toState: ["method_output": "output"]
+    )
+
+    // Step 3: combine the original h5mu input with the method output.
+    | openproblems_dr_h5ad_to_h5mu.run(
+      fromState: { id, state ->
+        // Fall back to "X_<method_id>" when no obsm key was requested.
+        def obsm_key = state.output_obsm_key ?: ("X_" + state.method_id)
+        [
+          "input_dataset": state.input,
+          "input_output": state.method_output,
+          "input_modality": state.input_modality,
+          "output_obsm_key": obsm_key
+        ]
+      },
+      toState: ["output": "output"]
+    )
+
+    // Keep only the final output in the state.
+    | setState(["output"])
+
+  emit:
+  output_ch
+}
diff --git a/src/dimred/openproblems_dr/test.sh b/src/dimred/openproblems_dr/test.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# Runs the built openproblems_dr workflow on a test dataset with the pymde method.
+# Presumably the components must be built first, e.g. with:
+# viash ns build --parallel -q openproblems --setup cb
+
+main_script="target/nextflow/dimred/openproblems_dr/main.nf"
+input_file="resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu"
+
+nextflow run . \
+  -main-script "$main_script" \
+  -profile docker \
+  -resume \
+  --input "$input_file" \
+  --method_id pymde \
+  --publish_dir output/foo
diff --git a/src/dimred/openproblems_dr_h5ad_to_h5mu/config.vsh.yaml b/src/dimred/openproblems_dr_h5ad_to_h5mu/config.vsh.yaml
@@ -0,0 +1,69 @@
+# Copies the embedding stored in obsm "X_emb" of a method's h5ad output into
+# one modality of the original h5mu file.
+name: openproblems_dr_h5ad_to_h5mu
+namespace: "dimred"
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: "--input_dataset"
+        type: file
+        description: Input h5mu file
+        direction: input
+        required: true
+        example: input.h5mu
+
+      - name: "--input_output"
+        type: file
+        description: Method output h5ad file
+        direction: input
+        required: true
+        example: input.h5ad
+
+      - name: "--input_modality"
+        type: string
+        description: Modality of the input h5mu file in which to store the embedding.
+        default: "rna"
+        required: false
+
+  - name: Outputs
+    arguments:
+      - name: "--output"
+        alternatives: ["-o"]
+        type: file
+        description: Output h5mu file.
+        direction: output
+        required: true
+        example: output.h5mu
+
+      - name: "--output_obsm_key"
+        # NOTE(review): script.py uses this value directly as the obsm key, so it is effectively required.
+        description: Key in the .obsm object under which to store the embedding.
+        type: string
+        required: false
+
+resources:
+  - type: python_script
+    path: script.py
+
+# test_resources:
+#   - type: python_script
+#     path: test.py
+#   - path: /resources_test/pbmc_1k_protein_v3
+
+engines:
+  - type: docker
+    image: python:3.12-slim
+    setup:
+      - type: apt
+        packages:
+          - procps
+      - type: python
+        __merge__: [/src/base/requirements/anndata_mudata.yaml, .]
+    __merge__: [/src/base/requirements/python_test_setup.yaml, .]
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [highcpu, midmem]
diff --git a/src/dimred/openproblems_dr_h5ad_to_h5mu/script.py b/src/dimred/openproblems_dr_h5ad_to_h5mu/script.py
@@ -0,0 +1,36 @@
+# Copy the embedding stored in obsm["X_emb"] of a method's h5ad output into
+# one modality of an h5mu file and write the result as a new h5mu file.
+import mudata as mu
+import anndata as ad
+
+## VIASH START
+par = {
+  "input_dataset": "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
+  "input_output": "work/90/56062df29c88150755a174b63bdb82/_viash_par/input_output_1/run.pymde.output.h5ad",
+  "input_modality": "rna",
+  "output_obsm_key": "X_dr",
+  "output": "output.h5mu"
+}
+## VIASH END
+
+# --output_obsm_key is optional in the config; fail early with a clear message
+# instead of storing the embedding under a None key.
+if not par["output_obsm_key"]:
+  raise ValueError("Argument --output_obsm_key must be provided")
+
+print("Reading h5mu file", flush=True)
+mdata = mu.read_h5mu(par["input_dataset"])
+adata = ad.read_h5ad(par["input_output"])
+
+if par["input_modality"] not in mdata.mod:
+  raise ValueError(f"Modality '{par['input_modality']}' not found in input dataset")
+if "X_emb" not in adata.obsm:
+  raise ValueError("Method output does not contain obsm['X_emb']")
+
+# NOTE(review): assumes the rows of X_emb are in the same order as the
+# observations of the destination modality -- confirm against the methods.
+adata_dest = mdata.mod[par["input_modality"]]
+adata_dest.obsm[par["output_obsm_key"]] = adata.obsm["X_emb"]
+
+print("Writing h5mu file", flush=True)
+mdata.write_h5mu(par["output"])
diff --git a/src/dimred/openproblems_dr_h5mu_to_h5ad/config.vsh.yaml b/src/dimred/openproblems_dr_h5mu_to_h5ad/config.vsh.yaml
@@ -0,0 +1,72 @@
+# Extracts one modality of an h5mu file into an h5ad file with the layers
+# "counts" and "normalized", as written by script.py.
+name: openproblems_dr_h5mu_to_h5ad
+namespace: "dimred"
+
+argument_groups:
+  - name: Inputs
+    arguments:
+      - name: "--input"
+        type: file
+        description: Input h5mu file
+        direction: input
+        required: true
+        example: input.h5mu
+
+      - name: "--input_modality"
+        type: string
+        description: Which modality of the input file to convert.
+        default: "rna"
+        required: false
+
+      - name: "--input_layer_counts"
+        type: string
+        description: In which layer to find the raw counts. If not provided, the raw counts are assumed to be in the .X object.
+        required: false
+
+      - name: "--input_layer_normalized"
+        type: string
+        description: In which layer to find the log normalized counts. If not provided, the log normalized counts are assumed to be in the .X object.
+        required: false
+        default: "log_normalized"
+
+      - name: "--input_var_hvg_score"
+        type: string
+        description: In which .var column to find the HVG scores. If not provided, the HVG scores are not passed.
+        required: false
+
+  - name: Outputs
+    arguments:
+      - name: "--output"
+        alternatives: ["-o"]
+        type: file
+        description: Output h5ad file.
+        direction: output
+        required: true
+        example: output.h5ad
+
+resources:
+  - type: python_script
+    path: script.py
+
+# test_resources:
+#   - type: python_script
+#     path: test.py
+#   - path: /resources_test/pbmc_1k_protein_v3
+
+engines:
+  - type: docker
+    image: python:3.12-slim
+    setup:
+      - type: apt
+        packages:
+          - procps
+      - type: python
+        __merge__: [/src/base/requirements/anndata_mudata.yaml, .]
+    __merge__: [/src/base/requirements/python_test_setup.yaml, .]
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [highcpu, midmem]
diff --git a/src/dimred/openproblems_dr_h5mu_to_h5ad/script.py b/src/dimred/openproblems_dr_h5mu_to_h5ad/script.py
@@ -0,0 +1,55 @@
+# Extract one modality of an h5mu file into an h5ad file that carries the
+# layers "counts" and "normalized" plus an optional var column "hvg_score".
+import mudata as mu
+import anndata as ad
+
+## VIASH START
+par = {
+  "input": "resources_test/pbmc_1k_protein_v3/pbmc_1k_protein_v3_mms.h5mu",
+  "input_modality": "rna",
+  "input_layer_counts": "log_normalized",
+  "input_layer_normalized": "log_normalized",
+  "input_var_hvg_score": None,
+  "output": "output.h5ad"
+}
+## VIASH END
+
+print("Reading h5mu file", flush=True)
+mdata = mu.read_h5mu(par["input"])
+
+print("Transforming to anndata", flush=True)
+def get_matrix(mdata, modality, layer):
+  """Return layer `layer` of modality `modality`, or its .X when `layer` is None."""
+  if layer is None:
+    return mdata.mod[modality].X
+  return mdata.mod[modality].layers[layer]
+
+mod = mdata.mod[par["input_modality"]]
+
+# create var: keep only the index; .copy() so that adding a column below
+# does not write into a slice of the source data frame
+var = mod.var[[]].copy()
+
+if par["input_var_hvg_score"] is not None:
+  # look the column up in the source .var; `var` itself has no columns
+  if par["input_var_hvg_score"] not in mod.var.columns:
+    raise ValueError(f"Variable {par['input_var_hvg_score']} not found in var")
+  var["hvg_score"] = mod.var[par["input_var_hvg_score"]]
+
+# create anndata
+adata = ad.AnnData(
+  layers={
+    "counts": get_matrix(mdata, par["input_modality"], par["input_layer_counts"]),
+    "normalized": get_matrix(mdata, par["input_modality"], par["input_layer_normalized"])
+  },
+  obs=mod.obs[[]],
+  var=var,
+  uns={
+    # NOTE(review): placeholder ids; presumably expected by the methods' input format -- confirm
+    "dataset_id": "dummy",
+    "normalization_id": "dummy"
+  }
+)
+
+print("Writing h5ad file", flush=True)
+adata.write_h5ad(par["output"], compression="gzip")