GRN-specific workflow

openproblems-bio · Aug 12, 2024 · 725a476
1 parent ee9f1ce
commit 725a476
Show file tree

Hide file tree

Showing 8 changed files with 373 additions and 16 deletions.
diff --git a/params/subsample_200_ridge.yaml b/params/subsample_200_ridge.yaml
@@ -1,22 +1,321 @@
 param_list:
   - id: pearson_celloracle
-    perturbation_data: resources/grn-benchmark/perturbation_data.h5ad
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: pearson
-    prediction: resources/grn_models/celloracle.csv 
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv 
     reg_type: ridge
     method_id: celloracle
     subsample: 200
     max_workers: 20
 
-  - id: lognorm_positive_control
-    perturbation_data: resources/grn-benchmark/perturbation_data.h5ad
+  - id: lognorm_celloracle
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv 
+    reg_type: ridge
+    method_id: celloracle
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_pearson_celloracle
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv 
+    reg_type: ridge
+    method_id: celloracle
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_lognorm_celloracle
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv 
+    reg_type: ridge
+    method_id: celloracle
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_pearson_celloracle
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv 
+    reg_type: ridge
+    method_id: celloracle
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_lognorm_celloracle
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv 
+    reg_type: ridge
+    method_id: celloracle
+    subsample: 200
+    max_workers: 20
+
+  - id: pearson_scenicplus
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv 
+    reg_type: ridge
+    method_id: scenicplus
+    subsample: 200
+    max_workers: 20
+
+  - id: lognorm_scenicplus
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv 
+    reg_type: ridge
+    method_id: scenicplus
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_pearson_scenicplus
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv 
+    reg_type: ridge
+    method_id: scenicplus
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_lognorm_scenicplus
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv 
+    reg_type: ridge
+    method_id: scenicplus
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_pearson_scenicplus
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv 
+    reg_type: ridge
+    method_id: scenicplus
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_lognorm_scenicplus
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv 
+    reg_type: ridge
+    method_id: scenicplus
+    subsample: 200
+    max_workers: 20
+
+  - id: pearson_figr
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv 
+    reg_type: ridge
+    method_id: figr
+    subsample: 200
+    max_workers: 20
+
+  - id: lognorm_figr
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv 
+    reg_type: ridge
+    method_id: figr
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_pearson_figr
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv 
+    reg_type: ridge
+    method_id: figr
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_lognorm_figr
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv 
+    reg_type: ridge
+    method_id: figr
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_pearson_figr
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv 
+    reg_type: ridge
+    method_id: figr
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_lognorm_figr
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv 
+    reg_type: ridge
+    method_id: figr
+    subsample: 200
+    max_workers: 20
+
+  - id: pearson_granie
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv 
+    reg_type: ridge
+    method_id: granie
+    subsample: 200
+    max_workers: 20
+
+  - id: lognorm_granie
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv 
+    reg_type: ridge
+    method_id: granie
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_pearson_granie
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv 
+    reg_type: ridge
+    method_id: granie
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_lognorm_granie
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv 
+    reg_type: ridge
+    method_id: granie
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_pearson_granie
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv 
+    reg_type: ridge
+    method_id: granie
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_lognorm_granie
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv 
+    reg_type: ridge
+    method_id: granie
+    subsample: 200
+    max_workers: 20
+
+  - id: pearson_scglue
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv 
+    reg_type: ridge
+    method_id: scglue
+    subsample: 200
+    max_workers: 20
+
+  - id: lognorm_scglue
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv 
+    reg_type: ridge
+    method_id: scglue
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_pearson_scglue
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv 
+    reg_type: ridge
+    method_id: scglue
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_lognorm_scglue
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv 
+    reg_type: ridge
+    method_id: scglue
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_pearson_scglue
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_pearson
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv 
+    reg_type: ridge
+    method_id: scglue
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_lognorm_scglue
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_lognorm
+    prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv 
+    reg_type: ridge
+    method_id: scglue
+    subsample: 200
+    max_workers: 20
+
+  - id: pearson_positive_control
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
     layer: pearson
     reg_type: ridge
     method_id: positive_control
-    tf_all: resources/prior/tf_all.csv
     subsample: 200
     max_workers: 20
 
+  - id: lognorm_positive_control
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: lognorm
+    reg_type: ridge
+    method_id: positive_control
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_pearson_positive_control
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_pearson
+    reg_type: ridge
+    method_id: positive_control
+    subsample: 200
+    max_workers: 20
+
+  - id: scgen_lognorm_positive_control
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: scgen_lognorm
+    reg_type: ridge
+    method_id: positive_control
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_pearson_positive_control
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_pearson
+    reg_type: ridge
+    method_id: positive_control
+    subsample: 200
+    max_workers: 20
+
+  - id: seurat_lognorm_positive_control
+    perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad
+    layer: seurat_lognorm
+    reg_type: ridge
+    method_id: positive_control
+    subsample: 200
+    max_workers: 20
 
 output_state: "state.yaml"
 publish_dir: "s3://openproblems-data/resources/grn/results/subsample_200_ridge"
diff --git a/scripts/run_grn_evaluation_tw.sh b/scripts/run_grn_evaluation_tw.sh
@@ -66,7 +66,8 @@ grn_name="positive_control"
 for layer in "${layers[@]}"; do
   cat >> $param_file << HERE
   - id: ${layer}_${grn_name}
-    perturbation_data: ${perturbation_data}
+    perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
+    tf_all: ${resources_dir}/prior/tf_all.csv
     layer: ${layer}
     reg_type: $reg_type
     method_id: $grn_name

diff --git a/scripts/run_grn_inference.sh b/scripts/run_grn_inference.sh
@@ -28,7 +28,7 @@ HERE
 
 if [ "$submit" = true ]; then
   nextflow run . \
-    -main-script  target/nextflow/workflows/run_grn_inference/main.nf \
+    -main-script  target/nextflow/workflows/grn_inference_celloracle/main.nf \
     -profile docker \
     -with-trace \
     -c src/common/nextflow_helpers/labels_ci.config \

diff --git a/src/methods/multi_omics/celloracle/config.vsh.yaml b/src/methods/multi_omics/celloracle/config.vsh.yaml
@@ -8,14 +8,20 @@ functionality:
     summary: "GRN inference using celloracle"
     description: |
       GRN inference using celloracle. 
-    documentation_url: https://morris-lab.github.io/CellOracle.documentation/    
-
+    documentation_url: https://morris-lab.github.io/CellOracle.documentation/   
+  arguments:
+    - name: --base_grn
+      type: file
+      direction: output
+      default: output/celloracle/base_grn.csv
+    - name: --links
+      type: file
+      direction: output
+      default: output/celloracle/links.celloracle.links
   resources:
     - type: python_script
       path: script.py
     - path: main.py
-
-
 platforms:
   - type: docker
     image: kenjikamimoto126/celloracle_ubuntu:0.18.0