diff --git a/_viash.yaml b/_viash.yaml index 02a348ca5..485711e53 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -11,5 +11,5 @@ config_mods: | .platforms[.type == 'docker'].target_image_source := 'https://github.com/openproblems-bio/task_grn_inference' .platforms[.type == "nextflow"].directives.tag := "$id" .platforms[.type == "nextflow"].auto.simplifyOutput := false - .platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h", veryveryhightime : "time = 48.h", threedaystime : "time = 72.h", oneweektime : "time = 168.h" } + .platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", onedaytime : "time = 24.h", threedaystime : "time = 72.h", oneweektime : "time = 168.h" } .platforms[.type == "nextflow"].config.script := "process.errorStrategy = 'ignore'" \ No newline at end of file diff --git a/runs.ipynb b/runs.ipynb index 89fe475de..45b3dd9e2 100644 --- a/runs.ipynb +++ b/runs.ipynb @@ -2,12 +2,13 @@ "cells": [ { "cell_type": "code", - "execution_count": 19, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "import yaml\n", "import pandas as pd \n", + "import matplotlib.pyplot as plt\n", "controls = ['negative_control','positive_control']\n", "grn_models = ['collectri','granie', 'figr', 'celloracle', 'scglue', 'scenicplus']" ] @@ -16,123 +17,70 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Robustness analysis" + "## GRN evaluation for multiomics methods" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ - "temporaryFolder: /tmp/viash_hub_repo12671172282063304508 uri: https://github.com/openproblems-bio/openproblems-v2.git\n", - "Cloning into '.'...\n", - "checkout out: List(git, checkout, origin/main_build, --, .) 0 \n", - "temporaryFolder: /tmp/viash_hub_repo14261181966255345651 uri: https://github.com/openproblems-bio/openproblems-v2.git\n", + "temporaryFolder: /tmp/viash_hub_repo8645851257998049428 uri: https://github.com/openproblems-bio/openproblems-v2.git\n", "Cloning into '.'...\n", "checkout out: List(git, checkout, origin/main_build, --, .) 0 \n", - "\u001b[37mExporting format_resources_r (multiomics) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/multiomics/format_resources_r\u001b[0m\n", - "\u001b[37mExporting batch_correction_seurat (perturbation) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/perturbation/batch_correction_seurat\u001b[0m\n", - "\u001b[37mExporting sc_counts (perturbation) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/perturbation/sc_counts\u001b[0m\n", - "\u001b[37mExporting regression_2 (metrics) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/metrics/regression_2\u001b[0m\n", - "\u001b[37mExporting noise_grn (robustness_analysis) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/robustness_analysis/noise_grn\u001b[0m\n", - "\u001b[37mExporting normalization (perturbation) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/perturbation/normalization\u001b[0m\n", - "\u001b[37mExporting create_test_resources (workflows) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/workflows/create_test_resources\u001b[0m\n", - "\u001b[37mExporting sc_counts (perturbation) =native=> 
/mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/perturbation/sc_counts\u001b[0m\n", - "\u001b[37mExporting create_test_data (testdata) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/testdata/create_test_data\u001b[0m\n", - "\u001b[37mExporting create_component (common) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/common/create_component\u001b[0m\n", "\u001b[37mExporting run_grn_evaluation (workflows) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/workflows/run_grn_evaluation\u001b[0m\n", - "\u001b[37mExporting run_robustness_analysis (workflows) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/workflows/run_robustness_analysis\u001b[0m\n", - "\u001b[37mExporting process_perturbation (workflows) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/workflows/process_perturbation\u001b[0m\n", - "\u001b[37mExporting noise_grn (robustness_analysis) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/robustness_analysis/noise_grn\u001b[0m\n", - "\u001b[37mExporting format_data (multiomics) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/multiomics/format_data\u001b[0m\n", - "\u001b[37mExporting multiome_matrix (multiomics) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/multiomics/multiome_matrix\u001b[0m\n", - "\u001b[37mExporting create_test_data (testdata) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/testdata/create_test_data\u001b[0m\n", - "\u001b[37mExporting multiome_matrix (multiomics) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/multiomics/multiome_matrix\u001b[0m\n", - 
"\u001b[37mExporting batch_correction_seurat (perturbation) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/perturbation/batch_correction_seurat\u001b[0m\n", - "\u001b[37mExporting format_data (multiomics) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/multiomics/format_data\u001b[0m\n", - "\u001b[37mExporting create_test_data (testdata) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/testdata/create_test_data\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/robustness_analysis/noise_grn:dev' with Dockerfile\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/multiomics/format_resources_r:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting sc_counts (perturbation) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/perturbation/sc_counts\u001b[0m\n", - "\u001b[37mExporting process_multiomics (workflows) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/workflows/process_multiomics\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/common/create_component:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting format_data (multiomics) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/multiomics/format_data\u001b[0m\n", - "\u001b[37mExporting celloracle (grn_methods) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/grn_methods/celloracle\u001b[0m\n", - "\u001b[37mExporting batch_correction_evaluation (perturbation) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/perturbation/batch_correction_evaluation\u001b[0m\n", - "\u001b[37mExporting 
batch_correction_evaluation (perturbation) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/perturbation/batch_correction_evaluation\u001b[0m\n", - "\u001b[37mExporting normalization (perturbation) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/perturbation/normalization\u001b[0m\n", - "\u001b[37mExporting normalization (perturbation) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/perturbation/normalization\u001b[0m\n", - "\u001b[37mExporting batch_correction_scgen (perturbation) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/perturbation/batch_correction_scgen\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/testdata/create_test_data:dev' with Dockerfile\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/multiomics/format_data:dev' with Dockerfile\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/perturbation/sc_counts:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting batch_correction_evaluation (perturbation) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/perturbation/batch_correction_evaluation\u001b[0m\n", - "\u001b[37mExporting celloracle (grn_methods) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/grn_methods/celloracle\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/perturbation/normalization:dev' with Dockerfile\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/perturbation/batch_correction_evaluation:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting batch_correction_scgen (perturbation) =nextflow=> 
/mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/perturbation/batch_correction_scgen\u001b[0m\n", - "\u001b[37mExporting batch_correction_scgen (perturbation) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/perturbation/batch_correction_scgen\u001b[0m\n", - "\u001b[37mExporting positive_control (control_methods) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/control_methods/positive_control\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/perturbation/batch_correction_scgen:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting grn_inference_celloracle (workflows) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/workflows/grn_inference_celloracle\u001b[0m\n", - "\u001b[37mExporting batch_correction_seurat (perturbation) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/perturbation/batch_correction_seurat\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/control_methods/positive_control:dev' with Dockerfile\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/perturbation/batch_correction_seurat:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting granie (grn_methods) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/grn_methods/granie\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/grn_methods/granie:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting explanatory_analysis =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/explanatory_analysis\u001b[0m\n", - "\u001b[37mExporting scenicplus (grn_methods) =nextflow=> 
/mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/grn_methods/scenicplus\u001b[0m\n", - "\u001b[37mExporting grn_inference_scenicplus (workflows) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/workflows/grn_inference_scenicplus\u001b[0m\n", - "\u001b[37mExporting dummy (grn_methods) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/grn_methods/dummy\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/grn_methods/dummy:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting scglue (grn_methods) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/grn_methods/scglue\u001b[0m\n", - "\u001b[37mExporting create_component (common) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/common/create_component\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/grn_methods/scglue:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting format_resources_r (multiomics) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/multiomics/format_resources_r\u001b[0m\n", - "\u001b[37mExporting create_component (common) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/common/create_component\u001b[0m\n", - "\u001b[37mExporting format_resources_r (multiomics) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/multiomics/format_resources_r\u001b[0m\n", - "\u001b[37mExporting sync_test_resources (common) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/common/sync_test_resources\u001b[0m\n", - "\u001b[37mExporting multiome_matrix (multiomics) =docker=> 
/mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/multiomics/multiome_matrix\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/common/sync_test_resources:dev' with Dockerfile\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/multiomics/multiome_matrix:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting dummy (grn_methods) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/grn_methods/dummy\u001b[0m\n", - "\u001b[37mExporting dummy (grn_methods) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/grn_methods/dummy\u001b[0m\n", - "\u001b[37mExporting celloracle (grn_methods) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/grn_methods/celloracle\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/grn_methods/celloracle:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting granie (grn_methods) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/grn_methods/granie\u001b[0m\n", - "\u001b[37mExporting scglue (grn_methods) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/grn_methods/scglue\u001b[0m\n", - "\u001b[37mExporting sync_test_resources (common) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/common/sync_test_resources\u001b[0m\n", - "\u001b[37mExporting sync_test_resources (common) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/common/sync_test_resources\u001b[0m\n", - "\u001b[37mExporting granie (grn_methods) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/grn_methods/granie\u001b[0m\n", - "\u001b[37mExporting grn_inference_scglue 
(workflows) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/workflows/grn_inference_scglue\u001b[0m\n", - "\u001b[37mExporting negative_control (control_methods) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/control_methods/negative_control\u001b[0m\n", - "\u001b[37mExporting grn_inference_granie (workflows) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/workflows/grn_inference_granie\u001b[0m\n", - "\u001b[37mExporting regression_1 (metrics) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/metrics/regression_1\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/control_methods/negative_control:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting scenicplus (grn_methods) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/grn_methods/scenicplus\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/metrics/regression_1:dev' with Dockerfile\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/grn_methods/scenicplus:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting explanatory_analysis =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/explanatory_analysis\u001b[0m\n", - "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/explanatory_analysis:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting positive_control (control_methods) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/control_methods/positive_control\u001b[0m\n", - "\u001b[37mExporting explanatory_analysis =native=> 
/mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/explanatory_analysis\u001b[0m\n", - "\u001b[37mExporting negative_control (control_methods) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/control_methods/negative_control\u001b[0m\n", - "\u001b[37mExporting regression_1 (metrics) =nextflow=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/metrics/regression_1\u001b[0m\n", + "\u001b[33mNot all configs built successfully\u001b[0m\n", + "\u001b[33m 39 configs were disabled\u001b[0m\n", + "\u001b[32m 1/1 configs built successfully\u001b[0m\n" + ] + } + ], + "source": [ + "!viash ns build --setup cb -q run_grn_evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "!bash scripts/run_grn_evaluation.sh \"ridge\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Robustness analysis" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ "\u001b[37mExporting regression_2 (metrics) =docker=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/docker/metrics/regression_2\u001b[0m\n", - "\u001b[37mExporting scenicplus (grn_methods) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/grn_methods/scenicplus\u001b[0m\n", "\u001b[37m[notice] Building container 'ghcr.io/openproblems-bio/task_grn_inference/metrics/regression_2:dev' with Dockerfile\u001b[0m\n", - "\u001b[37mExporting scglue (grn_methods) =native=> /mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/native/grn_methods/scglue\u001b[0m\n", - "\u001b[32mAll 70 configs built successfully\u001b[0m\n" + "\u001b[37mExporting regression_2 (metrics) =nextflow=> 
/mnt/c/Users/nourisa/Documents/testProjs/ongoing/task_grn_benchmark/target/nextflow/metrics/regression_2\u001b[0m\n", + "\u001b[33mNot all configs built successfully\u001b[0m\n", + "\u001b[33m 39 configs were disabled\u001b[0m\n", + "\u001b[32m 2/2 configs built successfully\u001b[0m\n" ] } ], "source": [ - "!viash ns build --setup cb --parallel\n", - "# !viash ns build --setup cb -q run_robustness_analysis" + "# !viash ns build --setup cb --parallel\n", + "!viash ns build --setup cb -q run_robustness_analysis\n", + "# !viash ns build --setup cb -q regression_2\n" ] }, { @@ -2400,7 +2348,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -2443,7 +2391,7 @@ " 0.33327\n", " \n", " \n", - " Negative control\n", + " Negative control compound\n", " -0.249304\n", " 0.200482\n", " 0.211147\n", @@ -2452,7 +2400,7 @@ " 0.358298\n", " \n", " \n", - " Positive control\n", + " Positive control compounds\n", " -0.242072\n", " 0.175922\n", " 0.179734\n", @@ -2465,13 +2413,18 @@ "" ], "text/plain": [ - " collectri granie figr celloracle scglue scenicplus\n", - "Default -0.224471 0.185105 0.186789 0.228927 0.234921 0.33327\n", - "Negative control -0.249304 0.200482 0.211147 0.234089 0.250881 0.358298\n", - "Positive control -0.242072 0.175922 0.179734 0.215799 0.218924 0.316465" + " collectri granie figr celloracle scglue \\\n", + "Default -0.224471 0.185105 0.186789 0.228927 0.234921 \n", + "Negative control compound -0.249304 0.200482 0.211147 0.234089 0.250881 \n", + "Positive control compounds -0.242072 0.175922 0.179734 0.215799 0.218924 \n", + "\n", + " scenicplus \n", + "Default 0.33327 \n", + "Negative control compound 0.358298 \n", + "Positive control compounds 0.316465 " ] }, - "execution_count": 39, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -2495,7 +2448,7 @@ " df_reg1.loc[int(subsample), method] = ex_true_tf_value\n", "\n", "df_reg1 = df_reg1.sort_index()\n", - 
"df_reg1.index = ['Default', 'Negative control', 'Positive control']\n", + "df_reg1.index = ['Default', 'Negative control compound', 'Positive control compounds']\n", "df_reg1" ] }, @@ -3366,13 +3319,14 @@ "import numpy as np\n", "import pandas as pd\n", "import anndata as ad\n", + "import scanpy as sc\n", "import sys\n", "import numpy as np\n", "from sklearn.preprocessing import StandardScaler\n", "from tqdm import tqdm\n", "\n", "par = {\n", - " \"perturbation_data\": \"resources/grn-benchmark/perturbation_data.h5ad\",\n", + " \"multiomics_rna\": \"resources/grn-benchmark/multiomics_rna.h5ad\",\n", " \"layer\": \"pearson\",\n", " \"tf_all\": \"resources/prior/tf_all.csv\"\n", "}\n", @@ -3384,13 +3338,24 @@ " grn = np.dot(X_sub.T, X_sub) / X_sub.shape[0]\n", " grns.append(grn)\n", " return np.mean(grns, axis=0)\n", - "perturbation_data = ad.read_h5ad(par[\"perturbation_data\"])\n", - "gene_names = perturbation_data.var_names.to_numpy()\n", + "multiomics_rna = ad.read_h5ad(par[\"multiomics_rna\"])\n", + "gene_names = multiomics_rna.var_names.to_numpy()\n", "tf_all = np.loadtxt(par['tf_all'], dtype=str)\n", - "groups = perturbation_data.obs.cell_type\n", + "groups = multiomics_rna.obs.cell_type\n", "tf_all = np.intersect1d(tf_all, gene_names)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sc.pp.normalize_total(multiomics_rna)\n", + "sc.pp.log1p(multiomics_rna)\n", + "sc.pp.scale(multiomics_rna)" + ] + }, { "cell_type": "code", "execution_count": 49, @@ -3405,7 +3370,7 @@ } ], "source": [ - "net = create_positive_control(perturbation_data.layers[par[\"layer\"]], groups)\n", + "net = create_positive_control(multiomics_rna.X, groups)\n", "net = pd.DataFrame(net, index=gene_names, columns=gene_names)\n", "# subset for genes\n", "# n_genes = 10000\n", @@ -5370,6 +5335,48 @@ "cmd = f\"viash run src/metrics/regression_1/config.vsh.yaml -- --prediction output/causal/grns/corr_net_sub.csv --score 
output/causal/scores/corr_net_sub.h5ad\"\n", "subprocess.run(cmd, shell=True)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Single omics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!bash scripts/run_benchmark_single_omics.sh" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "download: s3://openproblems-data/resources/grn/results/single_omics/state.yaml to resources/results/single_omics/state.yaml\n", + "download: s3://openproblems-data/resources/grn/results/single_omics/scores.yaml to resources/results/single_omics/scores.yaml\n", + "download: s3://openproblems-data/resources/grn/results/single_omics/trace.txt to resources/results/single_omics/trace.txt\n" + ] + } + ], + "source": [ + "!aws s3 sync s3://openproblems-data/resources/grn/results/single_omics ./resources/results/single_omics" + ] } ], "metadata": { diff --git a/scripts/repo/run_grn_evaluation.sh b/scripts/repo/run_grn_evaluation.sh new file mode 100644 index 000000000..a7de7358a --- /dev/null +++ b/scripts/repo/run_grn_evaluation.sh @@ -0,0 +1,51 @@ +#!/bin/bash + +# Default values +grn="" +sample="200" # Default value for sample +reg_type="ridge" +score="output/score.csv" + +# Parse arguments +while [[ "$#" -gt 0 ]]; do + case $1 in + --grn) grn="$2"; shift ;; + --sample) sample="$2"; shift ;; + --reg_type) reg_type="$2"; shift ;; + --score) score="$2"; shift ;; + *) echo "Unknown parameter passed: $1"; exit 1 ;; + esac + shift +done + +# Ensure required arguments are provided +if [ -z "$grn" ]; then + echo "Usage: $0 --grn [--sample ]" + exit 1 +fi + +# Print parsed arguments (for debugging purposes) +echo "GRN file: $grn" +echo "Sample value: $sample" +echo "Regression model: 
$reg_type" + +# Clean bin/ folder +rm -r bin +mkdir bin + +# Run regression analysis 1 +echo "Running GRN benchmark with $grn and sample size $sample" +echo "Regression 1" +mkdir -p bin/regression_1 +viash build src/metrics/regression_1/config.vsh.yaml -p docker -o bin/regression_1 +bin/regression_1/regression_1 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --reg_type $reg_type --prediction $grn --score $score + +# Run regression analysis 2 +echo "Regression 2" +if [ ! -f resources/grn-benchmark/consensus-num-regulators.json ]; then + viash build src/metrics/regression_2/consensus/config.vsh.yaml --platform docker -o bin/regression_2/consensus + bin/regression_2/consensus/consensus_for_regression_2 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --output resources/grn-benchmark/consensus-num-regulators.json --grn_folder resources/grn-benchmark/grn_models/ --grns ananse.csv,celloracle.csv,figr.csv,granie.csv,scenicplus.csv,scglue.csv +fi +mkdir -p bin/regression_2 +viash build src/metrics/regression_2/config.vsh.yaml -p docker -o bin/regression_2 +bin/regression_2/regression_2 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --consensus resources/grn-benchmark/consensus-num-regulators.json --layer scgen_pearson --reg_type $reg_type --prediction $grn --score $score diff --git a/scripts/run_grn_evaluation_tw.sh b/scripts/repo/run_grn_evaluation_all_layers.sh similarity index 92% rename from scripts/run_grn_evaluation_tw.sh rename to scripts/repo/run_grn_evaluation_all_layers.sh index 3a60cb3e3..b5e474f6c 100644 --- a/scripts/run_grn_evaluation_tw.sh +++ b/scripts/repo/run_grn_evaluation_all_layers.sh @@ -1,11 +1,13 @@ #!/bin/bash # RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" -RUN_ID="pearson_gb_subsample_RF" +reg_type=${1} #GB, ridge + +RUN_ID="grn_evaluation_${reg_type}" resources_dir="s3://openproblems-data/resources/grn" publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}" 
grn_models_folder="${resources_dir}/grn_models" -reg_type=RF + subsample=-2 max_workers=10 @@ -21,8 +23,7 @@ grn_names=( "scglue" ) -# layers=("pearson" "lognorm" "scgen_pearson" "scgen_lognorm" "seurat_pearson" "seurat_lognorm") -layers=( "pearson" ) +layers=("pearson" "lognorm" "scgen_pearson" "scgen_lognorm" "seurat_pearson" "seurat_lognorm") # Start writing to the YAML file cat > $param_file << HERE diff --git a/scripts/run_benchmark_single_omics.sh b/scripts/run_benchmark_single_omics.sh index 88f9ec408..0ab78cf33 100644 --- a/scripts/run_benchmark_single_omics.sh +++ b/scripts/run_benchmark_single_omics.sh @@ -2,11 +2,11 @@ # RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" RUN_ID="single_omics" -# resources_dir="s3://openproblems-data/resources_test/grn" -# publish_dir="s3://openproblems-data/resources_test/grn/results/${RUN_ID}" +resources_dir="s3://openproblems-data/resources_test/grn" +publish_dir="s3://openproblems-data/resources_test/grn/results/${RUN_ID}" -resources_dir="./resources_test/" -publish_dir="output/${RUN_ID}" +# resources_dir="./resources_test/" +# publish_dir="output/${RUN_ID}" reg_type=ridge subsample=-2 diff --git a/scripts/run_grn_evaluation.sh b/scripts/run_grn_evaluation.sh index a7de7358a..0366d56d6 100644 --- a/scripts/run_grn_evaluation.sh +++ b/scripts/run_grn_evaluation.sh @@ -1,51 +1,95 @@ #!/bin/bash -# Default values -grn="" -sample="200" # Default value for sample -reg_type="ridge" -score="output/score.csv" - -# Parse arguments -while [[ "$#" -gt 0 ]]; do - case $1 in - --grn) grn="$2"; shift ;; - --sample) sample="$2"; shift ;; - --reg_type) reg_type="$2"; shift ;; - --score) score="$2"; shift ;; - *) echo "Unknown parameter passed: $1"; exit 1 ;; - esac - shift +# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" +reg_type=${1} #GB, ridge + +RUN_ID="grn_evaluation_${reg_type}" +# resources_dir="s3://openproblems-data/resources/grn" +resources_dir="./resources" +publish_dir="${resources_dir}/results/${RUN_ID}" 
+grn_models_folder="${resources_dir}/grn_models" + +subsample=-2 +max_workers=10 + +param_file="./params/${RUN_ID}.yaml" + +grn_names=( + "collectri" + "celloracle" + "scenicplus" + "figr" + "granie" + "scglue" +) +# Start writing to the YAML file +cat > $param_file << HERE +param_list: +HERE + +append_entry() { + cat >> $param_file << HERE + - id: ${reg_type}_${1}_${3} + perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad + reg_type: $reg_type + method_id: $1 + subsample: $subsample + max_workers: $max_workers + tf_all: ${resources_dir}/prior/tf_all.csv + layer: ${3} + consensus: ${resources_dir}/prior/consensus-num-regulators.json +HERE + + # Conditionally append the prediction line if the second argument is "true" + if [[ $2 == "true" ]]; then + cat >> $param_file << HERE + prediction: ${grn_models_folder}/$1.csv +HERE + fi +} +layers=(pearson scgen_pearson) +# Loop through grn_names and layers +for layer in "${layers[@]}"; do + for grn_name in "${grn_names[@]}"; do + append_entry "$grn_name" "true" "$layer" + done +done + +# # Append negative control +grn_name="negative_control" +for layer in "${layers[@]}"; do + append_entry "$grn_name" "false" "$layer" done -# Ensure required arguments are provided -if [ -z "$grn" ]; then - echo "Usage: $0 --grn [--sample ]" - exit 1 -fi - -# Print parsed arguments (for debugging purposes) -echo "GRN file: $grn" -echo "Sample value: $sample" -echo "Regression model: $reg_type" - -# Clean bin/ folder -rm -r bin -mkdir bin - -# Run regression analysis 1 -echo "Running GRN benchmark with $grn and sample size $sample" -echo "Regression 1" -mkdir -p bin/regression_1 -viash build src/metrics/regression_1/config.vsh.yaml -p docker -o bin/regression_1 -bin/regression_1/regression_1 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --reg_type $reg_type --prediction $grn --score $score - -# Run regression analysis 2 -echo "Regression 2" -if [ ! 
-f resources/grn-benchmark/consensus-num-regulators.json ]; then - viash build src/metrics/regression_2/consensus/config.vsh.yaml --platform docker -o bin/regression_2/consensus - bin/regression_2/consensus/consensus_for_regression_2 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --output resources/grn-benchmark/consensus-num-regulators.json --grn_folder resources/grn-benchmark/grn_models/ --grns ananse.csv,celloracle.csv,figr.csv,granie.csv,scenicplus.csv,scglue.csv -fi -mkdir -p bin/regression_2 -viash build src/metrics/regression_2/config.vsh.yaml -p docker -o bin/regression_2 -bin/regression_2/regression_2 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --consensus resources/grn-benchmark/consensus-num-regulators.json --layer scgen_pearson --reg_type $reg_type --prediction $grn --score $score + +# Append positive controls +grn_name="positive_control" +for layer in "${layers[@]}"; do + append_entry "$grn_name" "false" "$layer" +done + + +# Append the remaining output_state and publish_dir to the YAML file +cat >> $param_file << HERE +output_state: "state.yaml" +publish_dir: "$publish_dir" +HERE + +nextflow run . 
\ + -main-script target/nextflow/workflows/run_grn_evaluation/main.nf \ + -profile docker \ + -with-trace \ + -c src/common/nextflow_helpers/labels_ci.config \ + -params-file ${param_file} + +# ./tw-windows-x86_64.exe launch ` +# https://github.com/openproblems-bio/task_grn_benchmark.git ` +# --revision build/main ` +# --pull-latest ` +# --main-script target/nextflow/workflows/run_grn_evaluation/main.nf ` +# --workspace 53907369739130 ` +# --compute-env 6TeIFgV5OY4pJCk8I0bfOh ` +# --params-file ./params/scgen_pearson_gb_pcs.yaml ` +# --config src/common/nextflow_helpers/labels_tw.config + + diff --git a/scripts/run_robust_analys.sh b/scripts/run_robust_analys.sh index 6d95347a3..3ee04494b 100644 --- a/scripts/run_robust_analys.sh +++ b/scripts/run_robust_analys.sh @@ -3,7 +3,7 @@ # RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" degrees=(0 10 20 50 100) -noise_type="$1" +noise_type="$1" #"net" echo $noise_type RUN_ID="robust_analy_$1" @@ -19,7 +19,6 @@ grn_models_folder="${resources_dir}/grn_models" reg_type=ridge subsample=-2 max_workers=10 -layer=pearson param_file="./params/${RUN_ID}.yaml" @@ -33,7 +32,6 @@ grn_names=( ) - # Start writing to the YAML file cat > $param_file << HERE param_list: @@ -41,11 +39,11 @@ HERE append_entry() { cat >> $param_file << HERE - - id: ${1}_${2} + - id: ${1}_${2}_${3} perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad - layer: ${layer} reg_type: $reg_type method_id: ${2}-${1} + layer: ${3} subsample: $subsample max_workers: $max_workers consensus: ${resources_dir}/prior/consensus-num-regulators.json @@ -55,12 +53,14 @@ append_entry() { HERE } # Loop through grn_names and layers -for degree in "${degrees[@]}"; do - for grn_name in "${grn_names[@]}"; do - append_entry "$grn_name" "$degree" +layers=(pearson scgen_pearson) +for layer in "${layers[@]}"; do + for degree in "${degrees[@]}"; do + for grn_name in "${grn_names[@]}"; do + append_entry "$grn_name" "$degree" "$layer" + done done -done - +done # Append the 
remaining output_state and publish_dir to the YAML file cat >> $param_file << HERE diff --git a/src/control_methods/positive_control/script.py b/src/control_methods/positive_control/script.py index 7d9771b93..8f1b715ce 100644 --- a/src/control_methods/positive_control/script.py +++ b/src/control_methods/positive_control/script.py @@ -14,6 +14,7 @@ "prediction": "output/positive_control.csv", } ## VIASH END +print(par) print('Reading input data') perturbation_data = ad.read_h5ad(par["perturbation_data"]) gene_names = perturbation_data.var_names.to_numpy() diff --git a/src/methods/multi_omics/celloracle/main.py b/src/methods/multi_omics/celloracle/main.py index d21481172..d091cd9f7 100644 --- a/src/methods/multi_omics/celloracle/main.py +++ b/src/methods/multi_omics/celloracle/main.py @@ -141,8 +141,13 @@ def refine_grns(par): grn = grn[mask] grn = grn[~(grn.coef_abs==0)] # remove those with 0 coeff # filter based on z score - z_scores = (grn.coef_abs - grn.coef_abs.mean())/grn.coef_abs.std() - mask = z_scores > 2 + # z_scores = (grn.coef_abs - grn.coef_abs.mean())/grn.coef_abs.std() + # mask = z_scores > 2 + # Sort by absolute coefficient values + grn = grn.sort_values(by="coef_abs", ascending=False) + + # Select the top 50,000 links based on absolute weight + mask = grn.index[:par['max_n_links']] grn = grn.loc[mask, :] grn = grn[['source', 'target', 'coef_mean']] diff --git a/src/methods/single_omics/ennet/config.vsh.yaml b/src/methods/single_omics/ennet/config.vsh.yaml index 66e4e9835..b4f20a7a9 100644 --- a/src/methods/single_omics/ennet/config.vsh.yaml +++ b/src/methods/single_omics/ennet/config.vsh.yaml @@ -29,4 +29,4 @@ platforms: - type: native - type: nextflow directives: - label: [midtime,midmem,midcpu] + label: [onedaytime,midmem,midcpu] diff --git a/src/methods/single_omics/grnboost2/config.vsh.yaml b/src/methods/single_omics/grnboost2/config.vsh.yaml index 30fb59eb9..b9effc2dc 100644 --- a/src/methods/single_omics/grnboost2/config.vsh.yaml +++ 
b/src/methods/single_omics/grnboost2/config.vsh.yaml @@ -26,4 +26,4 @@ platforms: - type: native - type: nextflow directives: - label: [midtime,midmem,midcpu] + label: [onedaytime,midmem,midcpu] diff --git a/src/methods/single_omics/scsgl/config.vsh.yaml b/src/methods/single_omics/scsgl/config.vsh.yaml index 6dd5b532e..bff39ce3b 100644 --- a/src/methods/single_omics/scsgl/config.vsh.yaml +++ b/src/methods/single_omics/scsgl/config.vsh.yaml @@ -28,4 +28,4 @@ platforms: - type: native - type: nextflow directives: - label: [midtime,midmem,midcpu] + label: [onedaytime,midmem,midcpu] diff --git a/src/methods/single_omics/tigress/config.vsh.yaml b/src/methods/single_omics/tigress/config.vsh.yaml index 3d0be3eef..5ff3c6855 100644 --- a/src/methods/single_omics/tigress/config.vsh.yaml +++ b/src/methods/single_omics/tigress/config.vsh.yaml @@ -26,4 +26,4 @@ platforms: - type: native - type: nextflow directives: - label: [midtime, midmem, highcpu] + label: [onedaytime, midmem, highcpu] diff --git a/src/metrics/regression_1/script.py b/src/metrics/regression_1/script.py index f3eb89f44..cba39bbdb 100644 --- a/src/metrics/regression_1/script.py +++ b/src/metrics/regression_1/script.py @@ -9,10 +9,12 @@ "prediction": "resources/grn-benchmark/grn_models/collectri.csv", 'score': 'output/score.h5ad', 'reg_type': 'ridge', - 'layer': 'lognorm', + 'layer': 'pearson', 'subsample': 200, 'max_workers': 4, } +print(par) + ## VIASH END sys.path.append(meta["resources_dir"]) diff --git a/src/metrics/regression_2/script.py b/src/metrics/regression_2/script.py index 70f12aefe..287ab0b1e 100644 --- a/src/metrics/regression_2/script.py +++ b/src/metrics/regression_2/script.py @@ -18,6 +18,7 @@ } ## VIASH END +print(par) sys.path.append(meta['resources_dir']) from main import main diff --git a/src/workflows/run_benchmark_single_omics/config.vsh.yaml b/src/workflows/run_benchmark_single_omics/config.vsh.yaml index 1a7707f4d..b17a5ba8d 100644 --- 
a/src/workflows/run_benchmark_single_omics/config.vsh.yaml +++ b/src/workflows/run_benchmark_single_omics/config.vsh.yaml @@ -87,4 +87,4 @@ functionality: platforms: - type: nextflow directives: - label: [ hightime, midmem, highcpu ] + label: [ onedaytime, midmem, highcpu] diff --git a/src/workflows/run_benchmark_single_omics/main.nf b/src/workflows/run_benchmark_single_omics/main.nf index d81dad698..d0ef0391d 100644 --- a/src/workflows/run_benchmark_single_omics/main.nf +++ b/src/workflows/run_benchmark_single_omics/main.nf @@ -24,11 +24,9 @@ workflow run_wf { // ] methods = [ - portia, ennet, grnboost2, scsgl, - ppcor, tigress ] diff --git a/src/workflows/run_grn_evaluation/main.nf b/src/workflows/run_grn_evaluation/main.nf index d3eb8ee16..3fac0b295 100644 --- a/src/workflows/run_grn_evaluation/main.nf +++ b/src/workflows/run_grn_evaluation/main.nf @@ -14,7 +14,8 @@ workflow run_wf { // construct list of metrics metrics = [ - regression_1 + regression_1, + regression_2 ] /*************************** @@ -63,13 +64,14 @@ workflow run_wf { // use 'fromState' to fetch the arguments the component requires from the overall state fromState: [ perturbation_data: "perturbation_data", - layer: "layer", prediction: "prediction", subsample: "subsample", reg_type: "reg_type", method_id: "method_id", max_workers: "max_workers", - consensus: "consensus" + consensus: "consensus", + layer: "layer", + tf_all: "tf_all" ], // use 'toState' to publish that component's outputs to the overall state toState: { id, output, state, comp -> diff --git a/src/workflows/run_robustness_analysis/main.nf b/src/workflows/run_robustness_analysis/main.nf index a78e02f3d..0255c6c99 100644 --- a/src/workflows/run_robustness_analysis/main.nf +++ b/src/workflows/run_robustness_analysis/main.nf @@ -14,7 +14,8 @@ workflow run_wf { // construct list of metrics metrics = [ - regression_1 + regression_1, + regression_2 ] /*************************** @@ -44,7 +45,7 @@ workflow run_wf { fromState: [ 
perturbation_data: "perturbation_data", prediction: "prediction_n", - layer: "layer", + layer: "layer", subsample: "subsample", reg_type: "reg_type", method_id: "method_id",