diff --git a/params/subsample_200_ridge.yaml b/params/subsample_200_ridge.yaml deleted file mode 100644 index 7d6ddfafb..000000000 --- a/params/subsample_200_ridge.yaml +++ /dev/null @@ -1,373 +0,0 @@ -param_list: - - id: pearson_celloracle - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: pearson - reg_type: ridge - method_id: celloracle - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv - - id: lognorm_celloracle - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: lognorm - reg_type: ridge - method_id: celloracle - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv - - id: scgen_pearson_celloracle - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: scgen_pearson - reg_type: ridge - method_id: celloracle - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv - - id: scgen_lognorm_celloracle - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: scgen_lognorm - reg_type: ridge - method_id: celloracle - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv - - id: seurat_pearson_celloracle - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_pearson - reg_type: ridge - method_id: celloracle - subsample: 200 - max_workers: 20 - 
consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv - - id: seurat_lognorm_celloracle - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_lognorm - reg_type: ridge - method_id: celloracle - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/celloracle.csv - - id: pearson_scenicplus - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: pearson - reg_type: ridge - method_id: scenicplus - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv - - id: lognorm_scenicplus - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: lognorm - reg_type: ridge - method_id: scenicplus - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv - - id: scgen_pearson_scenicplus - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: scgen_pearson - reg_type: ridge - method_id: scenicplus - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv - - id: scgen_lognorm_scenicplus - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: scgen_lognorm - reg_type: ridge - method_id: scenicplus - subsample: 200 - max_workers: 20 - consensus: 
s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv - - id: seurat_pearson_scenicplus - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_pearson - reg_type: ridge - method_id: scenicplus - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv - - id: seurat_lognorm_scenicplus - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_lognorm - reg_type: ridge - method_id: scenicplus - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/scenicplus.csv - - id: pearson_figr - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: pearson - reg_type: ridge - method_id: figr - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv - - id: lognorm_figr - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: lognorm - reg_type: ridge - method_id: figr - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv - - id: scgen_pearson_figr - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: scgen_pearson - reg_type: ridge - method_id: figr - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: 
s3://openproblems-data/resources/grn/grn_models/figr.csv - - id: scgen_lognorm_figr - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: scgen_lognorm - reg_type: ridge - method_id: figr - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv - - id: seurat_pearson_figr - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_pearson - reg_type: ridge - method_id: figr - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv - - id: seurat_lognorm_figr - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_lognorm - reg_type: ridge - method_id: figr - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/figr.csv - - id: pearson_granie - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: pearson - reg_type: ridge - method_id: granie - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv - - id: lognorm_granie - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: lognorm - reg_type: ridge - method_id: granie - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv - - id: scgen_pearson_granie - perturbation_data: 
s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: scgen_pearson - reg_type: ridge - method_id: granie - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv - - id: scgen_lognorm_granie - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: scgen_lognorm - reg_type: ridge - method_id: granie - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv - - id: seurat_pearson_granie - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_pearson - reg_type: ridge - method_id: granie - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv - - id: seurat_lognorm_granie - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_lognorm - reg_type: ridge - method_id: granie - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/granie.csv - - id: pearson_scglue - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: pearson - reg_type: ridge - method_id: scglue - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv - - id: lognorm_scglue - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: lognorm - reg_type: ridge 
- method_id: scglue - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv - - id: scgen_pearson_scglue - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: scgen_pearson - reg_type: ridge - method_id: scglue - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv - - id: scgen_lognorm_scglue - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: scgen_lognorm - reg_type: ridge - method_id: scglue - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv - - id: seurat_pearson_scglue - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_pearson - reg_type: ridge - method_id: scglue - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv - - id: seurat_lognorm_scglue - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_lognorm - reg_type: ridge - method_id: scglue - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - prediction: s3://openproblems-data/resources/grn/grn_models/scglue.csv - - id: seurat_lognorm_negative_control - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_lognorm - reg_type: ridge - method_id: negative_control - subsample: 200 - max_workers: 20 - consensus: 
s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - - - - id: pearson_positive_control - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: pearson - reg_type: ridge - method_id: positive_control - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv - - - id: lognorm_positive_control - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: lognorm - reg_type: ridge - method_id: positive_control - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv - - - id: scgen_pearson_positive_control - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: scgen_pearson - reg_type: ridge - method_id: positive_control - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv - - - id: scgen_lognorm_positive_control - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: scgen_lognorm - reg_type: ridge - method_id: positive_control - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv - - - id: seurat_pearson_positive_control - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_pearson - reg_type: ridge - method_id: positive_control - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - tf_all: 
s3://openproblems-data/resources/grn/prior/tf_all.csv - - - id: seurat_lognorm_positive_control - perturbation_data: s3://openproblems-data/resources/grn/grn-benchmark/perturbation_data.h5ad - layer: seurat_lognorm - reg_type: ridge - method_id: positive_control - subsample: 200 - max_workers: 20 - consensus: s3://openproblems-data/resources/grn/prior/consensus-num-regulators.json - tf_all: s3://openproblems-data/resources/grn/prior/tf_all.csv - -output_state: "state.yaml" -publish_dir: "s3://openproblems-data/resources/grn/results/subsample_200_ridge" diff --git a/scripts/run_robust_analys.sh b/scripts/run_robust_analys.sh new file mode 100644 index 000000000..1f464ff30 --- /dev/null +++ b/scripts/run_robust_analys.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)" + +RUN_ID="robust_analy" +resources_dir="resources" +publish_dir="output/${RUN_ID}" + +grn_models_folder="${resources_dir}/grn_models" +reg_type=ridge +subsample=-2 +max_workers=10 +layer=pearson + +param_file="./params/${RUN_ID}.yaml" + + +grn_names=( + "collectri" + "celloracle" + "scenicplus" + "figr" + "granie" + "scglue" +) + +degrees=(10 20 50 100) +types=(links weight) + +# Start writing to the YAML file +cat > $param_file << HERE +param_list: +HERE + +append_entry() { + cat >> $param_file << HERE + - id: ${1}_${2}_${3} + layer: ${layer} + reg_type: $reg_type + method_id: $1 + subsample: $subsample + max_workers: $max_workers + consensus: ${resources_dir}/prior/consensus-num-regulators.json + prediction: ${grn_models_folder}/$1.csv + degree: ${3} + type: ${2} +HERE +} +# Loop through grn_names and layers +for type in "${types[@]}"; do + for degree in "${degrees[@]}"; do + for grn_name in "${grn_names[@]}"; do + append_entry "$grn_name" "$type" "$degree" + done + done +done + +# Append the remaining output_state and publish_dir to the YAML file +cat >> $param_file << HERE +output_state: "state.yaml" +publish_dir: "$publish_dir" +HERE + +nextflow run . 
import os
import pandas as pd
import numpy as np

## VIASH START
par = {
    "prediction": "resources/grn_models/collectri.csv",
    "prediction_n": "output/grn_noised.csv",
    'degree': 20,
    # Key matches the `--noise_type` argument declared in the component config.
    'noise_type': 'links'
}
## VIASH END


def add_noise(prediction, degree, noise_type):
    """Return a copy of a GRN edge table with noise applied to its ``weight`` column.

    Parameters
    ----------
    prediction : pandas.DataFrame
        Edge table; must contain a ``weight`` column. It is not modified.
    degree : int or float
        Noise level in percent (20 means 20 %). Must be >= 0.
    noise_type : str
        ``'weight'`` adds zero-mean Gaussian noise whose standard deviation is
        ``degree / 100`` times the standard deviation of the weights.
        ``'links'`` picks ``degree`` percent of the rows at random and shuffles
        the weights among those rows; all other columns are left untouched.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same rows and columns as ``prediction``.

    Raises
    ------
    ValueError
        If ``weight`` is missing, ``degree`` is negative, or ``noise_type`` is
        not one of the two supported values.
    """
    if 'weight' not in prediction.columns:
        raise ValueError("prediction must contain a 'weight' column")
    if degree < 0:
        raise ValueError(f"degree must be non-negative, got {degree}")

    fraction = degree / 100
    noised = prediction.copy()

    # The original script compared the Python builtin `type` with these strings,
    # which is always False, so no noise was ever applied.
    if noise_type == 'weight':
        print('Add noise to weight')
        std_dev = noised['weight'].std()
        # std() is NaN for fewer than two rows; adding NaN-scaled noise would
        # wipe out the weights, so fall back to "no noise" in that case.
        if not np.isfinite(std_dev):
            std_dev = 0.0
        noise = np.random.normal(0, fraction * std_dev, size=noised['weight'].shape)
        noised['weight'] = noised['weight'] + noise

    elif noise_type == 'links':
        print('Permute links')
        # Clamp so that degree > 100 cannot request more rows than exist
        # (sampling without replacement would raise).
        num_rows_to_permute = min(len(noised), int(len(noised) * fraction))
        if num_rows_to_permute > 0:
            # Work on positions rather than index labels so the result does not
            # depend on the frame having a unique index.
            weights = noised['weight'].to_numpy(copy=True)
            positions = np.random.choice(len(weights), size=num_rows_to_permute, replace=False)
            weights[positions] = np.random.permutation(weights[positions])
            noised['weight'] = weights

    else:
        raise ValueError(
            f"Unknown noise type {noise_type!r}; expected 'weight' or 'links'"
        )

    return noised


def main(par):
    """Read the GRN at ``par['prediction']``, add noise, write it to ``par['prediction_n']``."""
    # Accept the legacy `type` key as a fallback for older parameter dicts.
    noise_type = par.get('noise_type') or par.get('type') or 'links'

    prediction = pd.read_csv(par['prediction'])
    noised = add_noise(prediction, par['degree'], noise_type)

    print('Output noised GRN')
    out_dir = os.path.dirname(par['prediction_n'])
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    # index=False keeps the output columns identical to the input columns
    # (the input is read without an index column).
    noised.to_csv(par['prediction_n'], index=False)


# Guard keeps the file I/O from running when this module is imported.
if __name__ == '__main__':
    main(par)
# Viash config for the robustness-analysis workflow: a GRN prediction is passed
# through the noise_grn component and then scored by the regression metrics.
functionality:
  name: run_robustness_analysis
  namespace: workflows
  info:
    label: run_robustness_analysis
    summary: "Evaluates GRNs and provides scores using regression analysis."

  argument_groups:
    - name: Inputs
      arguments:
        # Data layer name (the run script passes e.g. `pearson`).
        - name: "--layer"
          type: string
          direction: input
        # GRN model to be noised and evaluated.
        - name: "--prediction"
          type: file
          direction: input
          required: false
        - name: "--subsample"
          type: integer
          direction: input
          default: 200
        - name: "--reg_type"
          type: string
          direction: input
          default: ridge
        - name: "--method_id"
          type: string
          direction: input
          required: true
          example: collectri
        - name: "--max_workers"
          type: integer
          direction: input
          required: true
        - name: "--consensus"
          type: file
          direction: input
          required: false
          default: resources/prior/consensus.json
        # Noise level forwarded from the parameter file.
        - name: "--degree"
          type: integer
          direction: input
          required: false
          default: 20
        # Noise kind forwarded from the parameter file.
        # NOTE(review): the noise_grn component names this argument
        # `--noise_type` - confirm the workflow maps one onto the other.
        - name: "--type"
          type: string
          direction: input
          required: false
          default: weight

    - name: Outputs
      arguments:
        - name: "--scores"
          type: file
          direction: output
          required: true
          default: scores.yaml
        - name: "--metric_configs"
          type: file
          direction: output
          required: true
          default: metric_configs.yaml

  resources:
    - type: nextflow_script
      path: main.nf
      entrypoint: run_wf
    - type: file
      path: ../../api/task_info.yaml

  dependencies:
    - name: common/extract_metadata
      repository: openproblemsv2
    - name: metrics/regression_1
    - name: metrics/regression_2
    - name: robustness_analysis/noise_grn

  repositories:
    - name: openproblemsv2
      type: github
      repo: openproblems-bio/openproblems-v2
      tag: main_build

platforms:
  - type: nextflow
    directives:
      label:
        - midtime
        - midmem
        - lowcpu
outputs to the overall state toState: { id, output, state, comp ->