Skip to content

Commit

Permalink
multiple workflows updated
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Aug 31, 2024
1 parent dd969e0 commit 1bd94de
Show file tree
Hide file tree
Showing 19 changed files with 307 additions and 194 deletions.
2 changes: 1 addition & 1 deletion _viash.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ config_mods: |
.platforms[.type == 'docker'].target_image_source := 'https://github.com/openproblems-bio/task_grn_inference'
.platforms[.type == "nextflow"].directives.tag := "$id"
.platforms[.type == "nextflow"].auto.simplifyOutput := false
.platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", veryhightime : "time = 24.h", veryveryhightime : "time = 48.h", threedaystime : "time = 72.h", oneweektime : "time = 168.h" }
.platforms[.type == "nextflow"].config.labels := { lowmem : "memory = 20.Gb", midmem : "memory = 50.Gb", highmem : "memory = 100.Gb", lowcpu : "cpus = 5", midcpu : "cpus = 15", highcpu : "cpus = 30", lowtime : "time = 1.h", midtime : "time = 4.h", hightime : "time = 8.h", onedaytime : "time = 24.h", threedaystime : "time = 72.h", oneweektime : "time = 168.h" }
.platforms[.type == "nextflow"].config.script := "process.errorStrategy = 'ignore'"
233 changes: 120 additions & 113 deletions runs.ipynb

Large diffs are not rendered by default.

51 changes: 51 additions & 0 deletions scripts/repo/run_grn_evaluation.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash

# Default values (each one can be overridden by the matching --flag)
grn=""
sample="200" # Default value for sample
reg_type="ridge"
score="output/score.csv"

#######################################
# Parse "--flag value" pairs from the command line into the globals above.
# Globals:   grn, sample, reg_type, score (written)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   error messages on stderr
# Returns:   exits 1 on an unknown flag or on a flag that has no value
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --grn | --sample | --reg_type | --score)
        # Reject a trailing flag with no value instead of silently storing "".
        if (( $# < 2 )); then
          echo "Missing value for parameter: $1" >&2
          exit 1
        fi
        # Flag names equal the variable names, so strip the leading "--".
        printf -v "${1#--}" '%s' "$2"
        shift 2
        ;;
      *)
        echo "Unknown parameter passed: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Ensure required arguments are provided
if [[ -z "$grn" ]]; then
  echo "Usage: $0 --grn <grn_file> [--sample <sample_value>] [--reg_type <reg_type>] [--score <score_file>]" >&2
  exit 1
fi

# Print parsed arguments (for debugging purposes)
echo "GRN file: $grn"
echo "Sample value: $sample"
echo "Regression model: $reg_type"

# Inputs shared by both metrics
perturbation_data="resources/grn-benchmark/perturbation_data.h5ad"
consensus="resources/grn-benchmark/consensus-num-regulators.json"

# Clean bin/ folder (-f: do not fail on a first run, when bin/ does not exist yet)
rm -rf -- bin
mkdir bin

# Run regression analysis 1
# NOTE(review): $sample is parsed and printed but never passed to either
# metric below - confirm whether it should be forwarded.
# NOTE(review): both metrics are given the same --score path, so the second
# run presumably replaces the first result - confirm this is intended.
echo "Running GRN benchmark with $grn and sample size $sample"
echo "Regression 1"
mkdir -p bin/regression_1
viash build src/metrics/regression_1/config.vsh.yaml -p docker -o bin/regression_1
bin/regression_1/regression_1 \
  --perturbation_data "$perturbation_data" \
  --reg_type "$reg_type" \
  --prediction "$grn" \
  --score "$score"

# Run regression analysis 2
echo "Regression 2"
# The consensus file is only computed when it is not already present.
if [[ ! -f "$consensus" ]]; then
  viash build src/metrics/regression_2/consensus/config.vsh.yaml --platform docker -o bin/regression_2/consensus
  bin/regression_2/consensus/consensus_for_regression_2 \
    --perturbation_data "$perturbation_data" \
    --output "$consensus" \
    --grn_folder resources/grn-benchmark/grn_models/ \
    --grns ananse.csv,celloracle.csv,figr.csv,granie.csv,scenicplus.csv,scglue.csv
fi
mkdir -p bin/regression_2
viash build src/metrics/regression_2/config.vsh.yaml -p docker -o bin/regression_2
bin/regression_2/regression_2 \
  --perturbation_data "$perturbation_data" \
  --consensus "$consensus" \
  --layer scgen_pearson \
  --reg_type "$reg_type" \
  --prediction "$grn" \
  --score "$score"
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#!/bin/bash

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
RUN_ID="pearson_gb_subsample_RF"
reg_type=${1} #GB, ridge

RUN_ID="grn_evaluation_${reg_type}"
resources_dir="s3://openproblems-data/resources/grn"
publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"
grn_models_folder="${resources_dir}/grn_models"
reg_type=RF

subsample=-2
max_workers=10

Expand All @@ -21,8 +23,7 @@ grn_names=(
"scglue"
)

# layers=("pearson" "lognorm" "scgen_pearson" "scgen_lognorm" "seurat_pearson" "seurat_lognorm")
layers=( "pearson" )
layers=("pearson" "lognorm" "scgen_pearson" "scgen_lognorm" "seurat_pearson" "seurat_lognorm")

# Start writing to the YAML file
cat > $param_file << HERE
Expand Down
8 changes: 4 additions & 4 deletions scripts/run_benchmark_single_omics.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
RUN_ID="single_omics"
# resources_dir="s3://openproblems-data/resources_test/grn"
# publish_dir="s3://openproblems-data/resources_test/grn/results/${RUN_ID}"
resources_dir="s3://openproblems-data/resources_test/grn"
publish_dir="s3://openproblems-data/resources_test/grn/results/${RUN_ID}"

resources_dir="./resources_test/"
publish_dir="output/${RUN_ID}"
# resources_dir="./resources_test/"
# publish_dir="output/${RUN_ID}"

reg_type=ridge
subsample=-2
Expand Down
138 changes: 91 additions & 47 deletions scripts/run_grn_evaluation.sh
Original file line number Diff line number Diff line change
@@ -1,51 +1,95 @@
#!/bin/bash

# Default values
grn=""
sample="200" # Default value for sample
reg_type="ridge"
score="output/score.csv"

# Parse arguments
while [[ "$#" -gt 0 ]]; do
case $1 in
--grn) grn="$2"; shift ;;
--sample) sample="$2"; shift ;;
--reg_type) reg_type="$2"; shift ;;
--score) score="$2"; shift ;;
*) echo "Unknown parameter passed: $1"; exit 1 ;;
esac
shift
# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"

# The regression model is the first positional argument (GB, ridge).
# Without it RUN_ID and the params file name would be built from an empty
# string, so stop early instead.
if [[ -z "${1:-}" ]]; then
  echo "Usage: $0 <reg_type>  # GB, ridge" >&2
  exit 1
fi
reg_type=$1

RUN_ID="grn_evaluation_${reg_type}"
# resources_dir="s3://openproblems-data/resources/grn"
resources_dir="./resources"
publish_dir="${resources_dir}/results/${RUN_ID}"
grn_models_folder="${resources_dir}/grn_models"

subsample=-2
max_workers=10

param_file="./params/${RUN_ID}.yaml"

# GRN models to evaluate; each name is also the stem of its prediction file.
grn_names=(
  "collectri"
  "celloracle"
  "scenicplus"
  "figr"
  "granie"
  "scglue"
)

# Start writing to the YAML file (create its folder first; this truncates
# any params file left over from a previous run with the same RUN_ID)
mkdir -p -- "${param_file%/*}"
cat > "$param_file" << HERE
param_list:
HERE

#######################################
# Append one entry of the param_list to the params YAML file.
# Fields are indented so that they nest under their "- id:" list item.
# Globals:   param_file, reg_type, resources_dir, subsample, max_workers,
#            grn_models_folder (read)
# Arguments: $1 - method id (also the file stem of the prediction csv)
#            $2 - "true" to add a prediction path for $1; any other value
#                 leaves the prediction field out
#            $3 - layer name
# Outputs:   appends to the file named by $param_file
#######################################
append_entry() {
  local method_id=$1
  local with_prediction=$2
  local layer_name=$3

  cat >> "$param_file" << HERE
  - id: ${reg_type}_${method_id}_${layer_name}
    perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
    reg_type: $reg_type
    method_id: $method_id
    subsample: $subsample
    max_workers: $max_workers
    tf_all: ${resources_dir}/prior/tf_all.csv
    layer: ${layer_name}
    consensus: ${resources_dir}/prior/consensus-num-regulators.json
HERE

  # Conditionally append the prediction line if the second argument is "true"
  if [[ "$with_prediction" == "true" ]]; then
    cat >> "$param_file" << HERE
    prediction: ${grn_models_folder}/${method_id}.csv
HERE
  fi
}
# Layers every method is evaluated on
layers=(pearson scgen_pearson)

# One entry per (layer, GRN model) pair; these entries carry a prediction file
for layer in "${layers[@]}"
do
  for grn_name in "${grn_names[@]}"
  do
    append_entry "${grn_name}" "true" "${layer}"
  done
done

# Negative control: one entry per layer, written without a prediction file
grn_name="negative_control"
for layer in "${layers[@]}"
do
  append_entry "${grn_name}" "false" "${layer}"
done

# Ensure required arguments are provided
if [ -z "$grn" ]; then
echo "Usage: $0 --grn <grn_file> [--sample <sample_value>]"
exit 1
fi

# Print parsed arguments (for debugging purposes)
echo "GRN file: $grn"
echo "Sample value: $sample"
echo "Regression model: $reg_type"

# Clean bin/ folder
rm -r bin
mkdir bin

# Run regression analysis 1
echo "Running GRN benchmark with $grn and sample size $sample"
echo "Regression 1"
mkdir -p bin/regression_1
viash build src/metrics/regression_1/config.vsh.yaml -p docker -o bin/regression_1
bin/regression_1/regression_1 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --reg_type $reg_type --prediction $grn --score $score

# Run regression analysis 2
echo "Regression 2"
if [ ! -f resources/grn-benchmark/consensus-num-regulators.json ]; then
viash build src/metrics/regression_2/consensus/config.vsh.yaml --platform docker -o bin/regression_2/consensus
bin/regression_2/consensus/consensus_for_regression_2 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --output resources/grn-benchmark/consensus-num-regulators.json --grn_folder resources/grn-benchmark/grn_models/ --grns ananse.csv,celloracle.csv,figr.csv,granie.csv,scenicplus.csv,scglue.csv
fi
mkdir -p bin/regression_2
viash build src/metrics/regression_2/config.vsh.yaml -p docker -o bin/regression_2
bin/regression_2/regression_2 --perturbation_data resources/grn-benchmark/perturbation_data.h5ad --consensus resources/grn-benchmark/consensus-num-regulators.json --layer scgen_pearson --reg_type $reg_type --prediction $grn --score $score

# Append positive controls: one entry per layer, without a prediction file
grn_name="positive_control"
for layer in "${layers[@]}"; do
  append_entry "$grn_name" "false" "$layer"
done


# Append the remaining output_state and publish_dir to the YAML file
cat >> "$param_file" << HERE
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

# Launch the evaluation workflow with the params file written above.
# The path is quoted so that a run id containing whitespace stays one argument.
nextflow run . \
  -main-script target/nextflow/workflows/run_grn_evaluation/main.nf \
  -profile docker \
  -with-trace \
  -c src/common/nextflow_helpers/labels_ci.config \
  -params-file "${param_file}"

# ./tw-windows-x86_64.exe launch `
# https://github.com/openproblems-bio/task_grn_benchmark.git `
# --revision build/main `
# --pull-latest `
# --main-script target/nextflow/workflows/run_grn_evaluation/main.nf `
# --workspace 53907369739130 `
# --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
# --params-file ./params/scgen_pearson_gb_pcs.yaml `
# --config src/common/nextflow_helpers/labels_tw.config


20 changes: 10 additions & 10 deletions scripts/run_robust_analys.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"

degrees=(0 10 20 50 100)
noise_type="$1"
noise_type="$1" #"net"
echo $noise_type

RUN_ID="robust_analy_$1"
Expand All @@ -19,7 +19,6 @@ grn_models_folder="${resources_dir}/grn_models"
reg_type=ridge
subsample=-2
max_workers=10
layer=pearson

param_file="./params/${RUN_ID}.yaml"

Expand All @@ -33,19 +32,18 @@ grn_names=(
)



# Start writing to the YAML file
cat > $param_file << HERE
param_list:
HERE

append_entry() {
cat >> $param_file << HERE
- id: ${1}_${2}
- id: ${1}_${2}_${3}
perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
layer: ${layer}
reg_type: $reg_type
method_id: ${2}-${1}
layer: ${3}
subsample: $subsample
max_workers: $max_workers
consensus: ${resources_dir}/prior/consensus-num-regulators.json
Expand All @@ -55,12 +53,14 @@ append_entry() {
HERE
}
# Loop through grn_names and layers
for degree in "${degrees[@]}"; do
for grn_name in "${grn_names[@]}"; do
append_entry "$grn_name" "$degree"
layers=(pearson scgen_pearson)
for layer in "${layers[@]}"; do
for degree in "${degrees[@]}"; do
for grn_name in "${grn_names[@]}"; do
append_entry "$grn_name" "$degree" "$layer"
done
done
done

done

# Append the remaining output_state and publish_dir to the YAML file
cat >> $param_file << HERE
Expand Down
1 change: 1 addition & 0 deletions src/control_methods/positive_control/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"prediction": "output/positive_control.csv",
}
## VIASH END
print(par)
print('Reading input data')
perturbation_data = ad.read_h5ad(par["perturbation_data"])
gene_names = perturbation_data.var_names.to_numpy()
Expand Down
9 changes: 7 additions & 2 deletions src/methods/multi_omics/celloracle/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,8 +141,13 @@ def refine_grns(par):
grn = grn[mask]
grn = grn[~(grn.coef_abs==0)] # remove those with 0 coeff
# filter based on z score
z_scores = (grn.coef_abs - grn.coef_abs.mean())/grn.coef_abs.std()
mask = z_scores > 2
# z_scores = (grn.coef_abs - grn.coef_abs.mean())/grn.coef_abs.std()
# mask = z_scores > 2
# Sort by absolute coefficient values
grn = grn.sort_values(by="coef_abs", ascending=False)

# Keep only the top par['max_n_links'] links, ranked by absolute weight
mask = grn.index[:par['max_n_links']]
grn = grn.loc[mask, :]

grn = grn[['source', 'target', 'coef_mean']]
Expand Down
2 changes: 1 addition & 1 deletion src/methods/single_omics/ennet/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ platforms:
- type: native
- type: nextflow
directives:
label: [midtime,midmem,midcpu]
label: [onedaytime,midmem,midcpu]
2 changes: 1 addition & 1 deletion src/methods/single_omics/grnboost2/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ platforms:
- type: native
- type: nextflow
directives:
label: [midtime,midmem,midcpu]
label: [onedaytime,midmem,midcpu]
2 changes: 1 addition & 1 deletion src/methods/single_omics/scsgl/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ platforms:
- type: native
- type: nextflow
directives:
label: [midtime,midmem,midcpu]
label: [onedaytime,midmem,midcpu]
2 changes: 1 addition & 1 deletion src/methods/single_omics/tigress/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,4 @@ platforms:
- type: native
- type: nextflow
directives:
label: [midtime, midmem, highcpu]
label: [onedaytime, midmem, highcpu]
4 changes: 3 additions & 1 deletion src/metrics/regression_1/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
"prediction": "resources/grn-benchmark/grn_models/collectri.csv",
'score': 'output/score.h5ad',
'reg_type': 'ridge',
'layer': 'lognorm',
'layer': 'pearson',
'subsample': 200,
'max_workers': 4,
}
print(par)


## VIASH END
sys.path.append(meta["resources_dir"])
Expand Down
1 change: 1 addition & 0 deletions src/metrics/regression_2/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
}
## VIASH END

print(par)
sys.path.append(meta['resources_dir'])
from main import main

Expand Down
2 changes: 1 addition & 1 deletion src/workflows/run_benchmark_single_omics/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,4 @@ functionality:
platforms:
- type: nextflow
directives:
label: [ hightime, midmem, highcpu ]
label: [ onedaytime, midmem, highcpu]
2 changes: 0 additions & 2 deletions src/workflows/run_benchmark_single_omics/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,9 @@ workflow run_wf {
// ]

methods = [
portia,
ennet,
grnboost2,
scsgl,
ppcor,
tigress
]

Expand Down
Loading

0 comments on commit 1bd94de

Please sign in to comment.