Skip to content

Commit

Permalink
robustness analysis causal added
Browse files Browse the repository at this point in the history
  • Loading branch information
janursa committed Sep 5, 2024
1 parent 63b40d5 commit 656771a
Show file tree
Hide file tree
Showing 11 changed files with 372 additions and 83 deletions.
38 changes: 19 additions & 19 deletions scripts/run_benchmark_single_omics.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#!/bin/bash

# RUN_ID="run_$(date +%Y-%m-%d_%H-%M-%S)"
RUN_ID="single_omics_all_test"
resources_dir="s3://openproblems-data/resources_test/grn"
publish_dir="s3://openproblems-data/resources_test/grn/results/${RUN_ID}"
RUN_ID="single_omics_all"
resources_dir="s3://openproblems-data/resources/grn"
publish_dir="s3://openproblems-data/resources/grn/results/${RUN_ID}"

# resources_dir="./resources_test/"
# publish_dir="output/${RUN_ID}"
Expand Down Expand Up @@ -31,21 +31,21 @@ output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

nextflow run . \
-main-script target/nextflow/workflows/run_benchmark_single_omics/main.nf \
-profile docker \
-with-trace \
-c src/common/nextflow_helpers/labels_ci.config \
-params-file ${param_file}

./tw-windows-x86_64.exe launch `
https://github.com/openproblems-bio/task_grn_benchmark.git `
--revision build/main `
--pull-latest `
--main-script target/nextflow/workflows/run_benchmark_single_omics/main.nf `
--workspace 53907369739130 `
--compute-env 6TeIFgV5OY4pJCk8I0bfOh `
--params-file ./params/single_omics_all_test.yaml `
--config src/common/nextflow_helpers/labels_tw.config
# nextflow run . \
# -main-script target/nextflow/workflows/run_benchmark_single_omics/main.nf \
# -profile docker \
# -with-trace \
# -c src/common/nextflow_helpers/labels_ci.config \
# -params-file ${param_file}

# ./tw-windows-x86_64.exe launch `
# https://github.com/openproblems-bio/task_grn_benchmark.git `
# --revision build/main `
# --pull-latest `
# --main-script target/nextflow/workflows/run_benchmark_single_omics/main.nf `
# --workspace 53907369739130 `
# --compute-env 6TeIFgV5OY4pJCk8I0bfOh `
# --params-file ./params/single_omics_all_test.yaml `
# --config src/common/nextflow_helpers/labels_tw.config


55 changes: 55 additions & 0 deletions scripts/run_robust_analys_causal.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash
# Causal robustness analysis launcher.
#
# Writes a Nextflow params file containing 100 param_list entries
# (ids corr-1 .. corr-100) per layer and then runs the
# run_robustness_analysis_causal workflow locally with the docker profile.

# Build all viash components; the workflow below is run from target/nextflow.
viash ns build --parallel

RUN_ID="robust_analy_causal"
resources_dir="resources"
# resources_dir="s3://openproblems-data/resources/grn"

publish_dir="${resources_dir}/results/${RUN_ID}"

reg_type=ridge
subsample=-2
max_workers=10

param_file="./params/${RUN_ID}.yaml"
# The redirect below fails if the params directory is missing, so create it.
mkdir -p "$(dirname "$param_file")"

# Start the YAML file from scratch (overwrites the params of a previous run).
cat > "$param_file" << HERE
param_list:
HERE

# Append one param_list entry to the params file.
#   $1  iteration number, used for both id and method_id (corr-<n>)
#   $2  layer name
# The heredoc body is written verbatim, so its indentation IS the YAML
# nesting: every entry must be a list item whose keys line up under "id".
# The HERE terminator has to stay in column 0 because "<<" (not "<<-") is used.
append_entry() {
  cat >> "$param_file" << HERE
  - id: corr-${1}
    multiomics_rna: ${resources_dir}/grn-benchmark/multiomics_rna.h5ad
    perturbation_data: ${resources_dir}/grn-benchmark/perturbation_data.h5ad
    reg_type: $reg_type
    method_id: corr-${1}
    layer: ${2}
    subsample: $subsample
    max_workers: $max_workers
    consensus: ${resources_dir}/prior/consensus-num-regulators.json
    tf_all: ${resources_dir}/prior/tf_all.csv
HERE
}

# Layers to evaluate; one block of 100 entries is generated per layer.
layers=("pearson")

for layer in "${layers[@]}"; do
  for iter in {1..100}; do
    append_entry "$iter" "$layer"
  done
done

# Append the remaining output_state and publish_dir to the YAML file
cat >> "$param_file" << HERE
output_state: "state.yaml"
publish_dir: "$publish_dir"
HERE

nextflow run . \
  -main-script target/nextflow/workflows/run_robustness_analysis_causal/main.nf \
  -profile docker \
  -with-trace \
  -c src/common/nextflow_helpers/labels_ci.config \
  -params-file "${param_file}"

6 changes: 6 additions & 0 deletions src/methods/multi_omics/scenicplus/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ functionality:
required: false
direction: output
description: "Cell-topics prob scores"
- name: --grn_extended
type: file
default: output/grn_extended.csv
required: false
direction: output
description: "Source-target-peak triplets"
resources:
- type: python_script
path: script.py
Expand Down
27 changes: 18 additions & 9 deletions src/methods/multi_omics/scenicplus/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@
models = run_cgs_models(
cistopic_obj,
n_topics=n_topics,
n_cpu=12,
n_cpu=par['num_workers'],
n_iter=500,
random_state=555,
alpha=50,
Expand Down Expand Up @@ -545,7 +545,7 @@
contrasts=None,
adjpval_thr=0.05,
log2fc_thr=np.log2(1.5),
n_cpu=5,
n_cpu=par['num_workers'],
split_pattern='-'
)

Expand Down Expand Up @@ -724,11 +724,11 @@ def download(url: str, filepath: str) -> None:
settings['input_data']['path_to_motif_annotations'] = motif_annotation
settings['params_general']['temp_dir'] = os.path.join(out_dir, 'scplus_pipeline', 'temp')
settings['params_general']['n_cpu'] = par['num_workers']
settings['params_inference']['quantile_thresholds_region_to_gene'] = '0.85 0.90 0.95'
settings['params_inference']['top_n_regionTogenes_per_gene'] = '5 10 15'
settings['params_inference']['quantile_thresholds_region_to_gene'] = '0.7 0.8 0.9'
settings['params_inference']['top_n_regionTogenes_per_gene'] = '10 15 25'
settings['params_inference']['region_to_gene_importance_method'] = 'GBM'
settings['params_inference']['tf_to_gene_importance_method'] = 'GBM'
settings['params_inference']['rho_threshold'] = 0.03
settings['params_inference']['rho_threshold'] = 0.0
settings['params_inference']['region_to_gene_correlation_method'] = 'SR'
settings['params_inference']['min_target_genes'] = 10
settings['params_motif_enrichment']['species'] = 'homo_sapiens'
Expand Down Expand Up @@ -764,8 +764,17 @@ def download(url: str, filepath: str) -> None:
scplus_mdata = mudata.read(par['scplus_mdata'])

# Save results
prediction = scplus_mdata.uns['direct_e_regulon_metadata']
prediction.insert(0, 'source', prediction['TF'])
prediction.insert(1, 'target', prediction['Gene'])
prediction.insert(2, 'weight', prediction['importance_x_abs_rho'])
grn_extended = scplus_mdata.uns['direct_e_regulon_metadata']

grn_extended = grn_extended[['TF', 'Gene', 'rho_TF2G', 'Region']].drop_duplicates().reset_index(drop=True)
grn_extended = grn_extended[grn_extended.rho_TF2G!=0]

grn_extended.columns = ['source', 'target', 'weight', 'peak']

grn_extended = grn_extended[['source','target','weight']].drop_duplicates(ignore_index=True)

prediction = grn_extended.groupby(['source', 'target'], as_index=False)['weight'].sum()

grn_extended.to_csv(par['grn_extended'])
prediction.to_csv(par['prediction'])

55 changes: 0 additions & 55 deletions src/robustness_analysis/add_noise_grn.py

This file was deleted.

34 changes: 34 additions & 0 deletions src/robustness_analysis/causal/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Viash component config for robustness_analysis/causal_grn.
# The component runs script.py from this directory.
functionality:
  name: causal_grn
  namespace: "robustness_analysis"
  info:
    label: causal_grn
    # script.py builds a gene-gene correlation network and keeps a random
    # sample of columns as regulators; it does not add noise to a GRN.
    summary: "Builds a correlation network from RNA expression with randomly sampled genes as regulators"
  arguments:
    # Expression data; script.py reads it as an AnnData file and groups
    # cells by obs.cell_type.
    - name: --multiomics_rna
      type: file
      direction: input
      example: resources_test/grn-benchmark/multiomics_rna.h5ad

    # List of transcription factors; script.py only uses how many of them
    # are present among the genes to decide how many columns to sample.
    - name: --tf_all
      type: file
      direction: input
      example: resources_test/prior/tf_all.csv

    # CSV written by script.py with target / source / weight columns.
    - name: --prediction
      type: file
      direction: output
      example: resources_test/grn_models/collectri.csv

  resources:
    - type: python_script
      path: script.py
platforms:
  - type: docker
    image: ghcr.io/openproblems-bio/base_python:1.0.4
    setup:
      # NOTE(review): script.py imports scanpy, anndata, sklearn and tqdm;
      # confirm the base image already provides them, since none are listed.
      - type: python
        packages: []
  - type: nextflow
    directives:
      label: [midtime, highmem, highcpu]
49 changes: 49 additions & 0 deletions src/robustness_analysis/causal/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os
import pandas as pd
import numpy as np
import anndata as ad
import scanpy as sc
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler


## VIASH START
# Defaults for running this script directly while developing. Viash is
# expected to replace everything between the VIASH START / VIASH END markers
# with the real arguments when the component is built.
# Input paths mirror the examples declared in this component's config.vsh.yaml.
par = {
    "multiomics_rna": "resources_test/grn-benchmark/multiomics_rna.h5ad",
    "tf_all": "resources_test/prior/tf_all.csv",
    "prediction": "output/causal_grn.csv",
}
## VIASH END

def create_corr_net(X: np.ndarray, groups: np.ndarray):
    """Average the per-group feature-by-feature correlation matrices of ``X``.

    Rows of ``X`` are split by their label in ``groups``. Within each group
    every column is standardized with ``StandardScaler`` and the
    column-by-column cross-product is divided by the number of rows in that
    group. The element-wise mean of these matrices over all groups is
    returned.

    Args:
        X: 2-D matrix, rows are observations and columns are features.
        groups: one label per row of ``X``.

    Returns:
        Square array with one row and one column per column of ``X``.
    """
    per_group = []
    labels = np.unique(groups)
    for label in tqdm(labels, desc="Processing groups"):
        in_group = groups == label
        standardized = StandardScaler().fit_transform(X[in_group, :])
        n_rows = standardized.shape[0]
        per_group.append(np.dot(standardized.T, standardized) / n_rows)
    return np.mean(per_group, axis=0)


# Script body: build a gene-gene correlation network from the RNA data,
# keep a random subset of genes as "source" columns, and write the result
# as a long-format edge list.

print('Read data')
multiomics_rna = ad.read_h5ad(par["multiomics_rna"])
gene_names = multiomics_rna.var_names.to_numpy()
tf_all = np.loadtxt(par['tf_all'], dtype=str)
groups = multiomics_rna.obs.cell_type
# Keep only TFs that are present among the genes. Below, only the COUNT of
# these TFs is used; their names do not select the source columns.
tf_all = np.intersect1d(tf_all, gene_names)

print('Normalize data')
sc.pp.normalize_total(multiomics_rna)
sc.pp.log1p(multiomics_rna)
sc.pp.scale(multiomics_rna)

print('Create corr net')
net = create_corr_net(multiomics_rna.X, groups)
net = pd.DataFrame(net, index=gene_names, columns=gene_names)

# Pick as many random gene columns as there are TFs. No random_state is
# passed, so the selected columns differ from run to run.
net_corr = net.sample(len(tf_all), axis=1)
# Wide (gene x sampled gene) -> long: one row per (target, source, weight).
# NOTE(review): rows where target == source (the matrix diagonal) are kept,
# so self-edges are written out — confirm downstream handles or wants them.
net_corr = net_corr.reset_index().melt(id_vars='index', var_name='source', value_name='weight')
net_corr.rename(columns={'index': 'target'}, inplace=True)

print('Output GRN')
# The default to_csv call also writes the integer row index as the first column.
net_corr.to_csv(par['prediction'])

File renamed without changes.
File renamed without changes.
80 changes: 80 additions & 0 deletions src/workflows/run_robustness_analysis_causal/config.vsh.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Viash workflow config: run_robustness_analysis_causal.
# The entry point is the run_wf workflow in main.nf.
functionality:
  name: run_robustness_analysis_causal
  namespace: workflows
  info:
    label: run_robustness_analysis_causal
    summary: "Evaluates GRNs and provides scores using regression analysis."
  argument_groups:
    - name: Inputs
      arguments:
        - name: "--multiomics_rna"
          type: file
          direction: input
        - name: "--perturbation_data"
          type: file
          direction: input
        - name: "--layer"
          type: string
          direction: input
        - name: "--subsample"
          type: integer
          direction: input
          default: 200
        - name: "--reg_type"
          type: string
          direction: input
          default: ridge
        - name: "--method_id"
          type: string
          direction: input
          required: true
          example: collectri
        - name: "--max_workers"
          type: integer
          direction: input
          required: true
        - name: "--consensus"
          type: file
          required: false
          direction: input
          default: resources/prior/consensus.json
        - name: "--tf_all"
          type: file
          required: false
          direction: input


    - name: Outputs
      arguments:
        - name: "--scores"
          type: file
          required: true
          direction: output
          default: scores.yaml
        - name: "--metric_configs"
          type: file
          required: true
          direction: output
          default: metric_configs.yaml

  resources:
    - type: nextflow_script
      path: main.nf
      entrypoint: run_wf
    - type: file
      path: ../../api/task_info.yaml
  # Components this workflow depends on; extract_metadata comes from the
  # openproblemsv2 repository declared under "repositories" below.
  dependencies:
    - name: common/extract_metadata
      repository: openproblemsv2
    - name: metrics/regression_1
    - name: metrics/regression_2
    - name: robustness_analysis/causal_grn
  repositories:
    - name: openproblemsv2
      type: github
      repo: openproblems-bio/openproblems-v2
      tag: main_build
platforms:
  - type: nextflow
    directives:
      label:
        - midtime
        - midmem
        - lowcpu
Loading

0 comments on commit 656771a

Please sign in to comment.